#! /bin/sh
# Scans a C News relaynews log and generates a shell script that sends
# mail to the admins of sites responsible for bad messages.
#
# Usage: <this script> [-q | -v | -o letters] [logfile ...]
#   (default)   complain about classes w, n and d
#   -q          complain about nothing (only the summary is produced)
#   -v          complain about every class: d n f m x w
#   -o letters  complain about exactly the classes named in letters
# Class letters:
#   d  unparsable date              n  non-header line in header
#   f  date in the future           m  no message-id
#   x  missing header               w  whitespace in message-id
#
# Log lines come from the named files, or from standard input when no
# file is given.  The generated script is written to standard output;
# diagnostics about unusable message-ids go to standard error so they
# cannot end up as commands in the generated script.

# Not referenced anywhere below; kept so local configuration is not lost.
thishost=csri.toronto.edu

args="wnd"
case "$1" in
-q)	args=""; shift;;
-v)	args="dnfmxw"; shift;;
-o)	shift; args="$1"; shift;;
esac

# gawk needed because of nawk hard limits on strings.  Could be
# worked around if really necessary.
#
# The class letters are handed over with -v rather than spliced into the
# program text, so an odd -o argument cannot alter the awk source.
# The program is single-quoted; the complaint texts are spliced in through
# short double-quoted sections ('"..."') because they contain apostrophes.
# Inside those sections the shell turns \\n into \n and removes
# backslash-newline, so awk sees each text as a one-line string literal.
gawk -v args="$args" 'BEGIN {
	# Non-zero when the corresponding class of complaint is wanted.
	sendud = index(args, "d");	# unparsable date
	sendnh = index(args, "n");	# non-header line in header
	senddf = index(args, "f");	# date in future
	sendnm = index(args, "m");	# no message-id
	sendxh = index(args, "x");	# missing header
	sendws = index(args, "w");	# whitespace in messageid
	# to add: illegal messageid
}

# Report a message-id that cannot be mapped to a site.  Written to stderr
# because stdout is the shell script being generated.
function reject(msgid) {
	printf("weird message-id: %s\n", msgid) > "/dev/stderr";
}

# Append log line "line" to arr[machine], where machine is the text between
# the "@" and the trailing ">" of msgid.  Ids without exactly one "@", without
# a trailing ">", or whose host part is not a plain host name are rejected.
# nf, parts, len, machine are locals.
function saveit(msgid, line, arr,    nf, parts, len, machine) {
	nf = split(msgid, parts, "@");
	if (nf != 2) {
		reject(msgid);
		return;
	}
	# chop off trailing ">"
	len = length(parts[2]);
	if (substr(parts[2], len, 1) != ">") {
		reject(msgid);
		return;
	}
	machine = substr(parts[2], 1, len - 1);
	# The host comes from article headers written by remote sites and is
	# pasted unquoted into a generated Mail command line, so accept only
	# characters that are inert to the shell.
	if (machine !~ /^[A-Za-z0-9][A-Za-z0-9._-]*$/) {
		reject(msgid);
		return;
	}
	arr[machine] = arr[machine] line "\n";
}

# Return whatever is in s between the first startchar and the next endchar
# (not inclusive).  Returns "" when startchar is absent, and everything after
# startchar when endchar is absent.  first, last, rest are locals.
function extract(s, startchar, endchar,    first, last, rest) {
	first = index(s, startchar);
	if (first == 0)
		return "";
	rest = substr(s, first + 1);
	last = index(rest, endchar);
	if (last != 0)
		rest = substr(rest, 1, last - 1);
	return rest;
}

# For every site in arr, emit a Mail command with subject s whose here-document
# body is the explanation msg followed by the saved log lines for that site.
# site is a local.
function complain(s, msg, arr,    site) {
	for (site in arr) {
		printf "Mail -s \"%s\" usenet@%s << \"EOF\"\nHi\n\n", s, site;
		printf "%s\nThe following is from our news log:\n\n%s", msg, arr[site];
		print "EOF\n:";
	}
}

# Only lines whose fifth field is "-" are examined.
$5 != "-" { next; }

# Rejections that nobody needs to be told about.
/duplicate$/ { next; }
/all groups .* excluded in active/ { next; }

/older than [0-9]* days/ {
	if (days == "") {
		# The for() loop is not strictly necessary, but it makes this
		# robust in the face of modified logs, eg. nntplink -X mods
		for (i = 6; i <= NF; i++) {
			if ($i == "older") {
				i += 2;
				days = $i;
			}
		}
	}
	old++;
	next;
}

/unparsable Date:/ {
	unparsable++;
	saveit($6, $0, ud);
	next;
}

/contains non-header line/ {
	nonheader++;
	saveit($6, $0, nh);
	next;
}

/Date: in the future:/ {
	future++;
	saveit($6, $0, df);
	next;
}

/no Message-ID: header/ {
	nomsg++;
	# There is no message-id to take a site from, so file the line under
	# the name in field 4.
	# NOTE(review): field 4 is presumed to be the site that fed us the
	# article -- confirm against the relaynews log format.
	saveit("<@" $4 ">", $0, nm);
	next;
}

# Must stay after the Message-ID rule, which matches the same lines.
/no .*: header/ {
	xheader++;
	saveit($6, $0, xh);
	next;
}

/unapproved article in moderated group/ {
	unapproved++;
	next;
}

/whitespace in Message-ID/ {
	whitespace++;
	# The id contains blanks, so it is spread over several fields; recover
	# it from the whole line instead of from field 6.
	s = "<" extract($0, "<", ">") ">";
	saveit(s, $0, ws);
	next;
}

# Anything not recognised above is collected for the summary.
{
	unknown++;
	xx = xx $0 "\n";
}

END {
	# The summary is emitted as a cat here-document so that running the
	# generated script displays it.
	print "cat << \"EOF\"\nSummary:\n"
	printf "%5d : older than %s days\n", old, days;
	printf "%5d : unparsable Date\n", unparsable;
	printf "%5d : header contains non-header lines\n", nonheader;
	printf "%5d : Date: in the future\n", future;
	printf "%5d : No Message-ID header\n", nomsg;
	printf "%5d : missing required header\n", xheader;
	printf "%5d : unapproved article in moderated group\n", unapproved;
	printf "%5d : whitespace in Message-ID\n", whitespace;
	if (unknown > 0)
		printf "%5d : unknown lines:\n%s", unknown, xx;
	print "EOF"

	# Each section below starts with ":" no-op lines naming the section.
	# The blank after the colon matters: ":text" would be looked up as a
	# command, ": text" is the null command with arguments.

	if (sendud && unparsable > 0) {
		s="unparsable dates in news articles";
		printf ":\n: %s\n:\n", s;
		msg="'"\
Your machine's news system seems to be generating articles with Date:\\n\
headers in violation of Internet RFC 1036, the Usenet article format\\n\
standard. C News sites will not file or forward such articles.\\n\
The correct date format is\\n\
[Day,] dd Month [yy]yy hh:mm:ss timezone\\n\
Four digit years and numeric timezones are recommended, per RFC1123.\\n\
"'";
		complain(s, msg, ud);
	}

	if (sendnh && nonheader > 0) {
		s="non-header lines in news article headers";
		printf ":\n: %s\n:\n", s;
		msg="'"\
Your machine's news system seems to be generating articles with\\n\
non-header lines in headers. A non-header line is one which doesn't\\n\
conform to Internet RFC 1036, the Usenet article format\\n\
standard. C News sites will not file or forward such articles.\\n\
The definition of a header is\\n\
word: text\\n\
where text may carry onto optional continuation lines. Continuation\\n\
lines must start with whitespace. Headers continue till the first\\n\
empty line. A common mistake is to leave empty headers in -- since\\n\
these have no space after the :, they are illegal.\\n\
"'";
		complain(s, msg, nh);
	}

	if (senddf && future > 0) {
		s="bad dates in news articles";
		printf ":\n: %s\n:\n", s;
		msg="'"\
Your machine's news system is generating Date: headers with times that\\n\
appear to be in the future, probably due to a missing or incorrect time\\n\
zone. C News sites will not file or forward such articles.\\n\
"'";
		complain(s, msg, df);
	}

	if (sendnm && nomsg > 0) {
		s="news articles without message ids";
		printf ":\n: %s\n:\n", s;
		msg="'"\
Your machine's news system is generating articles without Message-IDs\\n\
in violation of Internet RFC 1036, the Usenet article format\\n\
standard. C News sites will not file or forward such articles.\\n\
"'";
		complain(s, msg, nm);
	}

	if (sendxh && xheader > 0) {
		s="missing required headers in news articles";
		printf ":\n: %s\n:\n", s;
		msg="'"\
Your machine's news system is generating articles without required headers,\\n\
in violation of Internet RFC 1036, the Usenet article format standard.\\n\
C News sites will not file or forward such articles.\\n\
"'";
		complain(s, msg, xh);
	}

	if (sendws && whitespace > 0) {
		s="whitespace in news article message IDs";
		printf ":\n: %s\n:\n", s;
		msg="'"\
Your machine's news system is generating articles with Message-IDs\\n\
that contain whitespace. This is illegal according to section 2.1.5\\n\
of Internet RFC 1036, the Usenet article format standard.\\n\
C News sites will not file or forward such articles.\\n\
"'";
		complain(s, msg, ws);
	}
}' "$@"