#!/usr/bin/perl -w

# Copyright (C) 2001 Simon Huggins

# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., 59
# Temple Place, Suite 330, Boston, MA 02111-1307  USA

# Chooses a tagline that shares words with a message.
#
# Steps:
#   1. Read every candidate tagline (one per line) from $cfg{'tagfile'}.
#   2. Load the stop-word blacklist from the __DATA__ section below.
#   3. Count the words of $cfg{'msgfile'}, ignoring numbers, blacklisted
#      words and words longer than $MAX_WORD_LENGTH characters.
#   4. For the $MAX_WORDS most frequent words, collect every tagline that
#      contains the word (case-insensitive, whole-word match).
#   5. Write one randomly chosen collected tagline to $cfg{'tmptagfile'},
#      or exit with status 5 when nothing matched.
#
# Configuration keys read from %cfg:
#   tagfile, msgfile, tmptagfile, grep_debug, grep_debugfile
#
# NOTE(review): %cfg, htagdie() and reg_deletion() are not defined in this
# file; presumably they are supplied by the program that loads this script.

use strict;
use warnings;

# How many of the most frequent message words are matched against the tags.
# The original loop incremented its counter and then tested "> 20", so it
# examined 21 words; that cut-off is kept unchanged.
my $MAX_WORDS = 21;

# Words longer than this many characters are not counted.
my $MAX_WORD_LENGTH = 9;

# Copy the configured paths into defined strings once, so that a missing
# key behaves like an empty file name (open fails with a normal error)
# instead of handing undef to three-argument open.
my ($tag_file, $msg_file, $out_file, $debug_file) =
    map { defined $cfg{$_} ? $cfg{$_} : '' }
    qw(tagfile msgfile tmptagfile grep_debugfile);

my (@tags, %words, %blacklist, @goodtags);

# Declared at file scope because the END block below closes them.
my ($out_fh, $debug_fh);

# Debug logging is optional; if the log cannot be opened, carry on without
# it rather than printing to an unopened handle.
my $debug = $cfg{'grep_debug'} ? 1 : 0;
if ($debug and not open($debug_fh, '>', $debug_file)) {
    warn "$0: Could not open debug file '$debug_file': $!\n";
    $debug = 0;
}

# Candidate taglines.  Lines keep their trailing newline so that the
# chosen tag can be printed verbatim.
open(my $tag_fh, '<', $tag_file)
    or htagdie("Could not open $tag_file: $!\n");
@tags = <$tag_fh>;
close($tag_fh);

# Stop words.  Splitting on whitespace accepts both one word per line and
# several words per line.
while (my $line = <DATA>) {
    $blacklist{$_}++ for split ' ', $line;
}

# Count the interesting words of the message.  An unreadable message file
# is reported but not fatal: no words are counted, so the script ends up
# on the "no matching tag" path (exit status 5) as it did before.
if (open(my $msg_fh, '<', $msg_file)) {
    while (my $line = <$msg_fh>) {
        $line =~ s/\s+/ /g;              # normalise all whitespace
        $line =~ tr/A-Za-z0-9 //cd;      # delete non-alphanumeric
        $line =~ s/\b\d+\b//g;           # drop purely numeric tokens
        for my $word (split ' ', lc $line) {
            next if length($word) > $MAX_WORD_LENGTH;
            next if exists $blacklist{$word};
            $words{$word}++;
        }
    }
    close($msg_fh);
}
else {
    warn "$0: Could not open $msg_file: $!\n";
}

# Most frequent words first; equal counts are ordered alphabetically so
# the ranking (and the debug log) is repeatable.
my @ranked = sort { $words{$b} <=> $words{$a} or $a cmp $b } keys %words;
splice(@ranked, $MAX_WORDS) if @ranked > $MAX_WORDS;

for my $key (@ranked) {
    print {$debug_fh} "$key occurred $words{$key} times\n" if $debug;

    # Compile once per word; \Q...\E keeps the word literal.
    my $pattern   = qr/\b\Q$key\E\b/i;
    my @foundtags = grep { $_ =~ $pattern } @tags;

    # Tags with more than one matching word will get
    # pushed on more than one time
    push @goodtags, @foundtags;

    print {$debug_fh} join("\n", @foundtags) if $debug;
}

open($out_fh, '>', $out_file)
    or htagdie("$0: Could not open $out_file: $!\n");
reg_deletion($out_file);

if (@goodtags) {
    print {$out_fh} $goodtags[rand @goodtags];
}
else {
    # No tagline shares a word with the message.
    exit(5);
}

# Runs on normal completion and on exit(); closes whatever was opened.
END {
    close($out_fh)   if defined $out_fh;
    close($debug_fh) if defined $debug_fh;
}

__DATA__
a about again all am an and another any apr are arent as at aug be because
been before being but by can cant cat could dec did do doesnt dont down ehlo
esmtp even every feb for fri from gmt go great had hadnt has have he her here
hers herself him himself his how however i id if im in instead into is it its
itself ive jan jul jun know like lots mar may maybe me might might mine mon
more must my near need new no not nov now oct of off oh on or ought ours out
over please quite received said same sat seem seemed seems sep she should
should smtp so some such sun than that thats the their theirs them then there
theres these they this thu to tom too tue up us very want was we wed well went
were what when which while who why will with wont would would yes yet you your
youre yours youve