#!/bin/bash
# sa-harvest
# Copyright 2006, Faisal N. Jawdat, faisal@faisal.com
# Version 0.71
#
# License
# Permission is granted to use and modify under the terms of the Apache
# License, Version 2.0, as of January 2004.  Please read the license
# here:
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Special thanks to The Obscure Organization, http://www.obscure.org/
# for supporting this project.  Additional thanks to Chris Heiser, who
# helped with the autogenerated whitelist code.
#
# What it does, in order:
#   1. Reads whitespace-separated sa-learn arguments (folders and optional
#      flags such as --mbox) from ~/.spamassassin/mail.{ham,sent,spam}.
#   2. Feeds the ham and spam folders to sa-learn.
#   3. Rebuilds ~/.spamassassin/addressbook.cache and ~/.spamassassin/user_prefs
#      (user_prefs.base + one whitelist_from line per cached address).
#   4. Prints an X-Spam-Level histogram for the ham and spam folders.
#
# Environment overrides (optional):
#   NICE_BIN      path to nice      (default /bin/nice)
#   SA_LEARN_BIN  path to sa-learn  (default /usr/bin/sa-learn)

# Directory holding every per-user file this script reads or writes.
SA_DIR="${HOME}/.spamassassin"

# you really want nice here. or your sysadmin does. unless you're the
# only user. you might have to tweak the paths for your os.
NICE_BIN="${NICE_BIN:-/bin/nice}"
SA_LEARN_BIN="${SA_LEARN_BIN:-/usr/bin/sa-learn}"

# Scratch directory created by rebuild_whitelist; removed by cleanup.
WORK_DIR=''

# Out-parameters of read_mail_list / strip_flags (no namerefs, so the script
# keeps working on old bash releases).
MAIL_LIST=()
PATHS=()

# Folder lists with sa-learn flags removed; filled in by main and read by
# rebuild_whitelist.
HAM_PATHS=()
SENT_PATHS=()
SPAM_PATHS=()

# Print a diagnostic on stderr.
warn() {
  printf 'sa-harvest: %s\n' "$*" >&2
}

# Print a diagnostic on stderr and abort the script.
die() {
  warn "$@"
  exit 1
}

# Remove the scratch directory, if one exists.  Installed as the EXIT trap.
cleanup() {
  if [[ -n "${WORK_DIR}" ]]; then
    rm -rf -- "${WORK_DIR}"
    WORK_DIR=''
  fi
}

# Run sa-learn at the lowest scheduling priority.
# This is a function rather than an alias because bash does not expand
# aliases in non-interactive scripts.
# Arguments: passed through to sa-learn unchanged.
sa_learn() {
  if [[ -x "${NICE_BIN}" ]]; then
    "${NICE_BIN}" -n 19 "${SA_LEARN_BIN}" "$@"
  else
    "${SA_LEARN_BIN}" "$@"
  fi
}

# Load a mail.* configuration file into the global MAIL_LIST array.
# Arguments: $1 - configuration file
# Returns:   1 (with MAIL_LIST empty) when the file cannot be read.
read_mail_list() {
  local file=$1
  MAIL_LIST=()
  if [[ ! -r "${file}" ]]; then
    warn "cannot read ${file}"
    return 1
  fi
  # Word splitting and globbing are intentional: entries are separated by
  # whitespace and may contain shell patterns.
  # shellcheck disable=SC2207
  MAIL_LIST=( $(<"${file}") )
}

# Copy every argument that is not an option (does not start with '-') into
# the global PATHS array, so flags such as --mbox / --mbx are never handed to
# grep as if they were folders.
strip_flags() {
  local arg
  PATHS=()
  for arg in "$@"; do
    case "${arg}" in
      -*) ;;
      *) PATHS+=("${arg}") ;;
    esac
  done
}

# Print the <address> part of every matching header found under the given
# folders.
# Arguments: $1 - header name (From or To); remaining - folders to search
# With no folders nothing is searched; a bare `grep -r` would otherwise
# recurse through the current directory.
extract_addresses() {
  local header=$1
  shift
  (( $# )) || return 0
  grep -a -h -r -E "^${header}:.*@.*" -- "$@" \
    | sed -n 's/^.*<\(.*\)>.*/\1/p'
}

# Lower-case stdin, drop blank lines, and emit a sorted duplicate-free list.
normalize() {
  tr A-Z a-z | sed -e '/^$/d' | LC_ALL=C sort -u
}

# Print an X-Spam-Level histogram for the given folders (none: print nothing).
spam_level_histogram() {
  (( $# )) || return 0
  grep -h -r '^X-Spam-Level' -- "$@" | sort | uniq -c
}

# Rebuild addressbook.cache and user_prefs.
# Globals: SA_DIR, HAM_PATHS, SENT_PATHS, SPAM_PATHS (read); WORK_DIR (written)
# Returns: 0 on success; 1 when nothing was replaced.
rebuild_whitelist() {
  local candidate negative new_cache new_prefs book status

  if [[ ! -r "${SA_DIR}/user_prefs.base" ]]; then
    # Carrying on would replace user_prefs with whitelist lines only.
    warn "missing ${SA_DIR}/user_prefs.base; user_prefs left untouched"
    return 1
  fi

  # The scratch directory lives inside SA_DIR so the final mv calls are
  # same-filesystem renames.
  WORK_DIR=$(mktemp -d "${SA_DIR}/tmp.XXXXXX") || {
    WORK_DIR=''
    warn "cannot create a scratch directory in ${SA_DIR}"
    return 1
  }
  candidate="${WORK_DIR}/candidate"
  negative="${WORK_DIR}/negative"
  new_cache="${WORK_DIR}/addressbook.cache"
  new_prefs="${WORK_DIR}/user_prefs"

  # start building user_prefs
  cp -- "${SA_DIR}/user_prefs.base" "${new_prefs}" || return 1

  # candidate = manual addressbook + cached addressbook + senders seen in the
  # ham folders + recipients seen in the sent folders
  {
    for book in addressbook addressbook.cache; do
      if [[ -r "${SA_DIR}/${book}" ]]; then
        cat -- "${SA_DIR}/${book}"
      fi
    done
    extract_addresses From "${HAM_PATHS[@]}"
    extract_addresses To "${SENT_PATHS[@]}"
  } | normalize > "${candidate}"

  # negative = addressbook.negative + senders seen in the spam folders
  {
    if [[ -r "${SA_DIR}/addressbook.negative" ]]; then
      cat -- "${SA_DIR}/addressbook.negative"
    fi
    extract_addresses From "${SPAM_PATHS[@]}"
  } | normalize > "${negative}"

  # new cache = candidate minus negative.  -x -F compares whole addresses
  # literally, so one negative entry cannot knock out look-alike addresses.
  if [[ -s "${negative}" ]]; then
    grep -v -x -F -f "${negative}" -- "${candidate}" > "${new_cache}"
    status=$?
    # grep exits 1 when no line survives; that is a valid (empty) result.
    if (( status > 1 )); then
      warn 'could not subtract the negative list'
      return 1
    fi
  else
    cp -- "${candidate}" "${new_cache}" || return 1
  fi

  # add the cached addressbook to the whitelist in user_prefs
  sed -e 's/^/whitelist_from /' "${new_cache}" >> "${new_prefs}" || return 1

  # everything is built: activate it.  The cache is replaced, not appended
  # to, because the candidate list already contains the previous cache.
  mv -- "${new_cache}" "${SA_DIR}/addressbook.cache" || return 1
  mv -- "${new_prefs}" "${SA_DIR}/user_prefs" || return 1

  # clean up
  cleanup
}

main() {
  local -a ham_args sent_args spam_args

  # Relative folder names in the mail.* files are resolved from $HOME.
  cd || die 'cannot change to the home directory'
  trap cleanup EXIT

  echo 'Gathering configuration Information.'
  echo ''

  echo 'Ham:'
  read_mail_list "${SA_DIR}/mail.ham"
  ham_args=("${MAIL_LIST[@]}")
  printf '%s\n' "${ham_args[*]}"
  echo ''

  echo 'Sent:'
  read_mail_list "${SA_DIR}/mail.sent"
  sent_args=("${MAIL_LIST[@]}")
  printf '%s\n' "${sent_args[*]}"
  echo ''

  echo 'Spam:'
  read_mail_list "${SA_DIR}/mail.spam"
  spam_args=("${MAIL_LIST[@]}")
  printf '%s\n' "${spam_args[*]}"
  echo ''
  echo ''

  # Sync once to clear any remaining issues
  sa_learn --sync
  echo ''

  # Learn.  An empty list is skipped so sa-learn is never started without
  # anything to read.
  echo 'Learning...'
  echo ''
  echo 'Ham:'
  if (( ${#ham_args[@]} )); then
    sa_learn --no-sync --ham "${ham_args[@]}"
  else
    warn 'no ham folders configured; nothing learned'
  fi
  echo ''
  echo 'Spam: '
  if (( ${#spam_args[@]} )); then
    sa_learn --no-sync --spam "${spam_args[@]}"
  else
    warn 'no spam folders configured; nothing learned'
  fi
  echo ''
  sa_learn --sync
  echo ''
  sa_learn --dump magic
  echo ''

  # Strip mailbox config hack so we don't try to grep flags.
  strip_flags "${ham_args[@]}"
  HAM_PATHS=("${PATHS[@]}")
  strip_flags "${sent_args[@]}"
  SENT_PATHS=("${PATHS[@]}")
  strip_flags "${spam_args[@]}"
  SPAM_PATHS=("${PATHS[@]}")

  # Get whitelist
  printf '%s' 'Recalculating whitelist...'
  if rebuild_whitelist; then
    echo ' done.'
  else
    echo ' failed.'
  fi
  echo ''

  echo 'Ham:'
  spam_level_histogram "${HAM_PATHS[@]}"
  echo ''
  echo 'Spam:'
  spam_level_histogram "${SPAM_PATHS[@]}"
  echo ''
  uptime
}

main "$@"