<<< Date Index >>>     <<< Thread Index >>>

Re: [PATCH] generic spam detection



On Mon, Jul 12, 2004 at 01:00:51PM -0500, David Champion wrote:

> There are several header-caching patches available now. The problem
> occurs because the caching patches circumvent the usual ways of loading
> message HEADERs and ENVELOPEs, so each patch would require its own
> update. Since Thomas has said that the hormel patch looks right for CVS,
> I'm going to take the easy way out and leave it to those patch authors
> to update for future releases -- sorry, but this seems like the least
> trouble/effort expended for all, in that it provides one stable base for
> others to work against.

Okay, I wrote a bogus patch against hcache19. (attached.)
(Of course, sizeof(int) is probably too short for spam/nospam IDs.
Moreover, I'm not sure whether it works as expected. :)


> > BTW, I suggest you set a default value of $spam_separator.
> > Maybe ", " or "/".
> > With a default value, all we have to do is execute "spam" command
> > as many times as we need. Very simple usage. Good for beginners.
> 
> I saw your followup saying to ignore this. I just want to remark that
> this is reasonable, but a style decision. I favored simplicity over
> complexity to make things easier for new users, but it certainly could
> be done differently, and there's a beginning-user argument for showing
> them all by default, too. I don't have much opinion on this.

I don't have much of an opinion now, either.
Well, the reason I thought a null $spam_separator would simply
concatenate the templates was that I hoped it would; I wanted to set:
        spam foo f
        spam bar b
to show "fbbbfb" in index_format(%H), for example.
This is useful on narrow screens. Just FYI.


[unspam/unnospam]
> But I can see a need for this if spam/nospam are used for more than just
> spam. Does anyone else have thoughts on this?

As Dave said, some of us would need unspam/unnospam for folder-hook.

Thanks,
-- 
tamo
*** mutt-cvs-maildir-header-cache.19    Fri Jul  9 22:52:56 2004
--- hcache-with-spam.patch      Wed Jul 14 13:51:08 2004
***************
*** 112,118 ****
  diff -Nru a/hcache.c b/hcache.c
  --- /dev/null Wed Dec 31 16:00:00 196900
  +++ b/hcache.c        2004-07-09 15:52:56 +02:00
! @@ -0,0 +1,676 @@
  +/*
  + * Copyright (C) 2004 Thomas Glanzmann <sithglan@xxxxxxxxxxxxxxxxxxxx>
  + * Copyright (C) 2004 Brian Fundakowski Feldman <green@xxxxxxxxxxx>
--- 112,118 ----
  diff -Nru a/hcache.c b/hcache.c
  --- /dev/null Wed Dec 31 16:00:00 196900
  +++ b/hcache.c        2004-07-09 15:52:56 +02:00
! @@ -0,0 +1,751 @@
  +/*
  + * Copyright (C) 2004 Thomas Glanzmann <sithglan@xxxxxxxxxxxxxxxxxxxx>
  + * Copyright (C) 2004 Brian Fundakowski Feldman <green@xxxxxxxxxxx>
***************
*** 373,378 ****
--- 373,380 ----
  +static unsigned char *
  +dump_envelope(ENVELOPE *e, unsigned char *d, int *off)
  +{
+ +     char *spamdata;
+ +
  +     d = dump_address(e->return_path, d, off);
  +     d = dump_address(e->from, d, off);
  +     d = dump_address(e->to, d, off);
***************
*** 393,398 ****
--- 395,411 ----
  +     d = dump_char(e->date, d, off);
  +     d = dump_char(e->x_label, d, off);
  +
+ +     if(e->spam)
+ +     {
+ +       spamdata = safe_malloc(e->spam->dsize + 1);
+ +       memset(spamdata, 0, e->spam->dsize + 1);
+ +       strncpy(spamdata, e->spam->data, e->spam->dsize);
+ +       d = dump_char(spamdata, d, off);
+ +       FREE(&spamdata);
+ +     }
+ +     else
+ +       d = dump_char(NULL, d, off);
+ +
  +     d = dump_list(e->references, d, off);
  +     d = dump_list(e->in_reply_to, d, off);
  +     d = dump_list(e->userhdrs, d, off);
***************
*** 404,409 ****
--- 417,423 ----
  +restore_envelope(ENVELOPE *e, const unsigned char *d, int *off)
  +{
  +     int real_subj_off;
+ +     char *spamdata;
  +
  +     restore_address(& e->return_path, d, off);
  +     restore_address(& e->from, d, off);
***************
*** 426,437 ****
--- 440,506 ----
  +     restore_char(& e->date, d, off);
  +     restore_char(& e->x_label, d, off);
  +
+ +     restore_char(&spamdata, d, off);
+ +     mutt_buffer_from(e->spam, spamdata);
+ +
  +     restore_list(& e->references, d, off);
  +     restore_list(& e->in_reply_to, d, off);
  +     restore_list(& e->userhdrs, d, off);
  +}
  +
  +
+ +unsigned int
+ +mutt_hcache_spamlist_hash(void)
+ +{
+ +     unsigned int rv = 0;
+ +     SPAM_LIST *currentlist = SpamList;
+ +
+ +     while(currentlist)
+ +     {
+ +       if(currentlist->rx && currentlist->rx->pattern)
+ +         rv ^= hash_string((unsigned char *)currentlist->rx->pattern,
+ +                           strlen(currentlist->rx->pattern));
+ +       currentlist = currentlist->next;
+ +     }
+ +     return(rv);
+ +}
+ +
+ +unsigned int
+ +mutt_hcache_nospamlist_hash(void)
+ +{
+ +     unsigned int rv = 0;
+ +     RX_LIST *currentlist = NoSpamList;
+ +
+ +     while(currentlist)
+ +     {
+ +       if(currentlist->rx && currentlist->rx->pattern)
+ +         rv ^= hash_string((unsigned char *)currentlist->rx->pattern,
+ +                           strlen(currentlist->rx->pattern));
+ +       currentlist = currentlist->next;
+ +     }
+ +     return(rv);
+ +}
+ +
+ +int
+ +mutt_hcache_spam_matches(const char *d)
+ +{
+ +     int off = sizeof(struct timeval);
+ +     unsigned int spamlist_hash;
+ +
+ +     restore_int(&spamlist_hash, (unsigned char *) d, &off);
+ +     return(spamlist_hash == mutt_hcache_spamlist_hash());
+ +}
+ +
+ +int
+ +mutt_hcache_nospam_matches(const char *d)
+ +{
+ +     int off = sizeof(struct timeval) + sizeof(unsigned int);
+ +     unsigned int nospamlist_hash;
+ +
+ +     restore_int(&nospamlist_hash, (unsigned char *) d, &off);
+ +     return(nospamlist_hash == mutt_hcache_nospamlist_hash());
+ +}
+ +
  +/* This function transforms a header into a char so that it is useable by
  + * gdbm_store */
  +
***************
*** 440,446 ****
  +mutt_hcache_charset_matches(const char *d)
  +{
  +     int matches;
! +     int off = sizeof(struct timeval);
  +     char *charset = NULL;
  +
  +     restore_char(&charset, (unsigned char *) d, &off);
--- 509,515 ----
  +mutt_hcache_charset_matches(const char *d)
  +{
  +     int matches;
! +     int off = sizeof(struct timeval) + sizeof(unsigned int) + sizeof(unsigned int);
  +     char *charset = NULL;
  +
  +     restore_char(&charset, (unsigned char *) d, &off);
***************
*** 463,468 ****
--- 532,540 ----
  +     memcpy(d, &now, sizeof(struct timeval));
  +     *off += sizeof(struct timeval);
  +
+ +     d = dump_int(mutt_hcache_spamlist_hash(), d, off);
+ +     d = dump_int(mutt_hcache_nospamlist_hash(), d, off);
+ +
  +#if HAVE_LANGINFO_CODESET
  +     d = dump_char(Charset, d, off);
  +#endif /* HAVE_LANGINFO_CODESET */
***************
*** 488,493 ****
--- 560,568 ----
  +     /* skip timeval */
  +     off += sizeof(struct timeval);
  +
+ +     /* skip spamlist and nospamlist */
+ +     off += sizeof(unsigned int) + sizeof(unsigned int);
+ +
  +#if HAVE_LANGINFO_CODESET
  +     skip_char(d, &off);
  +#endif /* HAVE_LANGINFO_CODESET */
***************
*** 840,846 ****
  diff -Nru a/mh.c b/mh.c
  --- a/mh.c    2004-07-09 15:52:56 +02:00
  +++ b/mh.c    2004-07-09 15:52:56 +02:00
! @@ -779,11 +779,65 @@
     return r;
   }
   
--- 915,921 ----
  diff -Nru a/mh.c b/mh.c
  --- a/mh.c    2004-07-09 15:52:56 +02:00
  +++ b/mh.c    2004-07-09 15:52:56 +02:00
! @@ -779,11 +779,67 @@
     return r;
   }
   
***************
*** 883,888 ****
--- 958,965 ----
  +             if (data != NULL
  +              && ret == 0
  +              && lastchanged.st_mtime <= when->tv_sec
+ +              && mutt_hcache_spam_matches (data)
+ +              && mutt_hcache_nospam_matches (data)
  +#if HAVE_LANGINFO_CODESET
  +              && mutt_hcache_charset_matches (data)
  +#endif /* HAVE_LANGINFO_CODESET */
***************
*** 1005,1011 ****
  diff -Nru a/protos.h b/protos.h
  --- a/protos.h        2004-07-09 15:52:56 +02:00
  +++ b/protos.h        2004-07-09 15:52:56 +02:00
! @@ -99,6 +99,19 @@
   ENVELOPE *mutt_read_rfc822_header (FILE *, HEADER *, short, short);
   HEADER *mutt_dup_header (HEADER *);
   
--- 1082,1088 ----
  diff -Nru a/protos.h b/protos.h
  --- a/protos.h        2004-07-09 15:52:56 +02:00
  +++ b/protos.h        2004-07-09 15:52:56 +02:00
! @@ -99,6 +99,21 @@
   ENVELOPE *mutt_read_rfc822_header (FILE *, HEADER *, short, short);
   HEADER *mutt_dup_header (HEADER *);
   
***************
*** 1016,1021 ****
--- 1093,1100 ----
  +void *mutt_hcache_fetch(void *db, const char *filename);
  +int mutt_hcache_store(void *db, const char *filename, HEADER *h);
  +int mutt_hcache_delete(void *db, const char *filename);
+ +int mutt_hcache_spam_matches(const char *d);
+ +int mutt_hcache_nospam_matches(const char *d);
  +#if HAVE_LANGINFO_CODESET
  +int mutt_hcache_charset_matches(const char *d);
  +#endif /* HAVE_LANGINFO_CODESET */