Re: [PATCH] generic spam detection
On Mon, Jul 12, 2004 at 01:00:51PM -0500, David Champion wrote:
> There are several header-caching patches available now. The problem
> occurs because the caching patches circumvent the usual ways of loading
> message HEADERs and ENVELOPEs, so each patch would require its own
> update. Since Thomas has said that the hormel patch looks right for CVS,
> I'm going to take the easy way out and leave it to those patch authors
> to update for future releases -- sorry, but this seems like the least
> trouble/effort expended for all, in that it provides one stable base for
> others to work against.
Okay, I wrote a bogus patch against hcache19. (attached.)
(Of course, sizeof(int) is probably too small for the spam/nospam IDs.
Moreover, I'm not sure whether it works as expected. :)
> > BTW, I suggest you set a default value of $spam_separator.
> > Maybe ", " or "/".
> > With a default value, all we have to do is execute "spam" command
> > as many times as we need. Very simple usage. Good for beginners.
>
> I saw your followup saying to ignore this. I just want to remark that
> this is reasonable, but a style decision. I favored simplicity over
> complexity to make things easier for new users, but it certainly could
> be done differently, and there's a beginning-user argument for showing
> them all by default, too. I don't have much opinion on this.
I don't have much of an opinion now, either.
Well, the reason I thought a null $spam_separator would simply
concatenate the templates was that I hoped it would; I wanted to set:
spam foo f
spam bar b
to show "fbbbfb" in index_format(%H), for example.
This is useful for narrow screens. Just FYI.
[unspam/unnospam]
> But I can see a need for this if spam/nospam are used for more than just
> spam. Does anyone else have thoughts on this?
As Dave said, some of us would need unspam/unnospam for folder-hook.
Thanks,
--
tamo
*** mutt-cvs-maildir-header-cache.19 Fri Jul 9 22:52:56 2004
--- hcache-with-spam.patch Wed Jul 14 13:51:08 2004
***************
*** 112,118 ****
diff -Nru a/hcache.c b/hcache.c
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/hcache.c 2004-07-09 15:52:56 +02:00
! @@ -0,0 +1,676 @@
+/*
+ * Copyright (C) 2004 Thomas Glanzmann <sithglan@xxxxxxxxxxxxxxxxxxxx>
+ * Copyright (C) 2004 Brian Fundakowski Feldman <green@xxxxxxxxxxx>
--- 112,118 ----
diff -Nru a/hcache.c b/hcache.c
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/hcache.c 2004-07-09 15:52:56 +02:00
! @@ -0,0 +1,751 @@
+/*
+ * Copyright (C) 2004 Thomas Glanzmann <sithglan@xxxxxxxxxxxxxxxxxxxx>
+ * Copyright (C) 2004 Brian Fundakowski Feldman <green@xxxxxxxxxxx>
***************
*** 373,378 ****
--- 373,380 ----
+static unsigned char *
+dump_envelope(ENVELOPE *e, unsigned char *d, int *off)
+{
+ + char *spamdata;
+ +
+ d = dump_address(e->return_path, d, off);
+ d = dump_address(e->from, d, off);
+ d = dump_address(e->to, d, off);
***************
*** 393,398 ****
--- 395,411 ----
+ d = dump_char(e->date, d, off);
+ d = dump_char(e->x_label, d, off);
+
+ + if(e->spam)
+ + {
+ + spamdata = safe_malloc(e->spam->dsize + 1);
+ + memset(spamdata, 0, e->spam->dsize + 1);
+ + strncpy(spamdata, e->spam->data, e->spam->dsize);
+ + d = dump_char(spamdata, d, off);
+ + FREE(&spamdata);
+ + }
+ + else
+ + d = dump_char(NULL, d, off);
+ +
+ d = dump_list(e->references, d, off);
+ d = dump_list(e->in_reply_to, d, off);
+ d = dump_list(e->userhdrs, d, off);
***************
*** 404,409 ****
--- 417,423 ----
+restore_envelope(ENVELOPE *e, const unsigned char *d, int *off)
+{
+ int real_subj_off;
+ + char *spamdata;
+
+ restore_address(& e->return_path, d, off);
+ restore_address(& e->from, d, off);
***************
*** 426,437 ****
--- 440,506 ----
+ restore_char(& e->date, d, off);
+ restore_char(& e->x_label, d, off);
+
+ + restore_char(&spamdata, d, off);
+ + mutt_buffer_from(e->spam, spamdata);
+ +
+ restore_list(& e->references, d, off);
+ restore_list(& e->in_reply_to, d, off);
+ restore_list(& e->userhdrs, d, off);
+}
+
+
+ +unsigned int
+ +mutt_hcache_spamlist_hash(void)
+ +{
+ + unsigned int rv = 0;
+ + SPAM_LIST *currentlist = SpamList;
+ +
+ + while(currentlist)
+ + {
+ + if(currentlist->rx && currentlist->rx->pattern)
+ + rv ^= hash_string((unsigned char *)currentlist->rx->pattern,
+ + strlen(currentlist->rx->pattern));
+ + currentlist = currentlist->next;
+ + }
+ + return(rv);
+ +}
+ +
+ +unsigned int
+ +mutt_hcache_nospamlist_hash(void)
+ +{
+ + unsigned int rv = 0;
+ + RX_LIST *currentlist = NoSpamList;
+ +
+ + while(currentlist)
+ + {
+ + if(currentlist->rx && currentlist->rx->pattern)
+ + rv ^= hash_string((unsigned char *)currentlist->rx->pattern,
+ + strlen(currentlist->rx->pattern));
+ + currentlist = currentlist->next;
+ + }
+ + return(rv);
+ +}
+ +
+ +int
+ +mutt_hcache_spam_matches(const char *d)
+ +{
+ + int off = sizeof(struct timeval);
+ + unsigned int spamlist_hash;
+ +
+ + restore_int(&spamlist_hash, (unsigned char *) d, &off);
+ + return(spamlist_hash == mutt_hcache_spamlist_hash());
+ +}
+ +
+ +int
+ +mutt_hcache_nospam_matches(const char *d)
+ +{
+ + int off = sizeof(struct timeval) + sizeof(unsigned int);
+ + unsigned int nospamlist_hash;
+ +
+ + restore_int(&nospamlist_hash, (unsigned char *) d, &off);
+ + return(nospamlist_hash == mutt_hcache_nospamlist_hash());
+ +}
+ +
+/* This function transforms a header into a char so that it is useable by
+ * gdbm_store */
+
***************
*** 440,446 ****
+mutt_hcache_charset_matches(const char *d)
+{
+ int matches;
! + int off = sizeof(struct timeval);
+ char *charset = NULL;
+
+ restore_char(&charset, (unsigned char *) d, &off);
--- 509,515 ----
+mutt_hcache_charset_matches(const char *d)
+{
+ int matches;
! + int off = sizeof(struct timeval) + sizeof(unsigned int) +
sizeof(unsigned int);
+ char *charset = NULL;
+
+ restore_char(&charset, (unsigned char *) d, &off);
***************
*** 463,468 ****
--- 532,540 ----
+ memcpy(d, &now, sizeof(struct timeval));
+ *off += sizeof(struct timeval);
+
+ + d = dump_int(mutt_hcache_spamlist_hash(), d, off);
+ + d = dump_int(mutt_hcache_nospamlist_hash(), d, off);
+ +
+#if HAVE_LANGINFO_CODESET
+ d = dump_char(Charset, d, off);
+#endif /* HAVE_LANGINFO_CODESET */
***************
*** 488,493 ****
--- 560,568 ----
+ /* skip timeval */
+ off += sizeof(struct timeval);
+
+ + /* skip spamlist and nospamlist */
+ + off += sizeof(unsigned int) + sizeof(unsigned int);
+ +
+#if HAVE_LANGINFO_CODESET
+ skip_char(d, &off);
+#endif /* HAVE_LANGINFO_CODESET */
***************
*** 840,846 ****
diff -Nru a/mh.c b/mh.c
--- a/mh.c 2004-07-09 15:52:56 +02:00
+++ b/mh.c 2004-07-09 15:52:56 +02:00
! @@ -779,11 +779,65 @@
return r;
}
--- 915,921 ----
diff -Nru a/mh.c b/mh.c
--- a/mh.c 2004-07-09 15:52:56 +02:00
+++ b/mh.c 2004-07-09 15:52:56 +02:00
! @@ -779,11 +779,67 @@
return r;
}
***************
*** 883,888 ****
--- 958,965 ----
+ if (data != NULL
+ && ret == 0
+ && lastchanged.st_mtime <= when->tv_sec
+ + && mutt_hcache_spam_matches (data)
+ + && mutt_hcache_nospam_matches (data)
+#if HAVE_LANGINFO_CODESET
+ && mutt_hcache_charset_matches (data)
+#endif /* HAVE_LANGINFO_CODESET */
***************
*** 1005,1011 ****
diff -Nru a/protos.h b/protos.h
--- a/protos.h 2004-07-09 15:52:56 +02:00
+++ b/protos.h 2004-07-09 15:52:56 +02:00
! @@ -99,6 +99,19 @@
ENVELOPE *mutt_read_rfc822_header (FILE *, HEADER *, short, short);
HEADER *mutt_dup_header (HEADER *);
--- 1082,1088 ----
diff -Nru a/protos.h b/protos.h
--- a/protos.h 2004-07-09 15:52:56 +02:00
+++ b/protos.h 2004-07-09 15:52:56 +02:00
! @@ -99,6 +99,21 @@
ENVELOPE *mutt_read_rfc822_header (FILE *, HEADER *, short, short);
HEADER *mutt_dup_header (HEADER *);
***************
*** 1016,1021 ****
--- 1093,1100 ----
+void *mutt_hcache_fetch(void *db, const char *filename);
+int mutt_hcache_store(void *db, const char *filename, HEADER *h);
+int mutt_hcache_delete(void *db, const char *filename);
+ +int mutt_hcache_spam_matches(const char *d);
+ +int mutt_hcache_nospam_matches(const char *d);
+#if HAVE_LANGINFO_CODESET
+int mutt_hcache_charset_matches(const char *d);
+#endif /* HAVE_LANGINFO_CODESET */