<<< Date Index >>>     <<< Thread Index >>>

Re: [PATCH] better header cache versioning



On Thursday, 05 April 2007 at 10:40, David Laight wrote:
> On Thu, Apr 05, 2007 at 08:31:11AM +0200, Christian Ebert wrote:
> > * Brendan Cully on Wednesday, April 04, 2007 at 15:23:02 -0700:
> > > Here's a patch that attempts to version the header cache more
> > > accurately. It uses a little script to extract the type definitions
> > > for the structures hcache saves, pipes them through an MD5, and uses
> > > an unsigned long's worth of the result as a CRC.
> > > 
> > > Comments?
> > 
> > Is it safe to assume that Python is present on every system?
> 
> No, it almost certainly won't be installed on (say) a NetBSD system.
> (And neither will perl)

Here's an alternative implementation in shell. It does expect to have
either md5 or md5sum available, but that's a much lighter
dependency. It's also a lot slower and stupider, but it seems to
handle the needs of hcache for now.
# HG changeset patch
# User Brendan Cully <brendan@xxxxxxxxxx>
# Date 1175801937 25200
# Node ID 2ebf1ad199dcd667a732a60dbe5d0060c894b457
# Parent  1e2d32e1a64448ed24705d0f2977baed6e09071c
Version header cache against MD5 of structures on which it depends

diff -r 1e2d32e1a644 -r 2ebf1ad199dc Makefile.am
--- a/Makefile.am       Thu Apr 05 09:26:09 2007 -0700
+++ b/Makefile.am       Thu Apr 05 12:38:57 2007 -0700
@@ -13,7 +13,7 @@ SUBDIRS = m4 po intl doc contrib $(IMAP_
 
 bin_SCRIPTS = muttbug flea @SMIMEAUX_TARGET@
 
-BUILT_SOURCES = keymap_defs.h patchlist.c reldate.h
+BUILT_SOURCES = keymap_defs.h patchlist.c reldate.h hcversion.h
 
 bin_PROGRAMS = mutt @DOTLOCK_TARGET@ @PGPAUX_TARGET@
 mutt_SOURCES = $(BUILT_SOURCES) \
@@ -78,7 +78,7 @@ EXTRA_DIST = COPYRIGHT GPL OPS OPS.PGP O
        makedoc.c makedoc-defs.h stamp-doc-rc README.SSL smime.h \
        muttbug pgppacket.h depcomp ascii.h BEWARE PATCHES patchlist.sh \
        ChangeLog ChangeLog.old mkchangelog.sh cvslog2changelog.pl mutt_idna.h \
-       snprintf.c regex.c crypt-gpgme.h
+       snprintf.c regex.c crypt-gpgme.h hcachever.sh
 
 EXTRA_SCRIPTS = smime_keys
 
@@ -125,6 +125,10 @@ reldate.h: $(srcdir)/ChangeLog
        echo 'const char *ReleaseDate = "'`head -n 1 $(srcdir)/ChangeLog | LC_ALL=C cut -d ' ' -f 1`'";' > reldate.h.tmp; \
        cmp -s reldate.h.tmp reldate.h || cp reldate.h.tmp reldate.h; \
        rm reldate.h.tmp
+
+hcversion.h: $(srcdir)/mutt.h $(srcdir)/rfc822.h
+       ( echo '#include "config.h"'; echo '#include "mutt.h"'; ) \
+       | $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) - | $(srcdir)/hcachever.sh hcversion.h
 
 patchlist.c: $(srcdir)/PATCHES $(srcdir)/patchlist.sh
        $(srcdir)/patchlist.sh < $(srcdir)/PATCHES > patchlist.c
diff -r 1e2d32e1a644 -r 2ebf1ad199dc hcache.c
--- a/hcache.c  Thu Apr 05 09:26:09 2007 -0700
+++ b/hcache.c  Thu Apr 05 12:38:57 2007 -0700
@@ -18,8 +18,6 @@
  *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
-/* this comment bumps Id after $assumed_charset changed BODY. */
-
 #if HAVE_CONFIG_H
 #include "config.h"
 #endif                         /* HAVE_CONFIG_H */
@@ -41,6 +39,7 @@
 #endif
 #include "mutt.h"
 #include "hcache.h"
+#include "hcversion.h"
 #ifdef USE_IMAP
 #include "message.h"
 #endif
@@ -447,75 +446,8 @@ restore_envelope(ENVELOPE * e, const uns
   restore_list(&e->userhdrs, d, off);
 }
 
-static unsigned int
-crc32(unsigned int crc, unsigned char const *p, size_t len)
-{
-  int i;
-  while (len--)
-  {
-    crc ^= *p++;
-    for (i = 0; i < 8; i++)
-      crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
-  }
-  return crc;
-}
-
 static int
-generate_crc32()
-{
-  int crc = 0;
-  SPAM_LIST *sp = SpamList;
-  RX_LIST *rx = NoSpamList;
-
-  crc = crc32(crc, (unsigned char const *) "$Id$", mutt_strlen("$Id$"));
-
-#if HAVE_LANGINFO_CODESET
-  crc = crc32(crc, (unsigned char const *) Charset, mutt_strlen(Charset));
-  crc = crc32(crc, (unsigned char const *) "HAVE_LANGINFO_CODESET",
-       mutt_strlen("HAVE_LANGINFO_CODESET"));
-#endif
-
-#if EXACT_ADDRESS
-  crc = crc32(crc, (unsigned char const *) "EXACT_ADDRESS",
-       mutt_strlen("EXACT_ADDRESS"));
-#endif
-
-#ifdef USE_POP
-  crc = crc32(crc, (unsigned char const *) "USE_POP", mutt_strlen("USE_POP"));
-#endif
-
-#ifdef MIXMASTER
-  crc = crc32(crc, (unsigned char const *) "MIXMASTER",
-        mutt_strlen("MIXMASTER"));
-#endif
-
-#ifdef USE_IMAP
-  crc = crc32(crc, (unsigned char const *) "USE_IMAP", mutt_strlen("USE_IMAP"));
-  crc = crc32(crc, (unsigned char const *) ImapHeaders,
-        mutt_strlen(ImapHeaders));
-#endif
-  while (sp)
-  {
-    crc = crc32(crc, (unsigned char const *) sp->rx->pattern,
-         mutt_strlen(sp->rx->pattern));
-    sp = sp->next;
-  }
-
-  crc = crc32(crc, (unsigned char const *) "SPAM_SEPERATOR",
-       mutt_strlen("SPAM_SEPERATOR"));
-
-  while (rx)
-  {
-    crc = crc32(crc, (unsigned char const *) rx->rx->pattern,
-         mutt_strlen(rx->rx->pattern));
-    rx = rx->next;
-  }
-
-  return crc;
-}
-
-static int
-crc32_matches(const char *d, unsigned int crc)
+crc_matches(const char *d, unsigned int crc)
 {
   int off = sizeof (validate);
   unsigned int mycrc = 0;
@@ -639,7 +571,7 @@ mutt_hcache_fetch(header_cache_t *h, con
 
   data = mutt_hcache_fetch_raw (h, filename, keylen);
 
-  if (!data || !crc32_matches(data, h->crc))
+  if (!data || !crc_matches(data, h->crc))
   {
     FREE(&data);
     return NULL;
@@ -795,7 +727,7 @@ mutt_hcache_open(const char *path, const
 
   h->db = NULL;
   h->folder = get_foldername(folder);
-  h->crc = generate_crc32();
+  h->crc = HCACHEVER;
 
   if (!path || path[0] == '\0')
   {
@@ -860,7 +792,7 @@ mutt_hcache_open(const char *path, const
 
   h->db = NULL;
   h->folder = get_foldername(folder);
-  h->crc = generate_crc32();
+  h->crc = HCACHEVER;
 
   if (!path || path[0] == '\0')
   {
@@ -946,7 +878,7 @@ mutt_hcache_open(const char *path, const
   int pagesize = atoi(HeaderCachePageSize);
   char* tmp;
 
-  h->crc = generate_crc32();
+  h->crc = HCACHEVER;
 
   if (!path || path[0] == '\0')
   {
diff -r 1e2d32e1a644 -r 2ebf1ad199dc hcachever.sh
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/hcachever.sh      Thu Apr 05 12:38:57 2007 -0700
@@ -0,0 +1,133 @@
+#!/bin/sh
+#
+# hcachever.sh - compute the header cache version number.
+#
+# Usage: hcachever.sh DEST < preprocessed-source
+#
+# Reads preprocessed C on stdin, picks out the typedef'd structs named in
+# getstruct below, runs BASEVERSION plus their bodies through MD5, and
+# writes DEST with "#define HCACHEVER 0x<first 8 hex digits of the MD5>".
+
+# Hashed ahead of the struct text, so changing it changes HCACHEVER.
+BASEVERSION=1
+
+# Struct text is expanded unquoted below, which collapses whitespace.
+# Turn off pathname expansion so a '*' in a declaration is never replaced
+# by file names from the build directory.
+set -f
+
+# command -v is the POSIX way to probe for a utility; "which" is not
+# specified by POSIX.
+if command -v md5 > /dev/null 2>&1
+then
+  MD5=md5
+elif command -v md5sum > /dev/null 2>&1
+then
+  MD5=md5sum
+else
+  echo "ERROR: no MD5 tool found" >&2
+  exit 1
+fi
+
+# Print the typedef name from a closing line such as "} NAME;".
+cleanstruct () {
+  STRUCT="$1"
+  STRUCT=${STRUCT#\} }
+  STRUCT=${STRUCT%\;}
+
+  echo $STRUCT
+}
+
+# Print a struct body with its first "{" (and following spaces) removed.
+cleanbody () {
+  echo "$1" | sed -e 's/{ *//'
+}
+
+# $1 is a line starting with "typedef struct"; the rest of the definition
+# is read from stdin.  Prints "NAME: BODY" when NAME is one of the listed
+# structs, and nothing otherwise.
+getstruct () {
+  STRUCT=""
+  BODY=''
+  inbody=0
+  case "$1" in
+    *'{') inbody=1 ;;
+    *';') return ;;
+  esac
+
+  while read -r line
+  do
+    if test $inbody -eq 0
+    then
+      case "$line" in
+        '{'*) inbody=1 ;;
+        *';') return ;;
+      esac
+    fi
+
+    case "$line" in
+      '} '*)
+        STRUCT=`cleanstruct "$line"`
+        break
+      ;;
+      '}')
+        # The typedef name is on the line after the closing brace.
+        read -r line
+        STRUCT=`cleanstruct "$line"`
+        break
+      ;;
+      # Skip lines starting with '#' (e.g. preprocessor line markers).
+      '#'*) continue ;;
+      *)
+        if test $inbody -ne 0
+        then
+          BODY="$BODY $line"
+        fi
+      ;;
+    esac
+  done
+
+  case $STRUCT in
+    ADDRESS|LIST|BUFFER|PARAMETER|BODY|ENVELOPE)
+      BODY=`cleanbody "$BODY"`
+      echo "$STRUCT: $BODY"
+    ;;
+  esac
+  return
+}
+
+DEST="$1"
+if test -z "$DEST"
+then
+  echo "usage: $0 DEST" >&2
+  exit 1
+fi
+TMPD="$DEST.tmp"
+
+TEXT="$BASEVERSION"
+
+echo "/* base version: $BASEVERSION" > "$TMPD"
+while read -r line
+do
+  case "$line" in
+    'typedef struct'*)
+       STRUCT=`getstruct "$line"`
+       if test -n "$STRUCT"
+       then
+         NAME=${STRUCT%%:*}
+         BODY=${STRUCT#*:}
+         echo " * $NAME:" $BODY >> "$TMPD"
+         TEXT="$TEXT $NAME {$BODY}"
+       fi
+    ;;
+  esac
+done
+echo " */" >> "$TMPD"
+
+# ${var:0:8} is a bash extension; cut keeps this working under any POSIX sh.
+MD5TEXT=`echo $TEXT | $MD5`
+echo "#define HCACHEVER 0x"`echo "$MD5TEXT" | cut -c1-8` >> "$TMPD"
+
+# TODO: validate we have all structs
+
+mv "$TMPD" "$DEST"

Attachment: pgp2JNcACPVh4.pgp
Description: PGP signature