<<< Date Index >>>     <<< Thread Index >>>

[PATCH] hcache reorganization



Hi,

since people on mutt-users@ reported that hcache db files always kept growing, I looked into it. I can confirm this here for qdbm.

My guess is that the db libraries skip the costly step of actually removing dead entries and only mark them as dead. With the attached patch, my cache file sizes went down immediately.

However, reorganization a) seems to be supported only by qdbm and gdbm, and b) may take quite some time (up to 1.2 seconds for a 300k folder with a ~74 MB qdbm-compressed hcache db).

I think once mutt provides caching features and asks the user to leave them mostly alone, mutt shouldn't let them grow forever. Hence the attached patch uses the reorg facilities only on every 20th sync of the mailbox. The counter is stored within the cache file itself.

Right now this is hardcoded, but I think we might want to increase 20 since the disk space gains are measurable but quite low. I don't think this should be user-configurable. A compile-time option might do it, too.

Comments and opinions?

Like the inode sorting patch, this one includes timing data in the debug file, too, so we can check how fast/slow it is.

Rocco
comparing with ../pdmef/feature/hcache-reorg
searching for changes
diff --git a/hcache.c b/hcache.c
--- a/hcache.c
+++ b/hcache.c
@@ -32,6 +32,8 @@
 #include <db.h>
 #endif
 
+#define REORG_INTERVAL         20
+
 #include <errno.h>
 #include <fcntl.h>
 #if HAVE_SYS_TIME_H
@@ -48,31 +50,23 @@
 #include "lib.h"
 #include "md5.h"
 
-#if HAVE_QDBM
 static struct header_cache
 {
-  VILLA *db;
   char *folder;
   unsigned int crc;
-} HEADER_CACHE;
+#if HAVE_QDBM /* members below this point differ per db backend */
+  VILLA *db;
 #elif HAVE_GDBM
-static struct header_cache
-{
   GDBM_FILE db;
-  char *folder;
-  unsigned int crc;
-} HEADER_CACHE;
 #elif HAVE_DB4
-static struct header_cache
-{
   DB_ENV *env;
   DB *db;
-  char *folder;
-  unsigned int crc;
   int fd;
   char lockfile[_POSIX_PATH_MAX];
+#endif /* end of backend-specific members */
 } HEADER_CACHE;
 
+#if HAVE_DB4
 static void mutt_hcache_dbt_init(DBT * dbt, void *data, size_t len);
 static void mutt_hcache_dbt_empty_init(DBT * dbt);
 #endif
@@ -791,8 +785,7 @@ hcache_open_qdbm (struct header_cache* h
     return -1;
 }
 
-void
-mutt_hcache_close(header_cache_t *h)
+static void hcache_close_qdbm (header_cache_t *h)
 {
   if (!h)
     return;
@@ -820,6 +813,14 @@ mutt_hcache_delete(header_cache_t *h, co
   return vlout(h->db, path, ksize);
 }
 
+
+/* Optimize the QDBM file. Returns 0 on success, -1 on failure or NULL handle. */
+static int hcache_reorg_qdbm (header_cache_t *h)
+{
+  /* vloptimize() is true on success: map it to the 0/-1 convention */
+  return (h && vloptimize (h->db)) ? 0 : -1;
+}
+
 #elif HAVE_GDBM
 static int
 hcache_open_gdbm (struct header_cache* h, const char* path)
@@ -838,8 +839,7 @@ hcache_open_gdbm (struct header_cache* h
   return -1;
 }
 
-void
-mutt_hcache_close(header_cache_t *h)
+static void hcache_close_gdbm (header_cache_t *h)
 {
   if (!h)
     return;
@@ -867,6 +867,14 @@ mutt_hcache_delete(header_cache_t *h, co
 
   return gdbm_delete(h->db, key);
 }
+
+/* Ask GDBM to reorganize the db file behind h; -1 if there is no handle. */
+static int hcache_reorg_gdbm (header_cache_t *h)
+{
+  /* otherwise gdbm_reorganize()'s own result is handed back unchanged */
+  return h ? gdbm_reorganize (h->db) : -1;
+}
+
 #elif HAVE_DB4
 
 static void
@@ -942,8 +950,7 @@ hcache_open_db4 (struct header_cache* h,
   return -1;
 }
 
-void
-mutt_hcache_close(header_cache_t *h)
+static void hcache_close_db4 (header_cache_t *h)
 {
   if (!h)
     return;
@@ -972,6 +979,12 @@ mutt_hcache_delete(header_cache_t *h, co
   mutt_hcache_dbt_init(&key, (void *) filename, keylen(filename));
   return h->db->del(h->db, NULL, &key, 0);
 }
+
+static int hcache_reorg_db4 (header_cache_t *h)
+{
+  return 0; /* nothing is reorganized for the DB4 backend; h is unused */
+}
+
 #endif
 
 header_cache_t *
@@ -1019,6 +1032,65 @@ mutt_hcache_open(const char *path, const
   }
 }
 
+/* Close the header cache h. If M_HC_REORG is set in flags, the sync counter
+ * stored under "/CHECKCOUNT" is incremented first; once it reaches
+ * REORG_INTERVAL the backend's reorg routine runs and the counter restarts
+ * at 0. The backend's close routine is called in every case. */
+void mutt_hcache_close (header_cache_t *h, int flags)
+{
+  void (*cl) (struct header_cache* h);
+  int (*reorg) (struct header_cache* h);
+
+#if HAVE_QDBM
+  reorg = hcache_reorg_qdbm;
+  cl = hcache_close_qdbm;
+#elif HAVE_GDBM
+  reorg = hcache_reorg_gdbm;
+  cl = hcache_close_gdbm;
+#elif HAVE_DB4
+  reorg = hcache_reorg_db4;
+  cl = hcache_close_db4;
+#endif
+
+  /* without a cache handle there is no counter to read or update */
+  if (h && (flags & M_HC_REORG))
+  {
+    unsigned int tmp = 0;
+    unsigned int *stored = mutt_hcache_fetch_raw (h, "/CHECKCOUNT", strlen);
+    unsigned int *check = stored ? stored : &tmp; /* no entry yet: start at 0 */
+
+    dprint (4, (debugfile, "hcache [%s]: done %u syncs so far, check limit is %d\n",
+               h->folder, *check, REORG_INTERVAL));
+
+    (*check)++;
+    if (*check >= REORG_INTERVAL)
+    {
+#ifdef DEBUG
+      int rc;
+      struct timeval tv1 = { 0, 0 }, tv2 = { 0, 0 };
+      int a, b;
+
+      gettimeofday (&tv1, NULL);
+      rc = reorg (h);
+      gettimeofday (&tv2, NULL);
+      a = tv2.tv_sec - tv1.tv_sec;
+      b = tv2.tv_usec - tv1.tv_usec;
+      if (b < 0)
+       a--, b += 1000000; /* borrow one second for the usec part */
+      dprint (4, (debugfile, "hcache [%s]: reorganize: rc = %d, time = %.6f\n",
+                 h->folder, rc, a + (b / 1e6)));
+#else
+      reorg (h);
+#endif
+      *check = 0;
+    }
+    mutt_hcache_store_raw (h, "/CHECKCOUNT", check, sizeof (*check), strlen);
+    if (stored)
+      FREE(&stored); /* only the fetched copy needs freeing, not &tmp */
+  }
+  cl (h);
+}
+
 #if HAVE_DB4
 const char *mutt_hcache_backend (void)
 {
diff --git a/hcache.h b/hcache.h
--- a/hcache.h
+++ b/hcache.h
@@ -28,7 +28,11 @@ typedef int (*hcache_namer_t)(const char
 
 header_cache_t *mutt_hcache_open(const char *path, const char *folder,
   hcache_namer_t namer);
-void mutt_hcache_close(header_cache_t *h);
+
+#define M_HC_REORG     (1<<0)          /* reorg/optimize hcache on close */
+
+void mutt_hcache_close (header_cache_t *h, int flags);
+
 HEADER *mutt_hcache_restore(const unsigned char *d, HEADER **oh);
 void *mutt_hcache_fetch(header_cache_t *h, const char *filename, size_t (*keylen)(const char *fn));
 void *mutt_hcache_fetch_raw (header_cache_t *h, const char *filename,
diff --git a/imap/imap.c b/imap/imap.c
--- a/imap/imap.c
+++ b/imap/imap.c
@@ -282,7 +282,7 @@ void imap_expunge_mailbox (IMAP_DATA* id
   }
 
 #if USE_HCACHE
-  imap_hcache_close (idata);
+  imap_hcache_close (idata, M_HC_REORG);
 #endif
 
   /* We may be called on to expunge at any time. We can't rely on the caller
@@ -1186,7 +1186,7 @@ int imap_sync_mailbox (CONTEXT* ctx, int
   }
 
 #if USE_HCACHE
-  imap_hcache_close (idata);
+  imap_hcache_close (idata, M_HC_REORG);
 #endif
 
   /* sync +/- flags for the five flags mutt cares about */
@@ -1602,7 +1602,7 @@ IMAP_STATUS* imap_mboxcache_get (IMAP_DA
   {
     uidvalidity = mutt_hcache_fetch_raw (hc, "/UIDVALIDITY", imap_hcache_keylen);
     uidnext = mutt_hcache_fetch_raw (hc, "/UIDNEXT", imap_hcache_keylen);
-    mutt_hcache_close (hc);
+    mutt_hcache_close (hc, 0);
     if (uidvalidity)
     {
       if (!status)
diff --git a/imap/imap_private.h b/imap/imap_private.h
--- a/imap/imap_private.h
+++ b/imap/imap_private.h
@@ -261,7 +261,7 @@ int imap_cache_clean (IMAP_DATA* idata);
 /* util.c */
 #ifdef USE_HCACHE
 header_cache_t* imap_hcache_open (IMAP_DATA* idata, const char* path);
-void imap_hcache_close (IMAP_DATA* idata);
+void imap_hcache_close (IMAP_DATA* idata, int flags);
 HEADER* imap_hcache_get (IMAP_DATA* idata, unsigned int uid);
 int imap_hcache_put (IMAP_DATA* idata, HEADER* h);
 int imap_hcache_del (IMAP_DATA* idata, unsigned int uid);
diff --git a/imap/message.c b/imap/message.c
--- a/imap/message.c
+++ b/imap/message.c
@@ -208,7 +208,7 @@ int imap_read_headers (IMAP_DATA* idata,
       {
         if (h.data)
           imap_free_header_data ((void**) (void*) &h.data);
-        imap_hcache_close (idata);
+        imap_hcache_close (idata, 0);
         fclose (fp);
         return -1;
       }
@@ -303,7 +303,7 @@ int imap_read_headers (IMAP_DATA* idata,
       if (h.data)
         imap_free_header_data ((void**) (void*) &h.data);
 #if USE_HCACHE
-      imap_hcache_close (idata);
+      imap_hcache_close (idata, 0);
 #endif
       fclose (fp);
       return -1;
@@ -335,7 +335,7 @@ int imap_read_headers (IMAP_DATA* idata,
     mutt_hcache_store_raw (idata->hcache, "/UIDNEXT", &idata->uidnext,
                           sizeof (idata->uidnext), imap_hcache_keylen);
 
-  imap_hcache_close (idata);
+  imap_hcache_close (idata, 0);
 #endif /* USE_HCACHE */
 
   fclose(fp);
diff --git a/imap/util.c b/imap/util.c
--- a/imap/util.c
+++ b/imap/util.c
@@ -101,12 +101,12 @@ header_cache_t* imap_hcache_open (IMAP_D
   return mutt_hcache_open (HeaderCache, cachepath, imap_hcache_namer);
 }
 
-void imap_hcache_close (IMAP_DATA* idata)
+void imap_hcache_close (IMAP_DATA* idata, int flags) /* flags are handed to mutt_hcache_close() as-is */
 {
   if (!idata->hcache)
     return;
 
-  mutt_hcache_close (idata->hcache);
+  mutt_hcache_close (idata->hcache, flags);
   idata->hcache = NULL;
 }
 
diff --git a/mh.c b/mh.c
--- a/mh.c
+++ b/mh.c
@@ -826,7 +826,7 @@ static int maildir_parse_dir (CONTEXT * 
   closedir (dirp);
 
 #if USE_HCACHE
-  mutt_hcache_close (hc);
+  mutt_hcache_close (hc, 0);
 #endif
 
   return 0;
@@ -1043,7 +1043,7 @@ void maildir_delayed_parsing (CONTEXT * 
 #endif
   }
 #if USE_HCACHE
-  mutt_hcache_close (hc);
+  mutt_hcache_close (hc, 0);
 #endif
 }
 
@@ -1652,7 +1652,7 @@ int mh_sync_mailbox (CONTEXT * ctx, int 
 
 #if USE_HCACHE
   if (ctx->magic == M_MAILDIR || ctx->magic == M_MH)
-    mutt_hcache_close (hc);
+    mutt_hcache_close (hc, M_HC_REORG);
 #endif /* USE_HCACHE */
 
   if (ctx->magic == M_MH)
@@ -1679,7 +1679,7 @@ err:
 err:
 #if USE_HCACHE
   if (ctx->magic == M_MAILDIR || ctx->magic == M_MH)
-    mutt_hcache_close (hc);
+    mutt_hcache_close (hc, M_HC_REORG);
 #endif /* USE_HCACHE */
   return -1;
 }
diff --git a/pop.c b/pop.c
--- a/pop.c
+++ b/pop.c
@@ -322,7 +322,7 @@ static int pop_fetch_headers (CONTEXT *c
   }
 
 #if USE_HCACHE
-    mutt_hcache_close (hc);
+    mutt_hcache_close (hc, 0);
 #endif
 
   if (ret < 0)
@@ -635,7 +635,7 @@ int pop_sync_mailbox (CONTEXT *ctx, int 
     }
 
 #if USE_HCACHE
-    mutt_hcache_close (hc);
+    mutt_hcache_close (hc, M_HC_REORG);
 #endif
 
     if (ret == 0)