[PATCH] hcache reorganization
Hi,
since people on mutt-users@ reported that hcache db files always kept
growing, I looked into it. I can confirm this here for qdbm.
My guess is that the db libraries don't do the costly optimization of
really removing dead entries but mark them as dead only. With the
attached patch, my cache file sizes went down immediately.
However, reorganization is a) only supported by qdbm and gdbm, as it
seems, and b) may take quite some time (up to 1.2 seconds for a 300k
folder with a ~74 MB qdbm-compressed hcache db).
I think once mutt provides caching features and asks the user to leave
them mostly alone, mutt shouldn't let them grow forever. Hence the
attached patch uses the reorg facilities only on every 20th sync of the
mailbox. The counter is stored within the cache file
itself.
Right now this is hardcoded, but I think we might want to increase 20
since the disk space gains are measurable but quite low. I don't think
this should be user-configurable. A compile-time option might do it,
too.
Comments and opinions?
As with the inode sorting patch, this one includes timing data in the
debug file, too, so we can check how fast/slow it is.
Rocco
comparing with ../pdmef/feature/hcache-reorg
searching for changes
diff --git a/hcache.c b/hcache.c
--- a/hcache.c
+++ b/hcache.c
@@ -32,6 +32,8 @@
#include <db.h>
#endif
+#define REORG_INTERVAL 20 /* closes with M_HC_REORG between two reorganizations of the db */
+
#include <errno.h>
#include <fcntl.h>
#if HAVE_SYS_TIME_H
@@ -48,31 +50,23 @@
#include "lib.h"
#include "md5.h"
-#if HAVE_QDBM
static struct header_cache
{
- VILLA *db;
char *folder;
unsigned int crc;
-} HEADER_CACHE;
+#if HAVE_QDBM
+ VILLA *db;
#elif HAVE_GDBM
-static struct header_cache
-{
GDBM_FILE db;
- char *folder;
- unsigned int crc;
-} HEADER_CACHE;
#elif HAVE_DB4
-static struct header_cache
-{
DB_ENV *env;
DB *db;
- char *folder;
- unsigned int crc;
int fd;
char lockfile[_POSIX_PATH_MAX];
+#endif
} HEADER_CACHE;
+#if HAVE_DB4
static void mutt_hcache_dbt_init(DBT * dbt, void *data, size_t len);
static void mutt_hcache_dbt_empty_init(DBT * dbt);
#endif
@@ -791,8 +785,7 @@ hcache_open_qdbm (struct header_cache* h
return -1;
}
-void
-mutt_hcache_close(header_cache_t *h)
+static void hcache_close_qdbm (header_cache_t *h)
{
if (!h)
return;
@@ -820,6 +813,14 @@ mutt_hcache_delete(header_cache_t *h, co
return vlout(h->db, path, ksize);
}
+
+static int hcache_reorg_qdbm (header_cache_t *h)
+{
+ if (!h)
+ return -1;
+ return vloptimize (h->db);
+}
+
#elif HAVE_GDBM
static int
hcache_open_gdbm (struct header_cache* h, const char* path)
@@ -838,8 +839,7 @@ hcache_open_gdbm (struct header_cache* h
return -1;
}
-void
-mutt_hcache_close(header_cache_t *h)
+static void hcache_close_gdbm (header_cache_t *h)
{
if (!h)
return;
@@ -867,6 +867,14 @@ mutt_hcache_delete(header_cache_t *h, co
return gdbm_delete(h->db, key);
}
+
+/* Reorganize the gdbm file behind h: -1 without a handle, else gdbm's result. */
+static int hcache_reorg_gdbm (header_cache_t *h)
+{
+  /* the value returned by gdbm_reorganize() is handed back unchanged */
+  return h ? gdbm_reorganize (h->db) : -1;
+}
+
#elif HAVE_DB4
static void
@@ -942,8 +950,7 @@ hcache_open_db4 (struct header_cache* h,
return -1;
}
-void
-mutt_hcache_close(header_cache_t *h)
+static void hcache_close_db4 (header_cache_t *h)
{
if (!h)
return;
@@ -972,6 +979,12 @@ mutt_hcache_delete(header_cache_t *h, co
mutt_hcache_dbt_init(&key, (void *) filename, keylen(filename));
return h->db->del(h->db, NULL, &key, 0);
}
+
+static int hcache_reorg_db4 (header_cache_t *h) /* h is not used */
+{
+ return 0; /* no reorganization is performed for this backend; always reports 0 */
+}
+
#endif
header_cache_t *
@@ -1019,6 +1032,65 @@ mutt_hcache_open(const char *path, const
}
}
+/* Close the header cache h through the compiled-in backend.
+ * If flags contains M_HC_REORG, the "/CHECKCOUNT" record kept inside the
+ * cache is incremented first; once it reaches REORG_INTERVAL the backend's
+ * reorg routine runs and the counter restarts at 0. */
+void mutt_hcache_close (header_cache_t *h, int flags)
+{
+  void (*cl) (struct header_cache* h);
+  int (*reorg) (struct header_cache* h);
+
+#if HAVE_QDBM
+  reorg = hcache_reorg_qdbm;
+  cl = hcache_close_qdbm;
+#elif HAVE_GDBM
+  reorg = hcache_reorg_gdbm;
+  cl = hcache_close_gdbm;
+#elif HAVE_DB4
+  reorg = hcache_reorg_db4;
+  cl = hcache_close_db4;
+#endif
+
+  /* the backend close routines accept h == NULL, so never touch h->... without a handle */
+  if (h && (flags & M_HC_REORG))
+  {
+    unsigned int count = 0;
+    unsigned int *stored = mutt_hcache_fetch_raw (h, "/CHECKCOUNT", strlen);
+
+    if (stored)
+    {
+      /* keep a private copy so the fetched buffer can be released right away */
+      count = *stored;
+      FREE(&stored);
+    }
+
+    dprint (4, (debugfile, "hcache [%s]: done %u syncs so far, check limit is %d\n",
+                h->folder, count, REORG_INTERVAL));
+
+    if (++count >= REORG_INTERVAL)
+    {
+#ifdef DEBUG
+      int rc;
+      struct timeval tv1 = { 0, 0 }, tv2 = { 0, 0 };
+
+      gettimeofday (&tv1, NULL);
+      rc = reorg (h);
+      gettimeofday (&tv2, NULL);
+      /* summing in floating point handles a negative usec difference correctly */
+      dprint (4, (debugfile, "hcache [%s]: reorganize: rc = %d, time = %.6f\n",
+                  h->folder, rc, (double) (tv2.tv_sec - tv1.tv_sec) +
+                  (tv2.tv_usec - tv1.tv_usec) / 1e6));
+#else
+      reorg (h);
+#endif
+      count = 0;
+    }
+    mutt_hcache_store_raw (h, "/CHECKCOUNT", &count, sizeof (count), strlen);
+  }
+  cl (h);
+}
+
#if HAVE_DB4
const char *mutt_hcache_backend (void)
{
diff --git a/hcache.h b/hcache.h
--- a/hcache.h
+++ b/hcache.h
@@ -28,7 +28,11 @@ typedef int (*hcache_namer_t)(const char
header_cache_t *mutt_hcache_open(const char *path, const char *folder,
hcache_namer_t namer);
-void mutt_hcache_close(header_cache_t *h);
+
+#define M_HC_REORG (1<<0) /* reorg/optimize hcache on close */
+
+void mutt_hcache_close (header_cache_t *h, int flags);
+
HEADER *mutt_hcache_restore(const unsigned char *d, HEADER **oh);
void *mutt_hcache_fetch(header_cache_t *h, const char *filename, size_t
(*keylen)(const char *fn));
void *mutt_hcache_fetch_raw (header_cache_t *h, const char *filename,
diff --git a/imap/imap.c b/imap/imap.c
--- a/imap/imap.c
+++ b/imap/imap.c
@@ -282,7 +282,7 @@ void imap_expunge_mailbox (IMAP_DATA* id
}
#if USE_HCACHE
- imap_hcache_close (idata);
+ imap_hcache_close (idata, M_HC_REORG);
#endif
/* We may be called on to expunge at any time. We can't rely on the caller
@@ -1186,7 +1186,7 @@ int imap_sync_mailbox (CONTEXT* ctx, int
}
#if USE_HCACHE
- imap_hcache_close (idata);
+ imap_hcache_close (idata, M_HC_REORG);
#endif
/* sync +/- flags for the five flags mutt cares about */
@@ -1602,7 +1602,7 @@ IMAP_STATUS* imap_mboxcache_get (IMAP_DA
{
uidvalidity = mutt_hcache_fetch_raw (hc, "/UIDVALIDITY",
imap_hcache_keylen);
uidnext = mutt_hcache_fetch_raw (hc, "/UIDNEXT", imap_hcache_keylen);
- mutt_hcache_close (hc);
+ mutt_hcache_close (hc, 0);
if (uidvalidity)
{
if (!status)
diff --git a/imap/imap_private.h b/imap/imap_private.h
--- a/imap/imap_private.h
+++ b/imap/imap_private.h
@@ -261,7 +261,7 @@ int imap_cache_clean (IMAP_DATA* idata);
/* util.c */
#ifdef USE_HCACHE
header_cache_t* imap_hcache_open (IMAP_DATA* idata, const char* path);
-void imap_hcache_close (IMAP_DATA* idata);
+void imap_hcache_close (IMAP_DATA* idata, int flags);
HEADER* imap_hcache_get (IMAP_DATA* idata, unsigned int uid);
int imap_hcache_put (IMAP_DATA* idata, HEADER* h);
int imap_hcache_del (IMAP_DATA* idata, unsigned int uid);
diff --git a/imap/message.c b/imap/message.c
--- a/imap/message.c
+++ b/imap/message.c
@@ -208,7 +208,7 @@ int imap_read_headers (IMAP_DATA* idata,
{
if (h.data)
imap_free_header_data ((void**) (void*) &h.data);
- imap_hcache_close (idata);
+ imap_hcache_close (idata, 0);
fclose (fp);
return -1;
}
@@ -303,7 +303,7 @@ int imap_read_headers (IMAP_DATA* idata,
if (h.data)
imap_free_header_data ((void**) (void*) &h.data);
#if USE_HCACHE
- imap_hcache_close (idata);
+ imap_hcache_close (idata, 0);
#endif
fclose (fp);
return -1;
@@ -335,7 +335,7 @@ int imap_read_headers (IMAP_DATA* idata,
mutt_hcache_store_raw (idata->hcache, "/UIDNEXT", &idata->uidnext,
sizeof (idata->uidnext), imap_hcache_keylen);
- imap_hcache_close (idata);
+ imap_hcache_close (idata, 0);
#endif /* USE_HCACHE */
fclose(fp);
diff --git a/imap/util.c b/imap/util.c
--- a/imap/util.c
+++ b/imap/util.c
@@ -101,12 +101,12 @@ header_cache_t* imap_hcache_open (IMAP_D
return mutt_hcache_open (HeaderCache, cachepath, imap_hcache_namer);
}
-void imap_hcache_close (IMAP_DATA* idata)
+void imap_hcache_close (IMAP_DATA* idata, int flags)
{
if (!idata->hcache)
return;
-mutt_hcache_close (idata->hcache);
+ mutt_hcache_close (idata->hcache, flags); /* flags are handed on unchanged to mutt_hcache_close() */
idata->hcache = NULL;
}
diff --git a/mh.c b/mh.c
--- a/mh.c
+++ b/mh.c
@@ -826,7 +826,7 @@ static int maildir_parse_dir (CONTEXT *
closedir (dirp);
#if USE_HCACHE
- mutt_hcache_close (hc);
+ mutt_hcache_close (hc, 0);
#endif
return 0;
@@ -1043,7 +1043,7 @@ void maildir_delayed_parsing (CONTEXT *
#endif
}
#if USE_HCACHE
- mutt_hcache_close (hc);
+ mutt_hcache_close (hc, 0);
#endif
}
@@ -1652,7 +1652,7 @@ int mh_sync_mailbox (CONTEXT * ctx, int
#if USE_HCACHE
if (ctx->magic == M_MAILDIR || ctx->magic == M_MH)
- mutt_hcache_close (hc);
+ mutt_hcache_close (hc, M_HC_REORG);
#endif /* USE_HCACHE */
if (ctx->magic == M_MH)
@@ -1679,7 +1679,7 @@ err:
err:
#if USE_HCACHE
if (ctx->magic == M_MAILDIR || ctx->magic == M_MH)
- mutt_hcache_close (hc);
+ mutt_hcache_close (hc, M_HC_REORG);
#endif /* USE_HCACHE */
return -1;
}
diff --git a/pop.c b/pop.c
--- a/pop.c
+++ b/pop.c
@@ -322,7 +322,7 @@ static int pop_fetch_headers (CONTEXT *c
}
#if USE_HCACHE
- mutt_hcache_close (hc);
+ mutt_hcache_close (hc, 0);
#endif
if (ret < 0)
@@ -635,7 +635,7 @@ int pop_sync_mailbox (CONTEXT *ctx, int
}
#if USE_HCACHE
- mutt_hcache_close (hc);
+ mutt_hcache_close (hc, M_HC_REORG);
#endif
if (ret == 0)