<<< Date Index >>>     <<< Thread Index >>>

Maildir parsing optimization for ext3/Linux 2.6



Linux 2.6 adds hashed directory support to ext3.  As a result, readdir()
returns directory entries in a pretty wild order.  Opening files in
this order results in enormous seek overhead.  This overhead can be
reduced if the files are sorted by inode number first.

Without the patch below, mutt needs 200 seconds to open a maildir folder
with 18,500 messages which does not reside in the dentry/page cache.  If
the patch is applied, less than 15 seconds are required.  If the folder
is in cache, 3.1 vs. 3.3 seconds are needed.

If necessary, I can add some autoconf hackery to activate this
optimization only on Linux systems (where the d_ino field is always
available).

Index: mh.c
===================================================================
RCS file: /home/roessler/cvs/mutt/mh.c,v
retrieving revision 3.17
diff -u -r3.17 mh.c
--- mh.c        19 Sep 2003 13:03:25 -0000      3.17
+++ mh.c        6 Oct 2003 07:51:40 -0000
@@ -30,6 +30,7 @@
 #include "sort.h"
 
 #include <sys/stat.h>
+#include <sys/types.h>
 #include <dirent.h>
 #include <limits.h>
 #include <unistd.h>
@@ -47,6 +48,7 @@
   HEADER *h;
   char *canon_fname;
   unsigned header_parsed:1;
+  ino_t inode;
   struct maildir *next;
 };
 
@@ -626,7 +628,7 @@
 
 static int maildir_parse_entry (CONTEXT * ctx, struct maildir ***last,
                                const char *subdir, const char *fname,
-                               int *count, int is_old)
+                               int *count, int is_old, ino_t inode)
 {
   struct maildir *entry;
   HEADER *h = NULL;
@@ -666,6 +668,7 @@
     entry = safe_calloc (sizeof (struct maildir), 1);
     entry->h = h;
     entry->header_parsed = (ctx->magic == M_MH);
+    entry->inode = inode;
     **last = entry;
     *last = &entry->next;
 
@@ -723,7 +726,8 @@
     dprint (2,
            (debugfile, "%s:%d: parsing %s\n", __FILE__, __LINE__,
             de->d_name));
-    maildir_parse_entry (ctx, last, subdir, de->d_name, count, is_old);
+    maildir_parse_entry (ctx, last, subdir, de->d_name, count, is_old, 
+                        de->d_ino);
   }
 
   closedir (dirp);
@@ -779,6 +783,94 @@
   return r;
 }
 
+/*
+ * Merge two maildir lists according to the inode numbers.
+ */
+static struct maildir*  maildir_merge_inode (struct maildir *left,
+                                            struct maildir *right)
+{
+  struct maildir* head;
+  struct maildir* tail;
+
+  if (left && right) 
+  {
+    if (left->inode < right->inode)
+    {
+      head = left;
+      left = left->next;
+    }
+    else 
+    {
+      head = right;
+      right = right->next;
+    }
+  } 
+  else 
+  {
+    if (left) 
+      return left;
+    else 
+      return right;
+  }
+    
+  tail = head;
+
+  while (left && right) 
+  {
+    if (left->inode < right->inode) 
+    {
+      tail->next = left;
+      left = left->next;
+    } 
+    else 
+    {
+      tail->next = right;
+      right = right->next;
+    }
+    tail = tail->next;
+  }
+
+  if (left) 
+  {
+    tail->next = left;
+  }
+  else
+  {
+    tail->next = right;
+  }
+
+  return head;
+}
+
+/*
+ * Sort maildir list according to inode.
+ */
+static struct maildir* maildir_sort_inode(struct maildir* list)
+{
+  struct maildir* left = list;
+  struct maildir* right = list;
+
+  if (!list || !list->next) 
+  {
+    return list;
+  }
+
+  list = list->next;
+  while (list && list->next) 
+  {
+    right = right->next;
+    list = list->next->next;
+  }
+
+  list = right;
+  right = right->next;
+  list->next = 0;
+
+  left = maildir_sort_inode(left);
+  right = maildir_sort_inode(right);
+  return maildir_merge_inode(left, right);
+}
+
 
 /* 
  * This function does the second parsing pass for a maildir-style
@@ -834,6 +926,8 @@
     mh_update_maildir (md, &mhs);
     mhs_free_sequences (&mhs);
   }
+
+  md = maildir_sort_inode(md);
 
   if (ctx->magic == M_MAILDIR)
     maildir_delayed_parsing (ctx, md);