<<< Date Index >>>     <<< Thread Index >>>

[patch] sorting efficiency



Sorting by to or from header is inacceptably slow for large
mailboxes if reverse_alias is enabled, and if there is even a modest
number of aliases in place (here: 500).  The main culprit is the
reverse alias lookup code which does a linear search through a
linked list.  That code is invoked each time an address is made
"human-friendly", i.e., *often*.

It also looks like the IDNA code is invoked way more often than it
should be, and like we're juggling around with dynamic memory where
it isn't worth it.

The attached patch does several things:

 - reverse alias lookup is fixed to be hash-based.  I haven't
   bothered to fix the forward alias lookups, since these are
   invoked on a much smaller scale
   
 - perform a check for IDN-ness, and cache the result, before
   subjecting an address to the larger IDN code just for display
   purposes
   
 - throw away some oft he dynamic memory wastefulness in
   compare_from and compare_to; cut off comparison after 128
   characters.

Brendan, I haven't pushed this into HG quite yet.

Comments, as always, welcome.
-- 
Thomas Roessler   <roessler@xxxxxxxxxxxxxxxxxx>
diff -r 17adea9cdff6 alias.c
--- a/alias.c   Mon Sep 01 18:23:35 2008 +0200
+++ b/alias.c   Sun Sep 07 15:58:23 2008 +0200
@@ -332,6 +332,8 @@
     return;
   }
 
+  alias_add_reverse (new);
+  
   if ((t = Aliases))
   {
     while (t->next)
@@ -440,23 +442,36 @@
  */
 ADDRESS *alias_reverse_lookup (ADDRESS *a)
 {
-  ALIAS *t = Aliases;
+  if (!a || !a->mailbox)
+      return NULL;
+  
+  return hash_find (ReverseAlias, a->mailbox);
+}
+
+void alias_add_reverse (ALIAS *t)
+{
   ADDRESS *ap;
+  if (!t)
+    return;
+  
+  for (ap = t->addr; ap; ap = ap->next)
+  {
+    if (!ap->group && ap->mailbox)
+      hash_insert (ReverseAlias, ap->mailbox, ap, 1);
+  }
+}
 
-  if (!a || !a->mailbox)
-    return NULL;
-
-  for (; t; t = t->next)
+void alias_delete_reverse (ALIAS *t)
+{
+  ADDRESS *ap;
+  if (!t)
+    return;
+  
+  for (ap = t->addr; ap; ap = ap->next)
   {
-    /* cycle through all addresses if this is a group alias */
-    for (ap = t->addr; ap; ap = ap->next)
-    {
-      if (!ap->group && ap->mailbox &&
-         ascii_strcasecmp (ap->mailbox, a->mailbox) == 0)
-       return ap;
-    }
+    if (!ap->group && ap->mailbox)
+      hash_delete (ReverseAlias, ap->mailbox, ap, NULL);
   }
-  return 0;
 }
 
 /* alias_complete() -- alias completion routine
diff -r 17adea9cdff6 globals.h
--- a/globals.h Mon Sep 01 18:23:35 2008 +0200
+++ b/globals.h Sun Sep 07 15:58:23 2008 +0200
@@ -151,6 +151,7 @@
 WHERE const char *ReleaseDate;
 
 WHERE HASH *Groups;
+WHERE HASH *ReverseAlias;
 
 WHERE LIST *AutoViewList INITVAL(0);
 WHERE LIST *AlternativeOrderList INITVAL(0);
diff -r 17adea9cdff6 init.c
--- a/init.c    Mon Sep 01 18:23:35 2008 +0200
+++ b/init.c    Sun Sep 07 15:58:23 2008 +0200
@@ -1263,7 +1263,7 @@
     {
       if (CurrentMenu == MENU_ALIAS)
       {
-       for (tmp = Aliases; tmp ; tmp = tmp->next)
+       for (tmp = Aliases; tmp ; tmp = tmp->next) 
          tmp->del = 1;
        set_option (OPTFORCEREDRAWINDEX);
       }
@@ -1336,6 +1336,7 @@
   }
   else
   {
+    alias_delete_reverse (tmp);
     /* override the previous value */
     rfc822_free_address (&tmp->addr);
     if (CurrentMenu == MENU_ALIAS)
@@ -1360,7 +1361,7 @@
   }
 
   mutt_group_context_add_adrlist (gc, tmp->addr);
-
+  alias_add_reverse (tmp);
 
 #ifdef DEBUG
   if (debuglevel >= 2) 
@@ -2881,6 +2882,7 @@
   err.dsize = sizeof (error);
 
   Groups = hash_create (1031);
+  ReverseAlias = hash_create (1031);
   
   /* 
    * XXX - use something even more difficult to predict?
diff -r 17adea9cdff6 mutt_idna.c
--- a/mutt_idna.c       Mon Sep 01 18:23:35 2008 +0200
+++ b/mutt_idna.c       Sun Sep 07 15:58:23 2008 +0200
@@ -42,6 +42,29 @@
                        
 #else
 
+/* check whether an address is an IDN */
+
+static int check_idn (ADDRESS *ap)
+{
+  char *p = 0;
+
+  if (!ap || !ap->mailbox)
+    return 0;
+
+  if (!ap->idn_checked)
+  {
+    ap->idn_checked = 1;
+    for (p = strchr (ap->mailbox, '@'); p && *p; p = strchr (p, '.')) 
+      if (ascii_strncasecmp (++p, "xn--", 4) == 0)
+      {
+       ap->is_idn = 1;
+       break;
+      }
+  }
+  
+  return ap->is_idn;
+}
+
 int mutt_idna_to_local (const char *in, char **out, int flags)
 {
   *out = NULL;
@@ -51,7 +74,7 @@
 
   if (!in)
     goto notrans;
-  
+
   /* Is this the right function?  Interesting effects with some bad 
identifiers! */
   if (idna_to_unicode_8z8z (in, out, 1) != IDNA_SUCCESS)
     goto notrans;
@@ -132,16 +155,17 @@
 
 static int mbox_to_udomain (const char *mbx, char **user, char **domain)
 {
+  static char *buff = NULL;
   char *p;
-  *user = NULL;
-  *domain = NULL;
   
-  p = strchr (mbx, '@');
+  mutt_str_replace (&buff, mbx);
+  
+  p = strchr (buff, '@');
   if (!p || !p[1])
     return -1;
-  *user = safe_calloc((p - mbx + 1), sizeof(mbx[0]));
-  strfcpy (*user, mbx, (p - mbx + 1));
-  *domain = safe_strdup(p + 1);
+  *p = '\0';
+  *user = buff;
+  *domain  = p + 1;
   return 0;
 }
 
@@ -171,10 +195,9 @@
     {
       safe_realloc (&a->mailbox, mutt_strlen (user) + mutt_strlen (tmp) + 2);
       sprintf (a->mailbox, "%s@%s", NONULL(user), NONULL(tmp)); /* 
__SPRINTF_CHECKED__ */
+      a->idn_checked = 0;
     }
     
-    FREE (&domain);
-    FREE (&user);
     FREE (&tmp);
     
     if (e)
@@ -193,17 +216,17 @@
   {
     if (!a->mailbox)
       continue;
+    if (!check_idn (a))
+      continue;
     if (mbox_to_udomain (a->mailbox, &user, &domain) == -1)
       continue;
-    
     if (mutt_idna_to_local (domain, &tmp, 0) == 0)
     {
       safe_realloc (&a->mailbox, mutt_strlen (user) + mutt_strlen (tmp) + 2);
       sprintf (a->mailbox, "%s@%s", NONULL (user), NONULL (tmp)); /* 
__SPRINTF_CHECKED__ */
+      a->idn_checked = 0;
     }
     
-    FREE (&domain);
-    FREE (&user);
     FREE (&tmp);
   }
   
@@ -221,13 +244,13 @@
   char *user = NULL;
   
   FREE (&buff);
-  
+
+  if (!check_idn (a))
+    return a->mailbox;
   if (mbox_to_udomain (a->mailbox, &user, &domain) != 0)
     return a->mailbox;
   if (mutt_idna_to_local (domain, &tmp, MI_MAY_BE_IRREVERSIBLE) != 0)
   {
-    FREE (&user);
-    FREE (&domain);
     FREE (&tmp);
     return a->mailbox;
   }
@@ -235,8 +258,6 @@
   safe_realloc (&buff, mutt_strlen (tmp) + mutt_strlen (user) + 2);
   sprintf (buff, "%s@%s", NONULL(user), NONULL(tmp)); /* __SPRINTF_CHECKED__ */
   FREE (&tmp);
-  FREE (&user);
-  FREE (&domain);
   return buff;
 }
 
diff -r 17adea9cdff6 muttlib.c
--- a/muttlib.c Mon Sep 01 18:23:35 2008 +0200
+++ b/muttlib.c Sun Sep 07 15:58:23 2008 +0200
@@ -746,6 +746,7 @@
   {
     t = *p;
     *p = (*p)->next;
+    alias_delete_reverse (t);
     FREE (&t->name);
     rfc822_free_address (&t->addr);
     FREE (&t);
diff -r 17adea9cdff6 rfc822.h
--- a/rfc822.h  Mon Sep 01 18:23:35 2008 +0200
+++ b/rfc822.h  Sun Sep 07 15:58:23 2008 +0200
@@ -39,6 +39,8 @@
   char *mailbox;       /* mailbox and host address */
   int group;           /* group mailbox? */
   struct address_t *next;
+  unsigned is_idn      : 1;
+  unsigned idn_checked : 1;
 }
 ADDRESS;
 
diff -r 17adea9cdff6 sort.c
--- a/sort.c    Mon Sep 01 18:23:35 2008 +0200
+++ b/sort.c    Sun Sep 07 15:58:23 2008 +0200
@@ -111,13 +111,13 @@
 {
   HEADER **ppa = (HEADER **) a;
   HEADER **ppb = (HEADER **) b;
-  const char *fa, *fb;
+  char fa[SHORT_STRING];
+  const char *fb;
   int result;
 
-  fa = safe_strdup (mutt_get_name ((*ppa)->env->to));
+  strfcpy (fa, mutt_get_name ((*ppa)->env->to), SHORT_STRING);
   fb = mutt_get_name ((*ppb)->env->to);
-  result = mutt_strcasecmp (fa, fb);
-  FREE(&fa);
+  result = mutt_strncasecmp (fa, fb, SHORT_STRING);
   AUXSORT(result,a,b);
   return (SORTCODE (result));
 }
@@ -126,13 +126,13 @@
 {
   HEADER **ppa = (HEADER **) a;
   HEADER **ppb = (HEADER **) b;
-  const char *fa, *fb;
+  char fa[SHORT_STRING];
+  const char *fb;
   int result;
 
-  fa = safe_strdup (mutt_get_name ((*ppa)->env->from));
+  strfcpy (fa, mutt_get_name ((*ppa)->env->from), SHORT_STRING);
   fb = mutt_get_name ((*ppb)->env->from);
-  result = mutt_strcasecmp (fa, fb);
-  FREE(&fa);
+  result = mutt_strncasecmp (fa, fb, SHORT_STRING);
   AUXSORT(result,a,b);
   return (SORTCODE (result));
 }