<<< Date Index >>>     <<< Thread Index >>>

incomplete mbyte sequences + bogus libc's regexec(): segfault



hi all,

  on some systems, opening the attached crash-mutt mbox with the
  attached muttrc gives a segfault. the real culprit is a bug in libc's
  regexec routine which is only triggered when an *incomplete* (and,
  therefore, invalid) multibyte sequence is passed. mutt does this,
  i.e., fills a buffer with characters without checking if an incomplete
  sequence is left at the end.

  there is a patch [attached] for this by Anders Helmersson
  <anders.helmersson@xxxxxxxxx>, which it may be arguable if should be
  included in mutt or not: the current behavior is "garbage in, garbage
  out" (GIGO), the "GO" part being already fixed in libc's cvs, the "GI"
  part being still there (in mutt).

  in any case, i'd be very grateful to receive some feedback about the
  quality of this patch. i understand what it does but i have two main
  concerns: first, if it's really necessary to start checking at the
  beginning of the buffer instead of at the end (patch from myself
  trying to do so also attached; however, i don't know whether a
  subsequence of a valid mbyte sequence is always invalid); second,
  whether a call to fseek would be necessary.

  many thanks in advance,

-- 
Adeodato Simó
    EM: asp16 [ykwim] alu.ua.es | PK: DA6AE621
 
Linux. You can find a worse OS, but it costs more.
Index: pager.c
===================================================================
RCS file: /home/roessler/cvs/mutt/pager.c,v
retrieving revision 3.16
diff -u -r3.16 pager.c
--- pager.c     12 Jul 2004 13:35:27 -0000      3.16
+++ pager.c     21 Jul 2004 08:10:06 -0000
@@ -972,6 +972,9 @@
   unsigned char *p;
   static int b_read;
 
+  size_t k, n;
+  wchar_t wc;
+  
   if (*buf_ready == 0)
   {
     buf[blen - 1] = 0;
@@ -986,6 +989,18 @@
     b_read = (int) (*last_pos - offset);
     *buf_ready = 1;
 
+    /* trim tail of buf so that it contains complete multibyte characters */
+    for (n = b_read, p = buf; n > 0; p += k, n -= k)
+    {
+      k = mbrtowc (&wc, (char *) p, n, NULL);
+      if (k == -2) 
+       break; 
+      else if (k == -1 || k == 0) 
+       k = 1;
+    }
+    b_read -= n;
+    buf[b_read] = 0;
+    
     /* copy "buf" to "fmt", but without bold and underline controls */
     p = buf;
     while (*p)
--- mutt-cvs/pager.c    2004-07-21 08:32:10.000000000 +0200
+++ mutt-cvs/pager.c    2004-07-24 04:21:29.000000000 +0200
@@ -972,6 +972,8 @@
   unsigned char *p;
   static int b_read;
 
+  size_t k, n;
+  
   if (*buf_ready == 0)
   {
     buf[blen - 1] = 0;
@@ -986,6 +988,21 @@
     b_read = (int) (*last_pos - offset);
     *buf_ready = 1;
 
+    /* trim tail of buf so that it contains complete multibyte characters */
+    for (k = 0, n = 1, p = buf + b_read - 1; n <= b_read; ++n, --p)
+    {
+      k = mbrtowc (NULL, (char *) p, n, NULL);
+
+      if ((int) k >= 0)
+       break;
+    }
+
+    if (k > 0)
+    {
+      *(p+k) = 0;
+      b_read -= n-k;
+    }
+    
     /* copy "buf" to "fmt", but without bold and underline controls */
     p = buf;
     while (*p)

Attachment: crash-mutt.gz
Description: Binary data

color body      cyan default    "[0-1]"