<<< Date Index >>>     <<< Thread Index >>>

Re: mutt_FormatString() not multibyte-aware



* Sat Jun 24 2006 Rocco Rutte <pdmef@xxxxxxx>
> * TAKAHASHI Tamotsu <ttakah@xxxxxxxxxxxxxxxxx>:
> >See the attached patch. This is just a hack, but it works if you
> >have wcswidth, wmemcpy, wcslen, etc.
> 
> Ouch, it's really a hack and requires more work because once you have 
> wchar_t, all the callbacks should be aware of it too, I guess.

My patch converts the wcs to mbs and throws it to the callbacks,
and converts the result (mbs) to wcs again.
So it works (*), but yes, it's a hack - I have to check wc-funcs
before using them.
Even if you modify all the callbacks, that doesn't affect what
you see on the screen.

(*) It works as long as prefix, ifstring, and elsestring are ASCII
 - this limitation has been removed by the attached patch.


> Another approach which just pops into my brain is to really rework 
> mutt_FormatString like this: first go through the input and use the 
> callbacks to make up an array of replacements as first pass. Afterwards 
> mutt_FormatString() could use whatever semantics it wants internally, 
> like going with wchar_t or UTF-8 or...

That sounds good, though I don't know which is faster.

-- 
tamo
Index: muttlib.c
===================================================================
RCS file: /home/roessler/cvs/mutt/muttlib.c,v
retrieving revision 3.44
diff -p -u -r3.44 muttlib.c
--- muttlib.c   8 Jun 2006 11:56:05 -0000       3.44
+++ muttlib.c   25 Jun 2006 07:43:35 -0000
@@ -986,6 +986,8 @@ void mutt_safe_path (char *s, size_t l, 
 }
 
 
+static void _FormatWString (wchar_t *, size_t, const wchar_t *, format_t *, 
unsigned long, format_flag);
+
 void mutt_FormatString (char *dest,            /* output buffer */
                        size_t destlen,         /* output buffer len */
                        const char *src,        /* template string */
@@ -993,9 +995,30 @@ void mutt_FormatString (char *dest,                /* 
                        unsigned long data,     /* callback data */
                        format_flag flags)      /* callback flags */
 {
-  char prefix[SHORT_STRING], buf[LONG_STRING], *cp, *wptr = dest, ch;
-  char ifstring[SHORT_STRING], elsestring[SHORT_STRING];
+  wchar_t *wdest = safe_calloc (destlen, sizeof (wchar_t));
+  wchar_t *wsrc = safe_calloc (destlen, sizeof (wchar_t));
+  mbstowcs (wsrc, src, destlen);
+  wsrc[destlen - 1] = 0;
+  _FormatWString (wdest, destlen, wsrc, callback, data, flags);
+  wcstombs (dest, wdest, destlen);
+  dest[destlen - 1] = 0;
+  FREE (&wdest);
+  FREE (&wsrc);
+}
+
+static void _FormatWString (wchar_t *dest,
+                           size_t destlen,
+                           const wchar_t *src,
+                           format_t *callback,
+                           unsigned long data,
+                           format_flag flags)
+{
+  char prefix[SHORT_STRING], buf[LONG_STRING], *cp;
+  wchar_t wbuf[LONG_STRING], *wptr = dest, ch;
+  char ifstring[SHORT_STRING + MB_LEN_MAX], elsestring[SHORT_STRING + 
MB_LEN_MAX];
   size_t wlen, count, len, col, wid;
+  char mbuf[LONG_STRING];
+  mbstate_t mbstate;
 
   prefix[0] = '\0';
   destlen--; /* save room for the terminal \0 */
@@ -1050,10 +1073,18 @@ void mutt_FormatString (char *dest,             /* 
         /* eat the `if' part of the string */
         cp = ifstring;
        count = 0;
-        while (count < sizeof (ifstring) && *src && *src != '?' && *src != '&')
+        memset (&mbstate, 0, sizeof (mbstate));
+        while (*src && *src != '?' && *src != '&')
        {
-          *cp++ = *src++;
-         count++;
+         if (count < (sizeof (ifstring) - MB_LEN_MAX))
+         {
+           if ((len = wcrtomb (cp, *src++, &mbstate)) == -1)
+             len = 1, *cp = '?'; /* ??? - should I clear mbstate too? */
+            cp += len;
+         }
+         else
+           len = 0;
+         count += len;
        }
         *cp = 0;
 
@@ -1062,10 +1093,18 @@ void mutt_FormatString (char *dest,             /* 
          src++; /* skip the & */
        cp = elsestring;
        count = 0;
-       while (count < sizeof (elsestring) && *src && *src != '?')
+        memset (&mbstate, 0, sizeof (mbstate));
+       while (*src && *src != '?')
        {
-         *cp++ = *src++;
-         count++;
+         if (count < (sizeof (elsestring) - MB_LEN_MAX))
+         {
+           if ((len = wcrtomb (cp, *src++, &mbstate)) == -1)
+             len = 1, *cp = '?'; /* ??? - should I clear mbstate too? */
+            cp += len;
+         }
+         else
+           len = 0;
+         count += len;
        }
        *cp = 0;
 
@@ -1086,23 +1125,33 @@ void mutt_FormatString (char *dest,             /* 
        if (count > col)
        {
          count -= col; /* how many columns left on this line */
-         mutt_FormatString (buf, sizeof (buf), src, callback, data, flags);
-         len = mutt_strlen (buf);
-         wid = mutt_strwidth (buf);
+         _FormatWString (wbuf, sizeof (wbuf), src, callback, data, flags);
+         len = wcslen (wbuf);
+         wid = wcswidth (wbuf, sizeof (wbuf));
          if (count > wid)
          {
-           count -= wid; /* how many chars to pad */
-           memset (wptr, ch, count);
-           wptr += count;
-           wlen += count;
-           col += count;
+           count -= wid; /* how many columns to pad */
+           while (count >= wcwidth (ch))
+           {
+             *wptr++ = ch;
+             wlen++;
+             col += wcwidth (ch);
+             count -= wcwidth (ch);
+           }
+           while (count)
+           {
+             *wptr++ = ' ';
+             wlen++;
+             col++;
+             count--;
+           }
          }
          if (len + wlen > destlen)
            len = destlen - wlen;
-         memcpy (wptr, buf, len);
+         wmemcpy (wptr, wbuf, len);
          wptr += len;
          wlen += len;
-         col += mutt_strwidth (buf);
+         col += wcswidth (wbuf, sizeof (wbuf));
        }
        break; /* skip rest of input */
       }
@@ -1115,8 +1164,16 @@ void mutt_FormatString (char *dest,              /* 
        if (destlen > wlen)
        {
          count = destlen - wlen;
-         memset (wptr, ch, count);
-         wptr += count;
+         while (count >= wcwidth (ch))
+         {
+           *wptr++ = ch;
+           count -= wcwidth (ch);
+         }
+         while (count)
+         {
+           *wptr++ = ' ';
+           count--;
+         }
        }
        break; /* skip rest of input */
       }
@@ -1134,9 +1191,11 @@ void mutt_FormatString (char *dest,              /* 
          
          ch = *src++;
        }
-       
+
        /* use callback function to handle this case */
-       src = callback (buf, sizeof (buf), ch, src, prefix, ifstring, 
elsestring, data, flags);
+       wcstombs (mbuf, src, sizeof (mbuf));
+       mbuf[sizeof (mbuf) - 1] = 0;
+       src += callback (buf, sizeof (buf), ch, mbuf, prefix, ifstring, 
elsestring, data, flags) - mbuf;
 
        if (tolower)
          mutt_strlower (buf);
@@ -1148,10 +1207,9 @@ void mutt_FormatString (char *dest,              /* 
                *p = '_';
        }
        
-       if ((len = mutt_strlen (buf)) + wlen > destlen)
-         len = (destlen - wlen > 0) ? (destlen - wlen) : 0;
-
-       memcpy (wptr, buf, len);
+       len = mbstowcs (wptr, buf, destlen - wlen);
+       if (len == -1)
+         break; /* ??? */
        wptr += len;
        wlen += len;
        col += mutt_strwidth (buf);
@@ -1185,13 +1243,13 @@ void mutt_FormatString (char *dest,             /* 
       src++;
       wptr++;
       wlen++;
-      col++;
+      col++; /* ??? */
     }
     else
     {
-      *wptr++ = *src++;
+      *wptr++ = *src;
       wlen++;
-      col++;
+      col += wcwidth (*src++);
     }
   }
   *wptr = 0;