<<< Date Index >>>     <<< Thread Index >>>

Re: mutt_FormatString() not multibyte-aware



* Fri Jun 23 2006 Rocco Rutte <pdmef@xxxxxxx>
> mutt_FormatString() isn't multibyte-aware in several ways.
> 
> First, it doesn't support padding with multibyte-chars due to lines like 
> 'ch = *src++' etc.
> 
> Second, padding doesn't work if the string contains multibyte characters 
> since the pad length seems to be calculated by counting bytes rather than 
> the character cells (display columns) of the result.
> 
> I would like to fix it but don't know how since one of the mbyte 
> functions failed for me, always returning 1 for the width. Maybe we 
> could also convert to utf-8 first because it's so trivial to test for 
> continuations (as mutt IIRC does in other places already).

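Yeah, multibyte strings (mbs) are too hard to handle directly because
you have to keep track of the conversion state (mbstate).
See the attached patch, which converts to wide characters instead.
This is just a hack, but it works if you have wcswidth, wmemcpy,
wcslen, etc.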

> Should I file a bug report for this to have a discussion in the BTS or 
> is a real fix easy enough so I don't have to?

I'm afraid there is no easy fix.

-- 
tamo
Index: muttlib.c
===================================================================
RCS file: /home/roessler/cvs/mutt/muttlib.c,v
retrieving revision 3.44
diff -p -u -r3.44 muttlib.c
--- muttlib.c   8 Jun 2006 11:56:05 -0000       3.44
+++ muttlib.c   24 Jun 2006 07:27:29 -0000
@@ -986,6 +1001,8 @@ void mutt_safe_path (char *s, size_t l, 
 }
 
 
+static void _FormatWString (wchar_t *, size_t, const wchar_t *, format_t *, 
unsigned long, format_flag);
+
 void mutt_FormatString (char *dest,            /* output buffer */
                        size_t destlen,         /* output buffer len */
                        const char *src,        /* template string */
@@ -993,9 +1010,29 @@ void mutt_FormatString (char *dest,               /* 
                        unsigned long data,     /* callback data */
                        format_flag flags)      /* callback flags */
 {
-  char prefix[SHORT_STRING], buf[LONG_STRING], *cp, *wptr = dest, ch;
+  wchar_t *wdest = safe_calloc (destlen, sizeof (wchar_t));
+  wchar_t *wsrc = safe_calloc (destlen, sizeof (wchar_t));
+  mbstowcs (wsrc, src, destlen);
+  wsrc[destlen - 1] = 0;
+  _FormatWString (wdest, destlen, wsrc, callback, data, flags);
+  wcstombs (dest, wdest, destlen);
+  dest[destlen - 1] = 0;
+  FREE (&wdest);
+  FREE (&wsrc);
+}
+
+static void _FormatWString (wchar_t *dest,
+                           size_t destlen,
+                           const wchar_t *src,
+                           format_t *callback,
+                           unsigned long data,
+                           format_flag flags)
+{
+  char prefix[SHORT_STRING], buf[LONG_STRING], *cp;
+  wchar_t wbuf[LONG_STRING], *wptr = dest, ch;
   char ifstring[SHORT_STRING], elsestring[SHORT_STRING];
   size_t wlen, count, len, col, wid;
+  char mbuf[LONG_STRING];
 
   prefix[0] = '\0';
   destlen--; /* save room for the terminal \0 */
@@ -1086,23 +1123,33 @@ void mutt_FormatString (char *dest,             /* 
        if (count > col)
        {
          count -= col; /* how many columns left on this line */
-         mutt_FormatString (buf, sizeof (buf), src, callback, data, flags);
-         len = mutt_strlen (buf);
-         wid = mutt_strwidth (buf);
+         _FormatWString (wbuf, sizeof (wbuf), src, callback, data, flags);
+         len = wcslen (wbuf);
+         wid = wcswidth (wbuf, sizeof (wbuf));
          if (count > wid)
          {
-           count -= wid; /* how many chars to pad */
-           memset (wptr, ch, count);
-           wptr += count;
-           wlen += count;
-           col += count;
+           count -= wid; /* how many columns to pad */
+           while (count >= wcwidth (ch))
+           {
+             *wptr++ = ch;
+             wlen++;
+             col += wcwidth (ch);
+             count -= wcwidth (ch);
+           }
+           while (count)
+           {
+             *wptr++ = ' ';
+             wlen++;
+             col++;
+             count--;
+           }
          }
          if (len + wlen > destlen)
            len = destlen - wlen;
-         memcpy (wptr, buf, len);
+         wmemcpy (wptr, wbuf, len);
          wptr += len;
          wlen += len;
-         col += mutt_strwidth (buf);
+         col += wcswidth (wbuf, sizeof (wbuf));
        }
        break; /* skip rest of input */
       }
@@ -1115,8 +1162,16 @@ void mutt_FormatString (char *dest,              /* 
        if (destlen > wlen)
        {
          count = destlen - wlen;
-         memset (wptr, ch, count);
-         wptr += count;
+         while (count >= wcwidth (ch))
+         {
+           *wptr++ = ch;
+           count -= wcwidth (ch);
+         }
+         while (count)
+         {
+           *wptr++ = ' ';
+           count--;
+         }
        }
        break; /* skip rest of input */
       }
@@ -1134,9 +1189,11 @@ void mutt_FormatString (char *dest,              /* 
          
          ch = *src++;
        }
-       
+
        /* use callback function to handle this case */
-       src = callback (buf, sizeof (buf), ch, src, prefix, ifstring, 
elsestring, data, flags);
+       wcstombs (mbuf, src, sizeof (mbuf));
+       mbuf[sizeof (mbuf) - 1] = 0;
+       src += callback (buf, sizeof (buf), ch, mbuf, prefix, ifstring, 
elsestring, data, flags) - mbuf;
 
        if (tolower)
          mutt_strlower (buf);
@@ -1148,10 +1205,9 @@ void mutt_FormatString (char *dest,              /* 
                *p = '_';
        }
        
-       if ((len = mutt_strlen (buf)) + wlen > destlen)
-         len = (destlen - wlen > 0) ? (destlen - wlen) : 0;
-
-       memcpy (wptr, buf, len);
+       len = mbstowcs (wptr, buf, destlen - wlen);
+       if (len == -1)
+         break; /* ??? */
        wptr += len;
        wlen += len;
        col += mutt_strwidth (buf);
@@ -1185,13 +1241,13 @@ void mutt_FormatString (char *dest,             /* 
       src++;
       wptr++;
       wlen++;
-      col++;
+      col++; /* ??? */
     }
     else
     {
-      *wptr++ = *src++;
+      *wptr++ = *src;
       wlen++;
-      col++;
+      col += wcwidth (*src++);
     }
   }
   *wptr = 0;