<<< Date Index >>>     <<< Thread Index >>>

Re: [PATCH] Make mutt_FormatString multibyte aware



Hi,

* Mon Mar 19 2007 Rocco Rutte <pdmef@xxxxxxx>
> @@ -1203,9 +1232,10 @@ void mutt_FormatString (char *dest,            /* 
> output buffer */
>         wid = mutt_strwidth (buf);
>         if (count > wid)
>         {
> +         int i;
>           count -= wid; /* how many chars to pad */
> -         memset (wptr, ch, count);
> -         wptr += count;
> +         for (i = 0; i < count; i++, wptr += padlen)
> +           memcpy (wptr, pad, padlen);
>           wlen += count;
>           col += count;
>         }

wlen is not always the same as col.
i.e. You can't do "col += count" for multi-byte.
Instead, you should calculate the width (not length) of pad.


> @@ -1221,14 +1251,17 @@ void mutt_FormatString (char *dest,           /* 
> output buffer */
>        else if (ch == '|')
>        {
>       /* pad to EOL */
> -     ch = *src++;
> +     size_t padlen = get_mbchar (pad, src); /* pad char */
> +     src += padlen;
> +
>       if (destlen > COLS)
>         destlen = COLS;
>       if (destlen > wlen)
>       {
> +       int i;
>         count = destlen - wlen;
> -       memset (wptr, ch, count);
> -       wptr += count;
> +       for (i = 0; i < count; i++, wptr += padlen)
> +         memcpy (wptr, pad, padlen);
>       }
>       break; /* skip rest of input */
>        }

Here, too.

Also, wide chars require special care.
For example, you can't put two wide chars on a COLS==3 screen.

FYI, I have been using my FormatString for a while.
The patch is attached.
I hope you can see what I mean with it.
But please note this is not ready for inclusion.
I am not a multibyte expert either.

-- 
tamo
diff -r 4ade0c9660d5 mbyte.c
--- a/mbyte.c   Thu Mar 15 16:29:42 2007 -0700
+++ b/mbyte.c   Mon Mar 19 23:47:54 2007 +0900
@@ -470,6 +470,94 @@ size_t utf8rtowc (wchar_t *pwc, const ch
   return (size_t)-2;
 }
 
+/* wcswidth by Markus Kuhn -- 2000-02-08 -- public domain */
+int wcswidth(const wchar_t *pwcs, size_t n)
+{
+  int w, width = 0;
+
+  for (;*pwcs && n-- > 0; pwcs++)
+    if ((w = wcwidth(*pwcs)) < 0)
+      return -1;
+    else
+      width += w;
+
+  return width;
+}
+
+/* mbstowcs hack - may violate POSIX */
+size_t mbstowcs (wchar_t *pwcs, const char *s, size_t n)
+{
+  mbstate_t st;
+  size_t k;
+  wchar_t wc;
+  wchar_t *p = pwcs;
+
+  memset (&st, 0, sizeof (st));
+  for (; (k = mbrtowc (&wc, s, MB_LEN_MAX, &st)); s += k)
+  {
+    if (k == (size_t)(-1) || k == (size_t)(-2))
+    {
+      errno = EILSEQ;
+      return (size_t)(-1);
+    }
+    if (pwcs)
+    {
+      if (!n)
+       break;
+      *p++ = wc;
+      n--;
+    }
+  }
+  if (n && pwcs)
+    *p = 0;
+  return (p - pwcs);
+}
+
+/* wcstombs hack - may violate POSIX */
+size_t wcstombs (char *s, const wchar_t *pwcs, size_t n)
+{
+  size_t l, r = 0;
+  char buf[MB_LEN_MAX];
+
+  for (;*pwcs; pwcs++)
+  {
+    l = wcrtomb (buf, *pwcs, NULL);
+    if (l == (size_t)(-1))
+    {
+      errno = EILSEQ;
+      return l;
+    }
+    if (s)
+    {
+      if (n <= l)
+       break;
+      memcpy (s, buf, l);
+      s += l;
+      n -= l;
+    }
+    r += l;
+  }
+  if (s && n)
+    *s = '\0';
+  return r;
+}
+
+size_t wcslen (const wchar_t *s)
+{
+  size_t l = 0;
+  while (*s++)
+    l++;
+  return l;
+}
+
+wchar_t * wmemcpy (wchar_t *s1, const wchar_t *s2, size_t n)
+{
+  wchar_t *o = s1;
+  while (n--)
+    *s1++ = *s2++;
+  return o;
+}
+
 #endif /* !HAVE_WC_FUNCS */
 
 wchar_t replacement_char (void)
diff -r 4ade0c9660d5 mbyte.h
--- a/mbyte.h   Thu Mar 15 16:29:42 2007 -0700
+++ b/mbyte.h   Mon Mar 19 23:47:54 2007 +0900
@@ -28,6 +28,11 @@ wint_t towupper (wint_t wc);
 wint_t towupper (wint_t wc);
 wint_t towlower (wint_t wc);
 int wcwidth (wchar_t wc);
+int wcswidth(const wchar_t *pwcs, size_t n);
+size_t mbstowcs (wchar_t *pwcs, const char *s, size_t n);
+size_t wcstombs (char *s, const wchar_t *pwcs, size_t n);
+size_t wcslen (const wchar_t *s);
+wchar_t * wmemcpy (wchar_t *s1, const wchar_t *s2, size_t n);
 # endif /* !HAVE_WC_FUNCS */
 
 
diff -r 4ade0c9660d5 muttlib.c
--- a/muttlib.c Thu Mar 15 16:29:42 2007 -0700
+++ b/muttlib.c Tue Mar 20 00:03:48 2007 +0900
@@ -986,6 +986,8 @@ void mutt_safe_path (char *s, size_t l, 
 }
 
 
+static void _FormatWString (wchar_t *, size_t, const wchar_t *, format_t *, 
unsigned long, format_flag);
+
 void mutt_FormatString (char *dest,            /* output buffer */
                        size_t destlen,         /* output buffer len */
                        const char *src,        /* template string */
@@ -993,18 +995,14 @@ void mutt_FormatString (char *dest,               /* 
                        unsigned long data,     /* callback data */
                        format_flag flags)      /* callback flags */
 {
-  char prefix[SHORT_STRING], buf[LONG_STRING], *cp, *wptr = dest, ch;
-  char ifstring[SHORT_STRING], elsestring[SHORT_STRING];
-  size_t wlen, count, len, col, wid;
+  wchar_t *wdest = safe_calloc (destlen, sizeof (wchar_t));
+  wchar_t *wsrc = safe_calloc (destlen, sizeof (wchar_t));
+  char buf[LONG_STRING], *wptr;
+  size_t wlen;
   pid_t pid;
   FILE *filter;
   int n;
   char *recycler;
-
-  prefix[0] = '\0';
-  destlen--; /* save room for the terminal \0 */
-  wlen = (flags & M_FORMAT_ARROWCURSOR && option (OPTARROWCURSOR)) ? 3 : 0;
-  col = wlen;
 
   if ((flags & M_FORMAT_NOFILTER) == 0)
   {
@@ -1073,7 +1071,7 @@ void mutt_FormatString (char *dest,               /* 
       wlen = (flags & M_FORMAT_ARROWCURSOR && option (OPTARROWCURSOR)) ? 3 : 0;
       if ((pid = mutt_create_filter(command->data, NULL, &filter, NULL)))
       {
-        n = fread(dest, 1, destlen /* already decremented */, filter);
+        n = fread(dest, 1, --destlen, filter);
         fclose(filter);
         dest[n] = '\0';
         while (dest[n-1] == '\n' || dest[n-1] == '\r')
@@ -1110,9 +1108,39 @@ void mutt_FormatString (char *dest,              /* 
       mutt_buffer_free(&command);
       mutt_buffer_free(&srcbuf);
       mutt_buffer_free(&word);
-      return;
-    }
-  }
+      goto finish;
+    }
+  }
+
+  mbstowcs (wsrc, src, destlen);
+  wsrc[destlen - 1] = 0;
+  _FormatWString (wdest, destlen, wsrc, callback, data, flags);
+  wcstombs (dest, wdest, destlen);
+  dest[destlen - 1] = 0;
+
+finish:
+  FREE (&wdest);
+  FREE (&wsrc);
+}
+
+static void _FormatWString (wchar_t *dest,
+                           size_t destlen,
+                           const wchar_t *src,
+                           format_t *callback,
+                           unsigned long data,
+                           format_flag flags)
+{
+  char prefix[SHORT_STRING], buf[LONG_STRING], *cp;
+  wchar_t wbuf[LONG_STRING], *wptr = dest, ch;
+  char ifstring[SHORT_STRING + MB_LEN_MAX], elsestring[SHORT_STRING + 
MB_LEN_MAX];
+  size_t wlen, count, len, col, wid;
+  char mbuf[LONG_STRING];
+  mbstate_t mbstate;
+
+  prefix[0] = '\0';
+  destlen--; /* save room for the terminal \0 */
+  wlen = (flags & M_FORMAT_ARROWCURSOR && option (OPTARROWCURSOR)) ? 3 : 0;
+  col = wlen;
 
   while (*src && wlen < destlen)
   {
@@ -1162,10 +1190,18 @@ void mutt_FormatString (char *dest,             /* 
         /* eat the `if' part of the string */
         cp = ifstring;
        count = 0;
-        while (count < sizeof (ifstring) && *src && *src != '?' && *src != '&')
+        memset (&mbstate, 0, sizeof (mbstate));
+        while (*src && *src != '?' && *src != '&')
        {
-          *cp++ = *src++;
-         count++;
+         if (count < (sizeof (ifstring) - MB_LEN_MAX))
+         {
+           if ((len = wcrtomb (cp, *src++, &mbstate)) == -1)
+             len = 1, *cp = '?'; /* ??? - should I clear mbstate too? */
+            cp += len;
+         }
+         else
+           len = 0;
+         count += len;
        }
         *cp = 0;
 
@@ -1174,10 +1210,18 @@ void mutt_FormatString (char *dest,             /* 
          src++; /* skip the & */
        cp = elsestring;
        count = 0;
-       while (count < sizeof (elsestring) && *src && *src != '?')
+        memset (&mbstate, 0, sizeof (mbstate));
+       while (*src && *src != '?')
        {
-         *cp++ = *src++;
-         count++;
+         if (count < (sizeof (elsestring) - MB_LEN_MAX))
+         {
+           if ((len = wcrtomb (cp, *src++, &mbstate)) == -1)
+             len = 1, *cp = '?'; /* ??? - should I clear mbstate too? */
+            cp += len;
+         }
+         else
+           len = 0;
+         count += len;
        }
        *cp = 0;
 
@@ -1198,23 +1242,33 @@ void mutt_FormatString (char *dest,             /* 
        if (count > col)
        {
          count -= col; /* how many columns left on this line */
-         mutt_FormatString (buf, sizeof (buf), src, callback, data, flags);
-         len = mutt_strlen (buf);
-         wid = mutt_strwidth (buf);
+         _FormatWString (wbuf, sizeof (wbuf), src, callback, data, flags);
+         len = wcslen (wbuf);
+         wid = wcswidth (wbuf, sizeof (wbuf));
          if (count > wid)
          {
-           count -= wid; /* how many chars to pad */
-           memset (wptr, ch, count);
-           wptr += count;
-           wlen += count;
-           col += count;
+           count -= wid; /* how many columns to pad */
+           while (count >= wcwidth (ch))
+           {
+             *wptr++ = ch;
+             wlen++;
+             col += wcwidth (ch);
+             count -= wcwidth (ch);
+           }
+           while (count)
+           {
+             *wptr++ = ' ';
+             wlen++;
+             col++;
+             count--;
+           }
          }
          if (len + wlen > destlen)
            len = destlen - wlen;
-         memcpy (wptr, buf, len);
+         wmemcpy (wptr, wbuf, len);
          wptr += len;
          wlen += len;
-         col += mutt_strwidth (buf);
+         col += wcswidth (wbuf, sizeof (wbuf));
        }
        break; /* skip rest of input */
       }
@@ -1222,13 +1276,23 @@ void mutt_FormatString (char *dest,             /* 
       {
        /* pad to EOL */
        ch = *src++;
-       if (destlen > COLS)
-         destlen = COLS;
-       if (destlen > wlen)
+       if (COLS > col)
        {
-         count = destlen - wlen;
-         memset (wptr, ch, count);
-         wptr += count;
+         count = COLS - col;
+         while (count >= wcwidth (ch) && destlen > wlen)
+         {
+           *wptr++ = ch;
+            wlen++;
+           count -= wcwidth (ch);
+            /* col += wcwidth (ch); */
+         }
+         while (count && destlen > wlen)
+         {
+           *wptr++ = ' ';
+            wlen++;
+           count--;
+            /* col++; */
+         }
        }
        break; /* skip rest of input */
       }
@@ -1246,9 +1310,11 @@ void mutt_FormatString (char *dest,              /* 
          
          ch = *src++;
        }
-       
+
        /* use callback function to handle this case */
-       src = callback (buf, sizeof (buf), ch, src, prefix, ifstring, 
elsestring, data, flags);
+       wcstombs (mbuf, src, sizeof (mbuf));
+       mbuf[sizeof (mbuf) - 1] = 0;
+       src += callback (buf, sizeof (buf), ch, mbuf, prefix, ifstring, 
elsestring, data, flags) - mbuf;
 
        if (tolower)
          mutt_strlower (buf);
@@ -1260,10 +1326,9 @@ void mutt_FormatString (char *dest,              /* 
                *p = '_';
        }
        
-       if ((len = mutt_strlen (buf)) + wlen > destlen)
-         len = (destlen - wlen > 0) ? (destlen - wlen) : 0;
-
-       memcpy (wptr, buf, len);
+       len = mbstowcs (wptr, buf, destlen - wlen);
+       if (len == -1)
+         break; /* ??? */
        wptr += len;
        wlen += len;
        col += mutt_strwidth (buf);
@@ -1297,13 +1362,13 @@ void mutt_FormatString (char *dest,             /* 
       src++;
       wptr++;
       wlen++;
-      col++;
+      col++; /* ??? */
     }
     else
     {
-      *wptr++ = *src++;
+      *wptr++ = *src;
       wlen++;
-      col++;
+      col += wcwidth (*src++);
     }
   }
   *wptr = 0;