Re: [PATCH] Make mutt_FormatString multibyte aware
Hi,
* Mon Mar 19 2007 Rocco Rutte <pdmef@xxxxxxx>
> @@ -1203,9 +1232,10 @@ void mutt_FormatString (char *dest, /*
> output buffer */
> wid = mutt_strwidth (buf);
> if (count > wid)
> {
> + int i;
> count -= wid; /* how many chars to pad */
> - memset (wptr, ch, count);
> - wptr += count;
> + for (i = 0; i < count; i++, wptr += padlen)
> + memcpy (wptr, pad, padlen);
> wlen += count;
> col += count;
> }
wlen is not always the same as col.
i.e. You can't do "col += count" for multi-byte.
Instead, you should calculate the width (not length) of pad.
> @@ -1221,14 +1251,17 @@ void mutt_FormatString (char *dest, /*
> output buffer */
> else if (ch == '|')
> {
> /* pad to EOL */
> - ch = *src++;
> + size_t padlen = get_mbchar (pad, src); /* pad char */
> + src += padlen;
> +
> if (destlen > COLS)
> destlen = COLS;
> if (destlen > wlen)
> {
> + int i;
> count = destlen - wlen;
> - memset (wptr, ch, count);
> - wptr += count;
> + for (i = 0; i < count; i++, wptr += padlen)
> + memcpy (wptr, pad, padlen);
> }
> break; /* skip rest of input */
> }
Here, too.
Also, wide chars require special care.
For example, you can't put two wide chars on a COLS==3 screen.
FYI, I have been using my FormatString for a while.
The patch is attached.
I hope you can see what I mean with it.
But please note this is not ready for inclusion.
I am not a multibyte expert either.
--
tamo
diff -r 4ade0c9660d5 mbyte.c
--- a/mbyte.c Thu Mar 15 16:29:42 2007 -0700
+++ b/mbyte.c Mon Mar 19 23:47:54 2007 +0900
@@ -470,6 +470,94 @@ size_t utf8rtowc (wchar_t *pwc, const ch
return (size_t)-2;
}
+/* wcswidth by Markus Kuhn -- 2000-02-08 -- public domain */
+int wcswidth(const wchar_t *pwcs, size_t n)
+{
+ int w, width = 0;
+
+ for (;*pwcs && n-- > 0; pwcs++)
+ if ((w = wcwidth(*pwcs)) < 0)
+ return -1;
+ else
+ width += w;
+
+ return width;
+}
+
+/* mbstowcs hack - may violate POSIX */
+size_t mbstowcs (wchar_t *pwcs, const char *s, size_t n)
+{
+ mbstate_t st;
+ size_t k;
+ wchar_t wc;
+ wchar_t *p = pwcs;
+
+ memset (&st, 0, sizeof (st));
+ for (; (k = mbrtowc (&wc, s, MB_LEN_MAX, &st)); s += k)
+ {
+ if (k == (size_t)(-1) || k == (size_t)(-2))
+ {
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+ if (pwcs)
+ {
+ if (!n)
+ break;
+ *p++ = wc;
+ n--;
+ }
+ }
+ if (n && pwcs)
+ *p = 0;
+ return (p - pwcs);
+}
+
+/* wcstombs hack - may violate POSIX */
+size_t wcstombs (char *s, const wchar_t *pwcs, size_t n)
+{
+ size_t l, r = 0;
+ char buf[MB_LEN_MAX];
+
+ for (;*pwcs; pwcs++)
+ {
+ l = wcrtomb (buf, *pwcs, NULL);
+ if (l == (size_t)(-1))
+ {
+ errno = EILSEQ;
+ return l;
+ }
+ if (s)
+ {
+ if (n <= l)
+ break;
+ memcpy (s, buf, l);
+ s += l;
+ n -= l;
+ }
+ r += l;
+ }
+ if (s && n)
+ *s = '\0';
+ return r;
+}
+
+size_t wcslen (const wchar_t *s)
+{
+ size_t l = 0;
+ while (*s++)
+ l++;
+ return l;
+}
+
+wchar_t * wmemcpy (wchar_t *s1, const wchar_t *s2, size_t n)
+{
+ wchar_t *o = s1;
+ while (n--)
+ *s1++ = *s2++;
+ return o;
+}
+
#endif /* !HAVE_WC_FUNCS */
wchar_t replacement_char (void)
diff -r 4ade0c9660d5 mbyte.h
--- a/mbyte.h Thu Mar 15 16:29:42 2007 -0700
+++ b/mbyte.h Mon Mar 19 23:47:54 2007 +0900
@@ -28,6 +28,11 @@ wint_t towupper (wint_t wc);
wint_t towupper (wint_t wc);
wint_t towlower (wint_t wc);
int wcwidth (wchar_t wc);
+int wcswidth(const wchar_t *pwcs, size_t n);
+size_t mbstowcs (wchar_t *pwcs, const char *s, size_t n);
+size_t wcstombs (char *s, const wchar_t *pwcs, size_t n);
+size_t wcslen (const wchar_t *s);
+wchar_t * wmemcpy (wchar_t *s1, const wchar_t *s2, size_t n);
# endif /* !HAVE_WC_FUNCS */
diff -r 4ade0c9660d5 muttlib.c
--- a/muttlib.c Thu Mar 15 16:29:42 2007 -0700
+++ b/muttlib.c Tue Mar 20 00:03:48 2007 +0900
@@ -986,6 +986,8 @@ void mutt_safe_path (char *s, size_t l,
}
+static void _FormatWString (wchar_t *, size_t, const wchar_t *, format_t *,
unsigned long, format_flag);
+
void mutt_FormatString (char *dest, /* output buffer */
size_t destlen, /* output buffer len */
const char *src, /* template string */
@@ -993,18 +995,14 @@ void mutt_FormatString (char *dest, /*
unsigned long data, /* callback data */
format_flag flags) /* callback flags */
{
- char prefix[SHORT_STRING], buf[LONG_STRING], *cp, *wptr = dest, ch;
- char ifstring[SHORT_STRING], elsestring[SHORT_STRING];
- size_t wlen, count, len, col, wid;
+ wchar_t *wdest = safe_calloc (destlen, sizeof (wchar_t));
+ wchar_t *wsrc = safe_calloc (destlen, sizeof (wchar_t));
+ char buf[LONG_STRING], *wptr;
+ size_t wlen;
pid_t pid;
FILE *filter;
int n;
char *recycler;
-
- prefix[0] = '\0';
- destlen--; /* save room for the terminal \0 */
- wlen = (flags & M_FORMAT_ARROWCURSOR && option (OPTARROWCURSOR)) ? 3 : 0;
- col = wlen;
if ((flags & M_FORMAT_NOFILTER) == 0)
{
@@ -1073,7 +1071,7 @@ void mutt_FormatString (char *dest, /*
wlen = (flags & M_FORMAT_ARROWCURSOR && option (OPTARROWCURSOR)) ? 3 : 0;
if ((pid = mutt_create_filter(command->data, NULL, &filter, NULL)))
{
- n = fread(dest, 1, destlen /* already decremented */, filter);
+ n = fread(dest, 1, --destlen, filter);
fclose(filter);
dest[n] = '\0';
while (dest[n-1] == '\n' || dest[n-1] == '\r')
@@ -1110,9 +1108,39 @@ void mutt_FormatString (char *dest, /*
mutt_buffer_free(&command);
mutt_buffer_free(&srcbuf);
mutt_buffer_free(&word);
- return;
- }
- }
+ goto finish;
+ }
+ }
+
+ mbstowcs (wsrc, src, destlen);
+ wsrc[destlen - 1] = 0;
+ _FormatWString (wdest, destlen, wsrc, callback, data, flags);
+ wcstombs (dest, wdest, destlen);
+ dest[destlen - 1] = 0;
+
+finish:
+ FREE (&wdest);
+ FREE (&wsrc);
+}
+
+static void _FormatWString (wchar_t *dest,
+ size_t destlen,
+ const wchar_t *src,
+ format_t *callback,
+ unsigned long data,
+ format_flag flags)
+{
+ char prefix[SHORT_STRING], buf[LONG_STRING], *cp;
+ wchar_t wbuf[LONG_STRING], *wptr = dest, ch;
+ char ifstring[SHORT_STRING + MB_LEN_MAX], elsestring[SHORT_STRING +
MB_LEN_MAX];
+ size_t wlen, count, len, col, wid;
+ char mbuf[LONG_STRING];
+ mbstate_t mbstate;
+
+ prefix[0] = '\0';
+ destlen--; /* save room for the terminal \0 */
+ wlen = (flags & M_FORMAT_ARROWCURSOR && option (OPTARROWCURSOR)) ? 3 : 0;
+ col = wlen;
while (*src && wlen < destlen)
{
@@ -1162,10 +1190,18 @@ void mutt_FormatString (char *dest, /*
/* eat the `if' part of the string */
cp = ifstring;
count = 0;
- while (count < sizeof (ifstring) && *src && *src != '?' && *src != '&')
+ memset (&mbstate, 0, sizeof (mbstate));
+ while (*src && *src != '?' && *src != '&')
{
- *cp++ = *src++;
- count++;
+ if (count < (sizeof (ifstring) - MB_LEN_MAX))
+ {
+ if ((len = wcrtomb (cp, *src++, &mbstate)) == -1)
+ len = 1, *cp = '?'; /* ??? - should I clear mbstate too? */
+ cp += len;
+ }
+ else
+ len = 0;
+ count += len;
}
*cp = 0;
@@ -1174,10 +1210,18 @@ void mutt_FormatString (char *dest, /*
src++; /* skip the & */
cp = elsestring;
count = 0;
- while (count < sizeof (elsestring) && *src && *src != '?')
+ memset (&mbstate, 0, sizeof (mbstate));
+ while (*src && *src != '?')
{
- *cp++ = *src++;
- count++;
+ if (count < (sizeof (elsestring) - MB_LEN_MAX))
+ {
+ if ((len = wcrtomb (cp, *src++, &mbstate)) == -1)
+ len = 1, *cp = '?'; /* ??? - should I clear mbstate too? */
+ cp += len;
+ }
+ else
+ len = 0;
+ count += len;
}
*cp = 0;
@@ -1198,23 +1242,33 @@ void mutt_FormatString (char *dest, /*
if (count > col)
{
count -= col; /* how many columns left on this line */
- mutt_FormatString (buf, sizeof (buf), src, callback, data, flags);
- len = mutt_strlen (buf);
- wid = mutt_strwidth (buf);
+ _FormatWString (wbuf, sizeof (wbuf), src, callback, data, flags);
+ len = wcslen (wbuf);
+ wid = wcswidth (wbuf, sizeof (wbuf));
if (count > wid)
{
- count -= wid; /* how many chars to pad */
- memset (wptr, ch, count);
- wptr += count;
- wlen += count;
- col += count;
+ count -= wid; /* how many columns to pad */
+ while (count >= wcwidth (ch))
+ {
+ *wptr++ = ch;
+ wlen++;
+ col += wcwidth (ch);
+ count -= wcwidth (ch);
+ }
+ while (count)
+ {
+ *wptr++ = ' ';
+ wlen++;
+ col++;
+ count--;
+ }
}
if (len + wlen > destlen)
len = destlen - wlen;
- memcpy (wptr, buf, len);
+ wmemcpy (wptr, wbuf, len);
wptr += len;
wlen += len;
- col += mutt_strwidth (buf);
+ col += wcswidth (wbuf, sizeof (wbuf));
}
break; /* skip rest of input */
}
@@ -1222,13 +1276,23 @@ void mutt_FormatString (char *dest, /*
{
/* pad to EOL */
ch = *src++;
- if (destlen > COLS)
- destlen = COLS;
- if (destlen > wlen)
+ if (COLS > col)
{
- count = destlen - wlen;
- memset (wptr, ch, count);
- wptr += count;
+ count = COLS - col;
+ while (count >= wcwidth (ch) && destlen > wlen)
+ {
+ *wptr++ = ch;
+ wlen++;
+ count -= wcwidth (ch);
+ /* col += wcwidth (ch); */
+ }
+ while (count && destlen > wlen)
+ {
+ *wptr++ = ' ';
+ wlen++;
+ count--;
+ /* col++; */
+ }
}
break; /* skip rest of input */
}
@@ -1246,9 +1310,11 @@ void mutt_FormatString (char *dest, /*
ch = *src++;
}
-
+
/* use callback function to handle this case */
- src = callback (buf, sizeof (buf), ch, src, prefix, ifstring,
elsestring, data, flags);
+ wcstombs (mbuf, src, sizeof (mbuf));
+ mbuf[sizeof (mbuf) - 1] = 0;
+ src += callback (buf, sizeof (buf), ch, mbuf, prefix, ifstring,
elsestring, data, flags) - mbuf;
if (tolower)
mutt_strlower (buf);
@@ -1260,10 +1326,9 @@ void mutt_FormatString (char *dest, /*
*p = '_';
}
- if ((len = mutt_strlen (buf)) + wlen > destlen)
- len = (destlen - wlen > 0) ? (destlen - wlen) : 0;
-
- memcpy (wptr, buf, len);
+ len = mbstowcs (wptr, buf, destlen - wlen);
+ if (len == -1)
+ break; /* ??? */
wptr += len;
wlen += len;
col += mutt_strwidth (buf);
@@ -1297,13 +1362,13 @@ void mutt_FormatString (char *dest, /*
src++;
wptr++;
wlen++;
- col++;
+ col++; /* ??? */
}
else
{
- *wptr++ = *src++;
+ *wptr++ = *src;
wlen++;
- col++;
+ col += wcwidth (*src++);
}
}
*wptr = 0;