Re: mutt_FormatString() not multibyte-aware
* Sat Jun 24 2006 Rocco Rutte <pdmef@xxxxxxx>
> * TAKAHASHI Tamotsu <ttakah@xxxxxxxxxxxxxxxxx>:
> >See the attached patch. This is just a hack, but it works if you
> >have wcswidth, wmemcpy, wcslen, etc.
>
> Ouch, it's really a hack and requires more work because once you have
> wchar_t, all the callbacks should be aware of it too, I guess.
My patch converts the wcs to mbs and throws it to the callbacks,
and converts the result (mbs) to wcs again.
So it works (*), but yes, it's a hack - I have to check wc-funcs
before using them.
Even if you modify all the callbacks, that doesn't affect what
you see on the screen.
(*) It works as long as prefix, ifstring, and elsestring are ASCII
- this limitation has been removed by the attached patch.
> Another approach which just pops into my brain is to really rework
> mutt_FormatString like this: first go through the input and use the
> callbacks to make up an array of replacements as first pass. Afterwards
> mutt_FormatString() could use whatever semantics it wants internally,
> like going with wchar_t or UTF-8 or...
That sounds good, though I don't know which is faster.
--
tamo
Index: muttlib.c
===================================================================
RCS file: /home/roessler/cvs/mutt/muttlib.c,v
retrieving revision 3.44
diff -p -u -r3.44 muttlib.c
--- muttlib.c 8 Jun 2006 11:56:05 -0000 3.44
+++ muttlib.c 25 Jun 2006 07:43:35 -0000
@@ -986,6 +986,8 @@ void mutt_safe_path (char *s, size_t l,
}
+static void _FormatWString (wchar_t *, size_t, const wchar_t *, format_t *,
unsigned long, format_flag);
+
void mutt_FormatString (char *dest, /* output buffer */
size_t destlen, /* output buffer len */
const char *src, /* template string */
@@ -993,9 +995,30 @@ void mutt_FormatString (char *dest, /*
unsigned long data, /* callback data */
format_flag flags) /* callback flags */
{
- char prefix[SHORT_STRING], buf[LONG_STRING], *cp, *wptr = dest, ch;
- char ifstring[SHORT_STRING], elsestring[SHORT_STRING];
+ wchar_t *wdest = safe_calloc (destlen, sizeof (wchar_t));
+ wchar_t *wsrc = safe_calloc (destlen, sizeof (wchar_t));
+ mbstowcs (wsrc, src, destlen);
+ wsrc[destlen - 1] = 0;
+ _FormatWString (wdest, destlen, wsrc, callback, data, flags);
+ wcstombs (dest, wdest, destlen);
+ dest[destlen - 1] = 0;
+ FREE (&wdest);
+ FREE (&wsrc);
+}
+
+static void _FormatWString (wchar_t *dest,
+ size_t destlen,
+ const wchar_t *src,
+ format_t *callback,
+ unsigned long data,
+ format_flag flags)
+{
+ char prefix[SHORT_STRING], buf[LONG_STRING], *cp;
+ wchar_t wbuf[LONG_STRING], *wptr = dest, ch;
+ char ifstring[SHORT_STRING + MB_LEN_MAX], elsestring[SHORT_STRING +
MB_LEN_MAX];
size_t wlen, count, len, col, wid;
+ char mbuf[LONG_STRING];
+ mbstate_t mbstate;
prefix[0] = '\0';
destlen--; /* save room for the terminal \0 */
@@ -1050,10 +1073,18 @@ void mutt_FormatString (char *dest, /*
/* eat the `if' part of the string */
cp = ifstring;
count = 0;
- while (count < sizeof (ifstring) && *src && *src != '?' && *src != '&')
+ memset (&mbstate, 0, sizeof (mbstate));
+ while (*src && *src != '?' && *src != '&')
{
- *cp++ = *src++;
- count++;
+ if (count < (sizeof (ifstring) - MB_LEN_MAX))
+ {
+ if ((len = wcrtomb (cp, *src++, &mbstate)) == -1)
+ len = 1, *cp = '?'; /* ??? - should I clear mbstate too? */
+ cp += len;
+ }
+ else
+ len = 0;
+ count += len;
}
*cp = 0;
@@ -1062,10 +1093,18 @@ void mutt_FormatString (char *dest, /*
src++; /* skip the & */
cp = elsestring;
count = 0;
- while (count < sizeof (elsestring) && *src && *src != '?')
+ memset (&mbstate, 0, sizeof (mbstate));
+ while (*src && *src != '?')
{
- *cp++ = *src++;
- count++;
+ if (count < (sizeof (elsestring) - MB_LEN_MAX))
+ {
+ if ((len = wcrtomb (cp, *src++, &mbstate)) == -1)
+ len = 1, *cp = '?'; /* ??? - should I clear mbstate too? */
+ cp += len;
+ }
+ else
+ len = 0;
+ count += len;
}
*cp = 0;
@@ -1086,23 +1125,33 @@ void mutt_FormatString (char *dest, /*
if (count > col)
{
count -= col; /* how many columns left on this line */
- mutt_FormatString (buf, sizeof (buf), src, callback, data, flags);
- len = mutt_strlen (buf);
- wid = mutt_strwidth (buf);
+ _FormatWString (wbuf, sizeof (wbuf), src, callback, data, flags);
+ len = wcslen (wbuf);
+ wid = wcswidth (wbuf, sizeof (wbuf));
if (count > wid)
{
- count -= wid; /* how many chars to pad */
- memset (wptr, ch, count);
- wptr += count;
- wlen += count;
- col += count;
+ count -= wid; /* how many columns to pad */
+ while (count >= wcwidth (ch))
+ {
+ *wptr++ = ch;
+ wlen++;
+ col += wcwidth (ch);
+ count -= wcwidth (ch);
+ }
+ while (count)
+ {
+ *wptr++ = ' ';
+ wlen++;
+ col++;
+ count--;
+ }
}
if (len + wlen > destlen)
len = destlen - wlen;
- memcpy (wptr, buf, len);
+ wmemcpy (wptr, wbuf, len);
wptr += len;
wlen += len;
- col += mutt_strwidth (buf);
+ col += wcswidth (wbuf, sizeof (wbuf));
}
break; /* skip rest of input */
}
@@ -1115,8 +1164,16 @@ void mutt_FormatString (char *dest, /*
if (destlen > wlen)
{
count = destlen - wlen;
- memset (wptr, ch, count);
- wptr += count;
+ while (count >= wcwidth (ch))
+ {
+ *wptr++ = ch;
+ count -= wcwidth (ch);
+ }
+ while (count)
+ {
+ *wptr++ = ' ';
+ count--;
+ }
}
break; /* skip rest of input */
}
@@ -1134,9 +1191,11 @@ void mutt_FormatString (char *dest, /*
ch = *src++;
}
-
+
/* use callback function to handle this case */
- src = callback (buf, sizeof (buf), ch, src, prefix, ifstring,
elsestring, data, flags);
+ wcstombs (mbuf, src, sizeof (mbuf));
+ mbuf[sizeof (mbuf) - 1] = 0;
+ src += callback (buf, sizeof (buf), ch, mbuf, prefix, ifstring,
elsestring, data, flags) - mbuf;
if (tolower)
mutt_strlower (buf);
@@ -1148,10 +1207,9 @@ void mutt_FormatString (char *dest, /*
*p = '_';
}
- if ((len = mutt_strlen (buf)) + wlen > destlen)
- len = (destlen - wlen > 0) ? (destlen - wlen) : 0;
-
- memcpy (wptr, buf, len);
+ len = mbstowcs (wptr, buf, destlen - wlen);
+ if (len == -1)
+ break; /* ??? */
wptr += len;
wlen += len;
col += mutt_strwidth (buf);
@@ -1185,13 +1243,13 @@ void mutt_FormatString (char *dest, /*
src++;
wptr++;
wlen++;
- col++;
+ col++; /* ??? */
}
else
{
- *wptr++ = *src++;
+ *wptr++ = *src;
wlen++;
- col++;
+ col += wcwidth (*src++);
}
}
*wptr = 0;