Re: What happened to strict_mime?
* Wed Mar 21 2007 TAKAHASHI Tamotsu <ttakah@xxxxxxxxxxxxxxxxx>
> * Tue Mar 20 2007 Thomas Roessler <roessler@xxxxxxxxxxxxxxxxxx>
> > The way in which this code iterates through AssumedCharset in
> > convert_nonmime_string is clumsy at best. (A better way to go
> > through it would be to safe_strdup() the entire thing, use strtok on
> > the copy, and then free the temporary buffer.)
>
> Thanks for your advice. I like your implementation.
> I didn't know of strtok.
>
>
> > So, from a casual glance at this code, two suggestions:
>
> > - Please clean up this code.
>
> I'm going to try.
I've finished cleaning it up. Attached.
If Alain's "pass-thru" option is not needed,
the code can be even shorter.
--
tamo
diff -r a6da24788400 charset.c
--- a/charset.c Wed Mar 21 11:29:20 2007 -0700
+++ b/charset.c Thu Mar 22 16:15:34 2007 +0900
@@ -288,18 +288,25 @@ int mutt_chscmp (const char *s, const ch
return !ascii_strcasecmp (buffer, chs);
}
+/* Returns a buffer which should be freed later.
+ * The content of the buffer is:
+ *
+ * - "us-ascii" if AssumedCharset is NULL or ""
+ * - the first entry of AssumedCharset
+ * (this entry can be "")
+ */
char *mutt_get_default_charset ()
{
- static char fcharset[SHORT_STRING];
- const char *c = AssumedCharset;
- const char *c1;
-
- if (c && *c) {
- c1 = strchr (c, ':');
- strfcpy (fcharset, c, c1 ? (c1 - c + 1) : sizeof (fcharset));
- return fcharset;
- }
- return strcpy (fcharset, "us-ascii"); /* __STRCPY_CHECKED__ */
+ char *s;
+
+ if (!AssumedCharset || !*AssumedCharset)
+ return strdup ("us-ascii");
+
+ s = strdup (AssumedCharset);
+ /* If it begins with ":", make it empty */
+ if (s != strtok (s, ":"))
+ *s = '\0';
+ return s;
}
#ifndef HAVE_ICONV
diff -r a6da24788400 handler.c
--- a/handler.c Wed Mar 21 11:29:20 2007 -0700
+++ b/handler.c Thu Mar 22 16:15:34 2007 +0900
@@ -1443,10 +1443,10 @@ void mutt_decode_attachment (BODY *b, ST
if (istext && s->flags & M_CHARCONV)
{
char *charset = mutt_get_parameter ("charset", b->parameter);
- if (!charset && AssumedCharset && *AssumedCharset)
- charset = mutt_get_default_charset ();
- if (charset && Charset)
- cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
+ char *assumed_charset = mutt_get_default_charset ();
+ if ((charset || *assumed_charset) && Charset)
+ cd = mutt_iconv_open (Charset, charset ? charset : assumed_charset,
M_ICONV_HOOK_FROM);
+ FREE (&assumed_charset);
}
else if (istext && b->charset)
cd = mutt_iconv_open (Charset, b->charset, M_ICONV_HOOK_FROM);
diff -r a6da24788400 init.h
--- a/init.h Wed Mar 21 11:29:20 2007 -0700
+++ b/init.h Thu Mar 22 16:15:34 2007 +0900
@@ -169,18 +169,29 @@ struct option_t MuttVars[] = {
{ "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL 0},
/*
** .pp
- ** This variable is a colon-separated list of character encoding
- ** schemes for messages without character encoding indication.
- ** Header field values and message body content without character encoding
- ** indication would be assumed that they are written in one of this list.
- ** By default, all the header fields and message body without any charset
- ** indication are assumed to be in "us-ascii".
- ** .pp
+ ** This is a colon-separated list of possible character sets for
+ ** incoming messages. If a header field has no RFC2047-encoded word,
+ ** mutt assumes that the header contains unencoded non-ASCII words
+ ** violating the RFC. Mutt tries to detect the header's character set
+ ** by testing the list entries in turn. If the header can be converted
+ ** from none of them to ``$$charset'', mutt uses the first entry unless
+ ** the list ends with a colon. The trailing colon makes mutt bypass the
+ ** conversion process. This trial-and-error method is not performed for
+ ** the message body, so only the first entry is used to show a body whose
+ ** Content-Type header has no "charset" parameter.
+ ** .pp
+ ** If unset, mutt converts unknown headers and bodies from "us-ascii".
+ ** If you want no conversion, you can trust anything with this setting:
+ ** .pp
+ ** set assumed_charset=":"
+ ** .pp
+ ** But in most cases, you should filter out invalid characters by
+ ** specifying at least one character set.
** For example, Japanese users might prefer this:
** .pp
** set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
** .pp
- ** However, only the first content is valid for the message body.
+ ** Note: This variable takes effect only after reloading the folder.
*/
{ "attach_charset", DT_STR, R_NONE, UL &AttachCharset, UL 0 },
/*
diff -r a6da24788400 parse.c
--- a/parse.c Wed Mar 21 11:29:20 2007 -0700
+++ b/parse.c Thu Mar 22 16:15:34 2007 +0900
@@ -402,9 +402,12 @@ void mutt_parse_content_type (char *s, B
if (ct->type == TYPETEXT)
{
if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
- mutt_set_parameter ("charset", (AssumedCharset && *AssumedCharset) ?
- (const char *) mutt_get_default_charset ()
- : "us-ascii", &ct->parameter);
+ {
+ char *assumed_charset = mutt_get_default_charset ();
+ mutt_set_parameter ("charset", *assumed_charset ?
+ assumed_charset : NULL, &ct->parameter);
+ FREE (&assumed_charset);
+ }
}
}
diff -r a6da24788400 rfc2047.c
--- a/rfc2047.c Wed Mar 21 11:29:20 2007 -0700
+++ b/rfc2047.c Thu Mar 22 16:15:34 2007 +0900
@@ -87,40 +87,41 @@ static size_t convert_string (ICONV_CONS
return n;
}
+/* AssumedCharset must be a non-empty string */
int convert_nonmime_string (char **ps)
{
- const char *c, *c1;
-
- for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
- {
- char *u = *ps;
- char *s;
- char *fromcode;
- size_t m, n;
- size_t ulen = mutt_strlen (*ps);
- size_t slen;
-
- if (!u || !*u)
- return 0;
-
- c1 = strchr (c, ':');
- n = c1 ? c1 - c : mutt_strlen (c);
- if (!n)
- return 0;
- fromcode = safe_malloc (n + 1);
- strfcpy (fromcode, c, n + 1);
- m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
- FREE (&fromcode);
+ char *c, *c1;
+ char *u = *ps;
+ char *s;
+ size_t ulen = mutt_strlen (*ps);
+ size_t slen;
+ size_t m;
+
+ if (!u || !*u)
+ return 0;
+
+ c = safe_strdup (AssumedCharset);
+ for (c1 = strtok (c, ":"); c1; c1 = strtok (NULL, ":"))
+ {
+ m = convert_string (u, ulen, c1, Charset, &s, &slen);
if (m != (size_t)(-1))
{
+ FREE (&c);
FREE (ps); /* __FREE_CHECKED__ */
*ps = s;
return 0;
}
}
- mutt_convert_string (ps,
- (const char *)mutt_get_default_charset (AssumedCharset),
+ FREE (&c);
+
+ /* pass-thru if the last entry is empty */
+ if (AssumedCharset[strlen (AssumedCharset) - 1] == ':')
+ return 0;
+
+ /* try to convert it replacing invalid chars */
+ mutt_convert_string (ps, c = mutt_get_default_charset (),
Charset, M_ICONV_HOOK_FROM);
+ FREE (&c);
return -1;
}
@@ -827,11 +828,11 @@ void rfc2047_decode (char **pd)
char *t;
size_t tlen;
- n = mutt_strlen (s);
- t = safe_malloc (n + 1);
- strfcpy (t, s, n + 1);
+ t = safe_strdup (s);
convert_nonmime_string (&t);
tlen = mutt_strlen (t);
+ if (dlen < tlen) /* truncation */
+ tlen = dlen;
strncpy (d, t, tlen);
d += tlen;
FREE (&t);