<<< Date Index >>>     <<< Thread Index >>>

Re: What happened to strict_mime?



* Thu Mar 22 2007 Thomas Roessler <roessler@xxxxxxxxxxxxxxxxxx>
> On 2007-03-22 10:12:08 +0100, Thomas Roessler wrote:
> 
> > > +  s = strdup (AssumedCharset);
> > > +  /* If it begins with ":", make it empty */
> > > +  if (s != strtok (s, ":"))
> > > +    *s = '\0';
> > > +  return s;
> 
> This code is actually rather wrong by itself.

Thanks for your advice again.
So I had to keep the original code for mutt_get_default_charset.

Here is my current "hg diff".

-- 
tamo
diff -r 888a57a2b5f2 charset.c
--- a/charset.c Fri Mar 23 10:32:29 2007 -0700
+++ b/charset.c Sat Mar 24 20:37:35 2007 +0900
@@ -288,18 +288,27 @@ int mutt_chscmp (const char *s, const ch
   return !ascii_strcasecmp (buffer, chs);
 }
 
+/*
+ * Returns a pointer to the static buffer fcharset, whose content is:
+ *
+ * - "us-ascii" if AssumedCharset is NULL or ""
+ * - otherwise, the first colon-separated entry of AssumedCharset
+ *   (this entry can be "", e.g. when AssumedCharset begins with ":")
+ */
 char *mutt_get_default_charset ()
 {
   static char fcharset[SHORT_STRING];
-  const char *c = AssumedCharset;
   const char *c1;
 
-  if (c && *c) {
-    c1 = strchr (c, ':');
-    strfcpy (fcharset, c, c1 ? (c1 - c + 1) : sizeof (fcharset));
-    return fcharset;
-  }
-  return strcpy (fcharset, "us-ascii"); /* __STRCPY_CHECKED__ */
+  if (!AssumedCharset || !*AssumedCharset)
+    strfcpy (fcharset, "us-ascii", sizeof (fcharset));
+  else
+  {
+    c1 = strchr (AssumedCharset, ':');
+    strfcpy (fcharset, AssumedCharset,
+       c1 ? (c1 - AssumedCharset + 1) : sizeof (fcharset));
+  }
+  return fcharset;
 }
 
 #ifndef HAVE_ICONV
diff -r 888a57a2b5f2 handler.c
--- a/handler.c Fri Mar 23 10:32:29 2007 -0700
+++ b/handler.c Sat Mar 24 20:38:02 2007 +0900
@@ -1445,7 +1445,7 @@ void mutt_decode_attachment (BODY *b, ST
     char *charset = mutt_get_parameter ("charset", b->parameter);
     if (!charset && AssumedCharset && *AssumedCharset)
       charset = mutt_get_default_charset ();
-    if (charset && Charset)
+    if (charset && *charset && Charset)
       cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
   }
   else if (istext && b->charset)
diff -r 888a57a2b5f2 init.h
--- a/init.h    Fri Mar 23 10:32:29 2007 -0700
+++ b/init.h    Sat Mar 24 20:37:36 2007 +0900
@@ -169,18 +169,29 @@ struct option_t MuttVars[] = {
   { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL 0},
   /*
   ** .pp
-  ** This variable is a colon-separated list of character encoding
-  ** schemes for messages without character encoding indication.
-  ** Header field values and message body content without character encoding
-  ** indication would be assumed that they are written in one of this list.
-  ** By default, all the header fields and message body without any charset
-  ** indication are assumed to be in "us-ascii".
-  ** .pp
+  ** This is a colon-separated list of possible character sets for
+  ** incoming messages. If a header field has no RFC2047-encoded word,
+  ** mutt assumes that the header contains unencoded non-ASCII words
+  ** violating the RFC. Mutt tries to detect the header's character set
+  ** by testing the list entries in turn. If the header can be converted
+  ** from none of them to ``$$charset'', mutt uses the first entry unless
+  ** the list ends with a colon. The trailing colon makes mutt bypass the
+  ** conversion process. This trial-and-error method is not performed for
+  ** the message body, so only the first entry is used to show a body whose
+  ** Content-Type header has no "charset" parameter.
+  ** .pp
+  ** If unset, mutt converts unknown headers and bodies from "us-ascii".
+  ** If you want no conversion, you can trust anything with this setting:
+  ** .pp
+  **   set assumed_charset=":"
+  ** .pp
+  ** But in most cases, you should filter out invalid characters by
+  ** specifying at least one character set.
   ** For example, Japanese users might prefer this:
   ** .pp
   **   set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
   ** .pp
-  ** However, only the first content is valid for the message body.
+  ** Note: This variable takes effect only after reloading the folder.
   */
   { "attach_charset",    DT_STR,  R_NONE, UL &AttachCharset, UL 0 },
   /*
diff -r 888a57a2b5f2 parse.c
--- a/parse.c   Fri Mar 23 10:32:29 2007 -0700
+++ b/parse.c   Sat Mar 24 20:37:36 2007 +0900
@@ -402,9 +402,11 @@ void mutt_parse_content_type (char *s, B
   if (ct->type == TYPETEXT)
   {
     if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
-      mutt_set_parameter ("charset", (AssumedCharset && *AssumedCharset) ?
-                         (const char *) mutt_get_default_charset ()
-                         : "us-ascii", &ct->parameter);
+    {
+      char *assumed_charset = mutt_get_default_charset ();
+      mutt_set_parameter ("charset", *assumed_charset ?
+         assumed_charset : NULL, &ct->parameter);
+    }
   }
 
 }
diff -r 888a57a2b5f2 rfc2047.c
--- a/rfc2047.c Fri Mar 23 10:32:29 2007 -0700
+++ b/rfc2047.c Sat Mar 24 20:37:36 2007 +0900
@@ -87,39 +87,39 @@ static size_t convert_string (ICONV_CONS
   return n;
 }
 
+/* AssumedCharset must be a non-empty string */
 int convert_nonmime_string (char **ps)
 {
-  const char *c, *c1;
-
-  for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
-  {
-    char *u = *ps;
-    char *s;
-    char *fromcode;
-    size_t m, n;
-    size_t ulen = mutt_strlen (*ps);
-    size_t slen;
-
-    if (!u || !*u)
-      return 0;
-
-    c1 = strchr (c, ':');
-    n = c1 ? c1 - c : mutt_strlen (c);
-    if (!n)
-      return 0;
-    fromcode = safe_malloc (n + 1);
-    strfcpy (fromcode, c, n + 1);
-    m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
-    FREE (&fromcode);
+  char *c, *c1;
+  char *u = *ps;
+  char *s;
+  size_t ulen = mutt_strlen (*ps);
+  size_t slen;
+  size_t m;
+
+  if (!ulen)
+    return 0;
+
+  c =  safe_strdup (AssumedCharset);
+  for (c1 = strtok (c, ":"); c1; c1 = strtok (NULL, ":"))
+  {
+    m = convert_string (u, ulen, c1, Charset, &s, &slen);
     if (m != (size_t)(-1))
     {
+      FREE (&c);
       FREE (ps); /* __FREE_CHECKED__ */
       *ps = s;
       return 0;
     }
   }
-  mutt_convert_string (ps,
-      (const char *)mutt_get_default_charset (AssumedCharset),
+  FREE (&c);
+
+  /* pass-thru if the last entry is empty */
+  if (AssumedCharset[strlen (AssumedCharset) - 1] == ':')
+    return 0;
+
+  /* try to convert it replacing invalid chars */
+  mutt_convert_string (ps, mutt_get_default_charset (),
       Charset, M_ICONV_HOOK_FROM);
   return -1;
 }
@@ -804,7 +804,9 @@ void rfc2047_decode (char **pd)
   if (!s || !*s)
     return;
 
-  dlen = 4 * strlen (s); /* should be enough */
+  /* Normally it should not become four times longer.
+   * But convert_nonmime_string() is so tricky. */
+  dlen = 4 * strlen (s);
   d = d0 = safe_malloc (dlen + 1);
 
   while (*s && dlen > 0)
@@ -827,11 +829,14 @@ void rfc2047_decode (char **pd)
        char *t;
        size_t tlen;
 
-       n = mutt_strlen (s);
-       t = safe_malloc (n + 1);
-       strfcpy (t, s, n + 1);
+       t = safe_strdup (s);
        convert_nonmime_string (&t);
        tlen = mutt_strlen (t);
+       if (dlen < tlen)
+       {
+         dprint (4, (debugfile, "rfc2047_decode: convert_nonmime_string returned a very long string\n"));
+         tlen = dlen;
+       }
        strncpy (d, t, tlen);
        d += tlen;
        FREE (&t);