<<< Date Index >>>     <<< Thread Index >>>

Re: What happened to strict_mime?



On Tue, Mar 20, 2007 at 10:27:04AM +0100,
 Thomas Roessler wrote:

> On 2007-03-20 12:01:49 +0900, TAKAHASHI Tamotsu wrote:
> 
> >strict_mime=yes:
> > unset ignore_linear_white_space
> 
> >strict_mime=no:
> > set ignore_linear_white_space
> 
> Why on earth do we need two different behaviors for encoding or
> decoding subject headers as far as whitespace is concerned?  There's
> a spec-compliant way to do this, let's do that, and be done with.
> 
> If the other approach is really needed to deal with widespread
> breakage (I don't think it is), implement that, and implement it
> always.

In earlier versions of Mutt, linear-white-space between an encoded-word and
text was displayed as is.
Because that behavior changed, I made it an option.

> As an aside, the code used to implement this (in the instance around
> line 816 of rfc2047.c) looks like an incredibly convoluted and
> inefficient way of saying something like this:
> 
>   if (islwsp (*s))
>   {
>     *d = ' ';
>     d++;
>     dlen--;
>   }
>   
>   while (islwsp (*s) && *s)
>     s++;

Thank you for your suggestions.
The attached patch makes the following corrections:
 - Remove the option $ignore_linear_white_space.
 - By default, replace linear-white-space between encoded-word
   and text with a single space.
 - Clean up the code.

> Going further through the current code of rfc2047.c, the
> AssumedCharset related code says things like:
> 
>           n = mutt_strlen (s);
>           t = safe_malloc (n + 1);
>           strfcpy (t, s, n + 1);
> 
> That's usually spelled as
> 
>           t = safe_strdup (s);
> 
> in mutt code.

I think it is good in the code that Tamotsu wrote. 


-- 
TAKIZAWA Takashi
http://www.emaillab.org/

diff -r b0172175cc89 init.h
--- a/init.h    Tue Mar 20 13:39:29 2007 -0700
+++ b/init.h    Wed Mar 21 21:05:39 2007 +0900
@@ -850,13 +850,6 @@ struct option_t MuttVars[] = {
   ** Specifies the hostname to use after the ``@'' in local e-mail
   ** addresses.  This overrides the compile time definition obtained from
   ** /etc/resolv.conf.
-  */
-  { "ignore_linear_white_space",    DT_BOOL, R_NONE, OPTIGNORELWS, 0 },
-  /*
-  ** .pp
-  ** This option replaces linear-white-space between encoded-word
-  ** and *text to a single space to prevent the display of MIME-encoded
-  ** ``Subject'' field from being divided into multiple lines.
   */
   { "ignore_list_reply_to", DT_BOOL, R_NONE, OPTIGNORELISTREPLYTO, 0 },
   /*
diff -r b0172175cc89 mutt.h
--- a/mutt.h    Tue Mar 20 13:39:29 2007 -0700
+++ b/mutt.h    Wed Mar 21 21:05:40 2007 +0900
@@ -372,7 +372,6 @@ enum
   OPTHIDETHREADSUBJECT,
   OPTHIDETOPLIMITED,
   OPTHIDETOPMISSING,
-  OPTIGNORELWS,
   OPTIGNORELISTREPLYTO,
 #ifdef USE_IMAP
   OPTIMAPCHECKSUBSCRIBED,
diff -r b0172175cc89 rfc2047.c
--- a/rfc2047.c Tue Mar 20 13:39:29 2007 -0700
+++ b/rfc2047.c Wed Mar 21 22:54:26 2007 +0900
@@ -748,54 +748,13 @@ static const char *find_encoded_word (co
   return 0;
 }
 
-/* return length of linear-white-space */
-static size_t lwslen (const char *s, size_t n)
-{
-  const char *p = s;
-  size_t len = n;
-
-  if (n <= 0)
-    return 0;
-
-  for (; p < s + n; p++)
-    if (!strchr (" \t\r\n", *p))
-    {
-      len = (size_t)(p - s);
-      break;
-    }
-  if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF */
-    len = (size_t)0;
-  return len;
-}
-
-/* return length of linear-white-space : reverse */
-static size_t lwsrlen (const char *s, size_t n)
-{
-  const char *p = s + n - 1;
-  size_t len = n;
-
-  if (n <= 0)
-    return 0;
-
-  if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */
-    return (size_t)0;
-
-  for (; p >= s; p--)
-    if (!strchr (" \t\r\n", *p))
-    {
-      len = (size_t)(s + n - 1 - p);
-      break;
-    }
-  return len;
-}
-
 /* try to decode anything that looks like a valid RFC2047 encoded
  * header field, ignoring RFC822 parsing rules
  */
 void rfc2047_decode (char **pd)
 {
-  const char *p, *q;
-  size_t m, n;
+  const char *p, *q, *r;
+  size_t n;
   int found_encoded = 0;
   char *d0, *d;
   const char *s = *pd;
@@ -812,15 +771,16 @@ void rfc2047_decode (char **pd)
     if (!(p = find_encoded_word (s, &q)))
     {
       /* no encoded words */
-      if (option (OPTIGNORELWS))
+      if (found_encoded)
       {
-        n = mutt_strlen (s);
-        if (found_encoded && (m = lwslen (s, n)) != 0)
-        {
-          if (m != n)
-            *d = ' ', d++, dlen--;
-          s += m;
-        }
+       while (*s && strchr (" \t\r\n", *s))
+         s++;
+       if (dlen > 0)
+       {
+         *d = *s ? ' ' : '\n';
+         d++;
+         dlen--;
+       }
       }
       if (AssumedCharset && *AssumedCharset)
       {
@@ -842,38 +802,32 @@ void rfc2047_decode (char **pd)
       break;
     }
 
-    if (p != s)
-    {
-      n = (size_t) (p - s);
-      /* ignore spaces between encoded word
-       * and linear-white-space between encoded word and *text */
-      if (option (OPTIGNORELWS))
+    if (p != s && found_encoded)
+    {
+      while (*s && strchr(" \t\r\n", *s))
+       s++;
+      if (p != s && dlen > 0)
       {
-        if (found_encoded && (m = lwslen (s, n)) != 0)
-        {
-          if (m != n)
-            *d = ' ', d++, dlen--;
-          n -= m, s += m;
-        }
-
-        if ((m = n - lwsrlen (s, n)) != 0)
-        {
-          if (m > dlen)
-            m = dlen;
-          memcpy (d, s, m);
-          d += m;
-          dlen -= m;
-          if (m != n)
-            *d = ' ', d++, dlen--;
-        }
+       *d = ' ';
+       d++;
+       dlen--;
       }
-      else if (!found_encoded || strspn (s, " \t\r\n") != n)
+    }
+
+    if (p != s) {
+      for (r = p - 1; s < r && strchr(" \t\r\n", *r); r--)
+       ;
+      n = (size_t) (r - s + 1);
+      if (n > dlen)
+       n = dlen;
+      memcpy (d, s, n);
+      d += n;
+      dlen -= n;
+      if (dlen > 0)
       {
-       if (n > dlen)
-         n = dlen;
-       memcpy (d, s, n);
-       d += n;
-       dlen -= n;
+       *d = ' ';
+       d++;
+       dlen--;
       }
     }