For 1.5.10: assumed_charset

To: mutt-dev@xxxxxxxx
Subject: For 1.5.10: assumed_charset
From: TAKAHASHI Tamotsu <ttakah@xxxxxxxxxxxxxxxxx>
Date: Fri, 22 Apr 2005 19:03:17 +0900
In-reply-to: <20050218133453.GC14700@xxxxxxxxxxxxxxxxxxxx>
List-unsubscribe: <mailto:mutt-dev-request@mutt.org?body=unsubscribe>
Mail-followup-to: mutt-dev@xxxxxxxx
References: <20050212224534.GA65551@xxxxxxxxxxxxxxxxxxxxxxxxxx> <20050218133453.GC14700@xxxxxxxxxxxxxxxxxxxx>
Sender: owner-mutt-dev@xxxxxxxx
User-agent: Mutt/1.5.9i

Takashi has updated his assumed_charset patch.
(http://www.emaillab.org/mutt/1.5.9/patch-1.5.9.tt.assumed_charset.2.gz)
I modified it to satisfy the requirements discussed on mutt-dev.
Here it is. I'd like to see this included in 1.5.10.

-- 
tamo

? stamp-h1
Index: PATCHES
===================================================================
RCS file: /home/roessler/cvs/mutt/PATCHES,v
retrieving revision 3.6
diff -u -u -r3.6 PATCHES
--- PATCHES     9 Dec 2002 17:44:54 -0000       3.6
+++ PATCHES     22 Apr 2005 09:52:17 -0000
@@ -0,0 +1 @@
+patch-1.5.9.tt+tamo.assumed.3
Index: charset.c
===================================================================
RCS file: /home/roessler/cvs/mutt/charset.c,v
retrieving revision 3.9
diff -u -u -r3.9 charset.c
--- charset.c   12 Feb 2005 19:52:28 -0000      3.9
+++ charset.c   22 Apr 2005 09:52:18 -0000
@@ -282,6 +282,21 @@
   return !ascii_strcasecmp (buffer, chs);
 }
 
+char *mutt_get_default_charset () /* first entry of AssumedCharset, or "us-ascii" when it is unset/empty */
+{
+  static char fcharset[SHORT_STRING]; /* result lives here; overwritten by the next call */
+  const char *c = AssumedCharset;
+  size_t n = sizeof (fcharset); /* copy limit: an over-long first entry is truncated, never overflowed */
+
+  if (c && *c)
+  {
+    const char *c1 = strchr (c, ':'); /* end of the first entry of the colon-separated list */
+    strfcpy (fcharset, c, (c1 && (size_t) (c1 - c) < n) ? (size_t) (c1 - c + 1) : n);
+  }
+  else
+    strcpy (fcharset, "us-ascii"); /* __STRCPY_CHECKED__ */
+  return fcharset;
+}
 
 #ifndef HAVE_ICONV
 
Index: charset.h
===================================================================
RCS file: /home/roessler/cvs/mutt/charset.h,v
retrieving revision 3.6
diff -u -u -r3.6 charset.h
--- charset.h   3 Mar 2003 14:01:06 -0000       3.6
+++ charset.h   22 Apr 2005 09:52:18 -0000
@@ -47,6 +47,7 @@
 void fgetconv_close (FGETCONV **);
 
 void mutt_set_langinfo_charset (void);
+char *mutt_get_default_charset ();
 
 #define M_ICONV_HOOK_FROM 1
 #define M_ICONV_HOOK_TO   2
Index: globals.h
===================================================================
RCS file: /home/roessler/cvs/mutt/globals.h,v
retrieving revision 3.16
diff -u -u -r3.16 globals.h
--- globals.h   12 Feb 2005 20:08:19 -0000      3.16
+++ globals.h   22 Apr 2005 09:52:18 -0000
@@ -32,6 +32,7 @@
 
 WHERE char *AliasFile;
 WHERE char *AliasFmt;
+WHERE char *AssumedCharset;
 WHERE char *AttachSep;
 WHERE char *Attribution;
 WHERE char *AttachFormat;
Index: handler.c
===================================================================
RCS file: /home/roessler/cvs/mutt/handler.c,v
retrieving revision 3.19
diff -u -u -r3.19 handler.c
--- handler.c   3 Feb 2005 17:01:43 -0000       3.19
+++ handler.c   22 Apr 2005 09:52:18 -0000
@@ -1728,6 +1728,8 @@
   if (istext && s->flags & M_CHARCONV)
   {
     char *charset = mutt_get_parameter ("charset", b->parameter);
+    if (!charset && AssumedCharset && *AssumedCharset)
+      charset = mutt_get_default_charset ();
     if (charset && Charset)
       cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
   }
Index: init.h
===================================================================
RCS file: /home/roessler/cvs/mutt/init.h,v
retrieving revision 3.70
diff -u -u -r3.70 init.h
--- init.h      23 Mar 2005 10:51:11 -0000      3.70
+++ init.h      22 Apr 2005 09:52:20 -0000
@@ -185,6 +185,22 @@
   ** If set, Mutt will prompt you for carbon-copy (Cc) recipients before
   ** editing the body of an outgoing message.
   */  
+  { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL 0},
+  /*
+  ** .pp
+  ** This variable is a colon-separated list of character encoding
+  ** schemes for messages without character encoding indication.
+  ** Header field values and message body content without character encoding
+  ** indication are assumed to be written in one of the listed encodings.
+  ** By default, all header fields and message bodies without any charset
+  ** indication are assumed to be in "us-ascii".
+  ** .pp
+  ** For example, Japanese users might prefer this:
+  ** .pp
+  **   set assumed_charset="iso-2022-jp:euc-jp:shift_jis"
+  ** .pp
+  ** However, only the first charset in the list is used for the message body.
+  */
   { "attach_format",   DT_STR,  R_NONE, UL &AttachFormat, UL "%u%D%I %t%4n %T%.40d%> [%.7m/%.10M, %.6e%?C?, %C?, %s] " },
   /*
   ** .pp
Index: parse.c
===================================================================
RCS file: /home/roessler/cvs/mutt/parse.c,v
retrieving revision 3.15
diff -u -u -r3.15 parse.c
--- parse.c     21 Feb 2005 04:42:34 -0000      3.15
+++ parse.c     22 Apr 2005 09:52:20 -0000
@@ -213,9 +213,23 @@
 
       if (*s == '"')
       {
+        int state_ascii = 1;
        s++;
-       for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++)
+       for (i=0; *s && i < sizeof (buffer) - 1; i++, s++)
        {
+         if (AssumedCharset && *AssumedCharset) {
+           /* In iso-2022-* the byte for '"' can occur while in a
+            * non-ASCII state, so ignore it there. */
+           if (*s == 0x1b && i < sizeof (buffer) - 2)
+           {
+             if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J'))
+               state_ascii = 1;
+             else
+               state_ascii = 0;
+           }
+         }
+         if (state_ascii && *s == '"')
+           break;
          if (*s == '\\')
          {
            /* Quote the next character */
@@ -384,7 +398,9 @@
   if (ct->type == TYPETEXT)
   {
     if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
-      mutt_set_parameter ("charset", "us-ascii", &ct->parameter);
+      mutt_set_parameter ("charset", (AssumedCharset && *AssumedCharset) ?
+                         (const char *) mutt_get_default_charset ()
+                         : "us-ascii", &ct->parameter);
   }
 
 }
Index: rfc2047.c
===================================================================
RCS file: /home/roessler/cvs/mutt/rfc2047.c,v
retrieving revision 3.8
diff -u -u -r3.8 rfc2047.c
--- rfc2047.c   3 Feb 2005 17:01:44 -0000       3.8
+++ rfc2047.c   22 Apr 2005 09:52:20 -0000
@@ -87,6 +87,41 @@
   return n;
 }
 
+int convert_nonmime_string (char **ps) /* 0: *ps converted or passed through; -1: every listed charset failed */
+{
+  const char *p, *q;
+
+  for (p = AssumedCharset; p; p = q ? q + 1 : 0) /* one pass per colon-separated entry */
+  {
+    char *u = *ps;
+    char *s;
+    char *fromcode;
+    size_t m;
+    size_t ulen = mutt_strlen (*ps);
+    size_t slen;
+
+    if (!u || !*u)
+      return 0; /* nothing to convert */
+
+    q = strchr (p, ':');
+    if (! (q ? q - p : mutt_strlen (p)))
+      return 0; /* pass-through with no conv */
+    fromcode = mutt_substrdup (p, q);
+    m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
+    FREE (&fromcode);
+    if (m != (size_t)(-1))
+    {
+      FREE (ps); /* swap the caller's string for the converted copy */
+      *ps = s;
+      return 0;
+    }
+  }
+  mutt_convert_string (ps, /* fallback: convert from the default (first) charset */
+    (const char *)mutt_get_default_charset (), /* defined without parameters: pass none */
+    Charset, M_ICONV_HOOK_FROM);
+  return -1;
+}
+
 char *mutt_choose_charset (const char *fromcode, const char *charsets,
                      char *u, size_t ulen, char **d, size_t *dlen)
 {
@@ -733,6 +768,19 @@
     if (!(p = find_encoded_word (s, &q)))
     {
       /* no encoded words */
+      if (AssumedCharset && *AssumedCharset)
+      {
+       char *t;
+       size_t tlen;
+
+       t = safe_strdup (s);
+       convert_nonmime_string (&t);
+       tlen = mutt_strlen (t);
+       strncpy (d, t, tlen);
+       d += tlen;
+       FREE (&t);
+       break;
+      }
       strncpy (d, s, dlen);
       d += dlen;
       break;
@@ -770,7 +818,8 @@
 {
   while (a)
   {
-    if (a->personal && strstr (a->personal, "=?") != NULL)
+    if (a->personal && ((strstr (a->personal, "=?") != NULL) || 
+                       (AssumedCharset && *AssumedCharset)))
       rfc2047_decode (&a->personal);
 #ifdef EXACT_ADDRESS
     if (a->val && strstr (a->val, "=?") != NULL)
Index: rfc2047.h
===================================================================
RCS file: /home/roessler/cvs/mutt/rfc2047.h,v
retrieving revision 3.1
diff -u -u -r3.1 rfc2047.h
--- rfc2047.h   11 Dec 2002 11:19:40 -0000      3.1
+++ rfc2047.h   22 Apr 2005 09:52:20 -0000
@@ -18,6 +18,7 @@
 
 char *mutt_choose_charset (const char *fromcode, const char *charsets,
                      char *u, size_t ulen, char **d, size_t *dlen);
+int convert_nonmime_string (char **);
 
 void _rfc2047_encode_string (char **, int, int);
 void rfc2047_encode_adrlist (ADDRESS *, const char *);
Index: rfc2231.c
===================================================================
RCS file: /home/roessler/cvs/mutt/rfc2231.c,v
retrieving revision 3.5
diff -u -u -r3.5 rfc2231.c
--- rfc2231.c   3 Feb 2005 17:01:44 -0000       3.5
+++ rfc2231.c   22 Apr 2005 09:52:20 -0000
@@ -117,6 +117,8 @@
 
       if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?"))
        rfc2047_decode (&p->value);
+      else if (AssumedCharset && *AssumedCharset)
+        convert_nonmime_string (&p->value);
 
       *last = p;
       last = &p->next;

Follow-Ups:
- Re: For 1.5.10: assumed_charset
  - From: Alain Bench

References:
- assumed_charset, file_charset and iconv-hook
  - From: Tamotsu Takahashi
- For 1.5.9: assumed_charset
  - From: TAKAHASHI Tamotsu

Prev by Date: For 1.5.10: (Re: Display bug in the index with arrow_cursor)
Next by Date: [patch] improved user-selection of folders with new mail
Previous by thread: For 1.5.9: assumed_charset
Next by thread: Re: For 1.5.10: assumed_charset
Index(es):
- Date
- Thread