For 1.5.10: assumed_charset
Takashi has updated his assumed_charset patch.
(http://www.emaillab.org/mutt/1.5.9/patch-1.5.9.tt.assumed_charset.2.gz)
I modified it to satisfy the requirements discussed on mutt-dev.
Here it is. I'd like to see this included in 1.5.10.
--
tamo
? stamp-h1
Index: PATCHES
===================================================================
RCS file: /home/roessler/cvs/mutt/PATCHES,v
retrieving revision 3.6
diff -u -u -r3.6 PATCHES
--- PATCHES 9 Dec 2002 17:44:54 -0000 3.6
+++ PATCHES 22 Apr 2005 09:52:17 -0000
@@ -0,0 +1 @@
+patch-1.5.9.tt+tamo.assumed.3
Index: charset.c
===================================================================
RCS file: /home/roessler/cvs/mutt/charset.c,v
retrieving revision 3.9
diff -u -u -r3.9 charset.c
--- charset.c 12 Feb 2005 19:52:28 -0000 3.9
+++ charset.c 22 Apr 2005 09:52:18 -0000
@@ -282,6 +282,21 @@
return !ascii_strcasecmp (buffer, chs);
}
+/* Return the first entry of the colon-separated AssumedCharset list, or
+ * "us-ascii" if it is unset or empty.  The result is a static buffer. */
+char *mutt_get_default_charset ()
+{
+  static char fcharset[SHORT_STRING];
+  const char *c = AssumedCharset, *c1;
+  size_t n = sizeof (fcharset);
+
+  if (!c || !*c)
+    return strcpy (fcharset, "us-ascii"); /* __STRCPY_CHECKED__ */
+  if ((c1 = strchr (c, ':')) && (size_t) (c1 - c + 1) < n)
+    n = c1 - c + 1; /* else keep n clamped: over-long entry is truncated */
+  strfcpy (fcharset, c, n);
+  return fcharset;
+}
#ifndef HAVE_ICONV
Index: charset.h
===================================================================
RCS file: /home/roessler/cvs/mutt/charset.h,v
retrieving revision 3.6
diff -u -u -r3.6 charset.h
--- charset.h 3 Mar 2003 14:01:06 -0000 3.6
+++ charset.h 22 Apr 2005 09:52:18 -0000
@@ -47,6 +47,7 @@
void fgetconv_close (FGETCONV **);
void mutt_set_langinfo_charset (void);
+char *mutt_get_default_charset ();
#define M_ICONV_HOOK_FROM 1
#define M_ICONV_HOOK_TO 2
Index: globals.h
===================================================================
RCS file: /home/roessler/cvs/mutt/globals.h,v
retrieving revision 3.16
diff -u -u -r3.16 globals.h
--- globals.h 12 Feb 2005 20:08:19 -0000 3.16
+++ globals.h 22 Apr 2005 09:52:18 -0000
@@ -32,6 +32,7 @@
WHERE char *AliasFile;
WHERE char *AliasFmt;
+WHERE char *AssumedCharset;
WHERE char *AttachSep;
WHERE char *Attribution;
WHERE char *AttachFormat;
Index: handler.c
===================================================================
RCS file: /home/roessler/cvs/mutt/handler.c,v
retrieving revision 3.19
diff -u -u -r3.19 handler.c
--- handler.c 3 Feb 2005 17:01:43 -0000 3.19
+++ handler.c 22 Apr 2005 09:52:18 -0000
@@ -1728,6 +1728,8 @@
if (istext && s->flags & M_CHARCONV)
{
char *charset = mutt_get_parameter ("charset", b->parameter);
+ if (!charset && AssumedCharset && *AssumedCharset)
+ charset = mutt_get_default_charset ();
if (charset && Charset)
cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
}
Index: init.h
===================================================================
RCS file: /home/roessler/cvs/mutt/init.h,v
retrieving revision 3.70
diff -u -u -r3.70 init.h
--- init.h 23 Mar 2005 10:51:11 -0000 3.70
+++ init.h 22 Apr 2005 09:52:20 -0000
@@ -185,6 +185,22 @@
** If set, Mutt will prompt you for carbon-copy (Cc) recipients before
** editing the body of an outgoing message.
*/
+ { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL 0},
+ /*
+ ** .pp
+ ** This variable is a colon-separated list of character encoding
+ ** schemes for messages without character encoding indication.
+ ** Header field values and message body content without character encoding
+ ** indication are assumed to be written in one of the charsets in this list.
+ ** By default, all the header fields and message body without any charset
+ ** indication are assumed to be in "us-ascii".
+ ** .pp
+ ** For example, Japanese users might prefer this:
+ ** .pp
+ ** set assumed_charset="iso-2022-jp:euc-jp:shift_jis"
+ ** .pp
+ ** However, only the first entry is used for the message body.
+ */
{ "attach_format", DT_STR, R_NONE, UL &AttachFormat, UL "%u%D%I %t%4n
%T%.40d%> [%.7m/%.10M, %.6e%?C?, %C?, %s] " },
/*
** .pp
Index: parse.c
===================================================================
RCS file: /home/roessler/cvs/mutt/parse.c,v
retrieving revision 3.15
diff -u -u -r3.15 parse.c
--- parse.c 21 Feb 2005 04:42:34 -0000 3.15
+++ parse.c 22 Apr 2005 09:52:20 -0000
@@ -213,9 +213,23 @@
if (*s == '"')
{
+ int state_ascii = 1;
s++;
- for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++)
+ for (i=0; *s && i < sizeof (buffer) - 1; i++, s++)
{
+ if (AssumedCharset && *AssumedCharset) {
+ /* In iso-2022-*, a '"' byte can occur inside a non-ascii state;
+ * ignore it there. */
+ if (*s == 0x1b && i < sizeof (buffer) - 2)
+ {
+ if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J'))
+ state_ascii = 1;
+ else
+ state_ascii = 0;
+ }
+ }
+ if (state_ascii && *s == '"')
+ break;
if (*s == '\\')
{
/* Quote the next character */
@@ -384,7 +398,9 @@
if (ct->type == TYPETEXT)
{
if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
- mutt_set_parameter ("charset", "us-ascii", &ct->parameter);
+ mutt_set_parameter ("charset", (AssumedCharset && *AssumedCharset) ?
+ (const char *) mutt_get_default_charset ()
+ : "us-ascii", &ct->parameter);
}
}
Index: rfc2047.c
===================================================================
RCS file: /home/roessler/cvs/mutt/rfc2047.c,v
retrieving revision 3.8
diff -u -u -r3.8 rfc2047.c
--- rfc2047.c 3 Feb 2005 17:01:44 -0000 3.8
+++ rfc2047.c 22 Apr 2005 09:52:20 -0000
@@ -87,6 +87,41 @@
return n;
}
+/* Convert the unlabelled string *ps to Charset, trying each entry of the
+ * colon-separated AssumedCharset list in turn.  Returns 0 as soon as
+ * convert_string() succeeds for an entry (*ps is freed and replaced), or
+ * when *ps or the current entry is empty (*ps is left untouched).  If no
+ * entry succeeds, converts from the first entry and returns -1. */
+int convert_nonmime_string (char **ps)
+{
+  const char *p, *q;
+
+  for (p = AssumedCharset; p; p = q ? q + 1 : NULL)
+  {
+    char *u = *ps, *s, *fromcode;
+    size_t m, slen;
+
+    if (!u || !*u)
+      return 0;
+
+    q = strchr (p, ':');
+    if (! (q ? q - p : mutt_strlen (p)))
+      return 0; /* pass-through with no conv */
+    fromcode = mutt_substrdup (p, q);
+    m = convert_string (u, mutt_strlen (u), fromcode, Charset, &s, &slen);
+    FREE (&fromcode);
+    if (m != (size_t)(-1))
+    {
+      FREE (ps);
+      *ps = s;
+      return 0;
+    }
+  }
+  mutt_convert_string (ps, mutt_get_default_charset (),
+                       Charset, M_ICONV_HOOK_FROM);
+  return -1;
+}
+
char *mutt_choose_charset (const char *fromcode, const char *charsets,
char *u, size_t ulen, char **d, size_t *dlen)
{
@@ -733,6 +768,19 @@
if (!(p = find_encoded_word (s, &q)))
{
/* no encoded words */
+ if (AssumedCharset && *AssumedCharset)
+ {
+ char *t;
+ size_t tlen;
+
+ t = safe_strdup (s);
+ convert_nonmime_string (&t);
+ tlen = mutt_strlen (t);
+ strncpy (d, t, tlen);
+ d += tlen;
+ FREE (&t);
+ break;
+ }
strncpy (d, s, dlen);
d += dlen;
break;
@@ -770,7 +818,8 @@
{
while (a)
{
- if (a->personal && strstr (a->personal, "=?") != NULL)
+ if (a->personal && ((strstr (a->personal, "=?") != NULL) ||
+ (AssumedCharset && *AssumedCharset)))
rfc2047_decode (&a->personal);
#ifdef EXACT_ADDRESS
if (a->val && strstr (a->val, "=?") != NULL)
Index: rfc2047.h
===================================================================
RCS file: /home/roessler/cvs/mutt/rfc2047.h,v
retrieving revision 3.1
diff -u -u -r3.1 rfc2047.h
--- rfc2047.h 11 Dec 2002 11:19:40 -0000 3.1
+++ rfc2047.h 22 Apr 2005 09:52:20 -0000
@@ -18,6 +18,7 @@
char *mutt_choose_charset (const char *fromcode, const char *charsets,
char *u, size_t ulen, char **d, size_t *dlen);
+int convert_nonmime_string (char **);
void _rfc2047_encode_string (char **, int, int);
void rfc2047_encode_adrlist (ADDRESS *, const char *);
Index: rfc2231.c
===================================================================
RCS file: /home/roessler/cvs/mutt/rfc2231.c,v
retrieving revision 3.5
diff -u -u -r3.5 rfc2231.c
--- rfc2231.c 3 Feb 2005 17:01:44 -0000 3.5
+++ rfc2231.c 22 Apr 2005 09:52:20 -0000
@@ -117,6 +117,8 @@
if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?"))
rfc2047_decode (&p->value);
+ else if (AssumedCharset && *AssumedCharset)
+ convert_nonmime_string (&p->value);
*last = p;
last = &p->next;