On Monday, 05 September 2005 at 08:06, Brendan Cully wrote: > On Monday, 05 September 2005 at 11:14, James Raftery wrote: > > Hi, > > > > On Sat, Sep 03, 2005 at 10:53:09AM -0700, Brendan Cully wrote: > > > 1. Silently treat the arguments to ~(b|h|H) as simple strings and pass > > > them to the server. You'd lose the ability to do full-text regular > > > expression searches on IMAP folders. On the other hand, client-side > > > searches are currently painful enough that probably no one does > > > them anyway. > > > > Please don't remove full-text regexp searches. Yes, they're very slow > > and very inefficient but in some cases they're the only way to find what > > you might be looking for. If a user is prepared to wait I think they > > should be permitted to. > > > > > 3. A modifier for ~b..., eg $~b or $~h, indicating that the parameters > > > are substrings rather than regular expressions. Would people > > > actually remember to use it or is it just a nuisance? > > > > This would be my preference. > > I'm working on this now. This patch adds a '$' modifier to the pattern language that turns regexp searches into simple string matches. Server-side search only happens when '$' is in effect. Examples: '~b mutt': client regexp match; '$~b mutt': server string match; '$(~b mutt ~f brendan)': server string match for ~b, client string match for ~f; '$(~b mutt $~f brendan)': server string match for ~b, client regexp match for ~f (the inner '$' toggles string matching back off). I'm going to wait to add docs until it seems we have a consensus that this is the way to go...
diff -r 0946eb46a255 imap/command.c --- a/imap/command.c Mon Sep 5 11:23:32 2005 +++ b/imap/command.c Mon Sep 5 09:20:30 2005 @@ -44,6 +44,7 @@ static void cmd_parse_lsub (IMAP_DATA* idata, char* s); static void cmd_parse_fetch (IMAP_DATA* idata, char* s); static void cmd_parse_myrights (IMAP_DATA* idata, char* s); +static void cmd_parse_search (IMAP_DATA* idata, char* s); static char *Capabilities[] = { "IMAP4", @@ -116,6 +117,9 @@ cmd->blen)); } + /* back up over '\0' */ + if (len) + len--; c = mutt_socket_readln (cmd->buf + len, cmd->blen - len, idata->conn); if (c <= 0) { @@ -367,6 +371,8 @@ cmd_parse_lsub (idata, s); else if (ascii_strncasecmp ("MYRIGHTS", s, 8) == 0) cmd_parse_myrights (idata, s); + else if (ascii_strncasecmp ("SEARCH", s, 6) == 0) + cmd_parse_search (idata, s); else if (ascii_strncasecmp ("BYE", s, 3) == 0) { dprint (2, (debugfile, "Handling BYE\n")); @@ -624,3 +630,36 @@ s++; } } + +/* This should be optimised (eg with a tree or hash) */ +static int uid2msgno (IMAP_DATA* idata, unsigned int uid) +{ + int i; + + for (i = 0; i < idata->ctx->msgcount; i++) + { + HEADER* h = idata->ctx->hdrs[i]; + if (HEADER_DATA(h)->uid == uid) + return i; + } + + return -1; +} + +/* cmd_parse_search: store SEARCH response for later use */ +static void cmd_parse_search (IMAP_DATA* idata, char* s) +{ + unsigned int uid; + int msgno; + + dprint (2, (debugfile, "Handling SEARCH\n")); + + while ((s = imap_next_word (s)) && *s != '\0') + { + uid = atoi (s); + msgno = uid2msgno (idata, uid); + + if (msgno >= 0) + idata->ctx->hdrs[uid2msgno (idata, uid)]->matched = 1; + } +} diff -r 0946eb46a255 imap/imap.c --- a/imap/imap.c Mon Sep 5 11:23:32 2005 +++ b/imap/imap.c Mon Sep 5 09:20:30 2005 @@ -1298,6 +1298,151 @@ return msgcount; } +/* returns number of patterns in the search that should be done server-side + * (eg are full-text) */ +static int do_search (const pattern_t* search, int allpats) +{ + int rc = 0; + const pattern_t* pat; + + for (pat = search; pat; pat = 
pat->next) + { + switch (pat->op) + { + case M_BODY: + case M_HEADER: + case M_WHOLE_MSG: + if (pat->stringmatch) + rc++; + break; + default: + if (pat->child && do_search (pat->child, 1)) + rc++; + } + + if (!allpats) + break; + } + + return rc; +} + +/* convert mutt pattern_t to IMAP SEARCH command containing only elements + * that require full-text search (mutt already has what it needs for most + * match types, and does a better job (eg server doesn't support regexps). */ +static int imap_compile_search (const pattern_t* pat, BUFFER* buf) +{ + char term[STRING]; + + if (! do_search (pat, 0)) + return 0; + + if (pat->not) + mutt_buffer_addstr (buf, "NOT "); + + if (pat->child) + { + int clauses; + + if ((clauses = do_search (pat->child, 1)) > 0) + { + const pattern_t* clause = pat->child; + + mutt_buffer_addch (buf, '('); + + while (clauses) + { + if (do_search (clause, 0)) + { + if (pat->op == M_OR && clauses > 1) + mutt_buffer_addstr (buf, "OR "); + clauses--; + + if (imap_compile_search (clause, buf) < 0) + return -1; + + if (clauses) + mutt_buffer_addch (buf, ' '); + + clause = clause->next; + } + } + + mutt_buffer_addch (buf, ')'); + } + } + else + { + char *delim; + + switch (pat->op) + { + case M_HEADER: + mutt_buffer_addstr (buf, "HEADER "); + + /* extract header name */ + if (! 
(delim = strchr (pat->str, ':'))) + { + mutt_error (_("Header search without header name: %s"), pat->str); + return -1; + } + *delim = '\0'; + imap_quote_string (term, sizeof (term), pat->str); + mutt_buffer_addstr (buf, term); + mutt_buffer_addch (buf, ' '); + + /* and field */ + *delim = ':'; + delim++; + SKIPWS(delim); + imap_quote_string (term, sizeof (term), delim); + mutt_buffer_addstr (buf, term); + break; + case M_BODY: + mutt_buffer_addstr (buf, "BODY "); + imap_quote_string (term, sizeof (term), pat->str); + mutt_buffer_addstr (buf, term); + break; + case M_WHOLE_MSG: + mutt_buffer_addstr (buf, "TEXT "); + imap_quote_string (term, sizeof (term), pat->str); + mutt_buffer_addstr (buf, term); + break; + } + } + + return 0; +} + +int imap_search (CONTEXT* ctx, const pattern_t* pat) +{ + BUFFER buf; + IMAP_DATA* idata = (IMAP_DATA*)ctx->data; + int i; + + for (i = 0; i < ctx->msgcount; i++) + ctx->hdrs[i]->matched = 0; + + if (!do_search (pat, 1)) + return 0; + + memset (&buf, 0, sizeof (buf)); + mutt_buffer_addstr (&buf, "UID SEARCH "); + if (imap_compile_search (pat, &buf) < 0) + { + FREE (&buf.data); + return -1; + } + if (imap_exec (idata, buf.data, 0) < 0) + { + FREE (&buf.data); + return -1; + } + + FREE (&buf.data); + return 0; +} + /* all this listing/browsing is a mess. I don't like that name is a pointer * into idata->buf (used to be a pointer into the passed in buffer, just * as bad), nor do I like the fact that the fetch is done here. 
This diff -r 0946eb46a255 imap/imap.h --- a/imap/imap.h Mon Sep 5 11:23:32 2005 +++ b/imap/imap.h Mon Sep 5 09:20:30 2005 @@ -41,6 +41,7 @@ void imap_close_mailbox (CONTEXT *ctx); int imap_buffy_check (char *path); int imap_mailbox_check (char *path, int new); +int imap_search (CONTEXT* ctx, const pattern_t* pat); int imap_subscribe (char *path, int subscribe); int imap_complete (char* dest, size_t dlen, char* path); diff -r 0946eb46a255 mutt.h --- a/mutt.h Mon Sep 5 11:23:32 2005 +++ b/mutt.h Mon Sep 5 09:20:30 2005 @@ -782,7 +782,8 @@ /* flag to mutt_pattern_comp() */ -#define M_FULL_MSG 1 /* enable body and header matching */ +#define M_FULL_MSG (1<<0) /* enable body and header matching */ +#define M_STRINGMATCH (1<<1) /* use string match instead of regexp */ typedef enum { M_MATCH_FULL_ADDRESS = 1 @@ -791,12 +792,14 @@ typedef struct pattern_t { short op; - short not; - short alladdr; + unsigned int not : 1; + unsigned int alladdr : 1; + unsigned int stringmatch : 1; int min; int max; struct pattern_t *next; struct pattern_t *child; /* arguments to logical op */ + char *str; regex_t *rx; } pattern_t; diff -r 0946eb46a255 pattern.c --- a/pattern.c Mon Sep 5 11:23:32 2005 +++ b/pattern.c Mon Sep 5 09:20:30 2005 @@ -35,9 +35,15 @@ #include "mutt_crypt.h" +#ifdef USE_IMAP +#include "mx.h" +#include "imap/imap.h" +#endif + static int eat_regexp (pattern_t *pat, BUFFER *, BUFFER *); static int eat_date (pattern_t *pat, BUFFER *, BUFFER *); static int eat_range (pattern_t *pat, BUFFER *, BUFFER *); +static int patmatch (const pattern_t *pat, const char *buf); struct pattern_flags { @@ -136,7 +142,7 @@ } static int -msg_search (CONTEXT *ctx, regex_t *rx, int op, int msgno) +msg_search (CONTEXT *ctx, pattern_t* pat, int msgno) { char tempfile[_POSIX_PATH_MAX]; MESSAGE *msg = NULL; @@ -164,10 +170,10 @@ return (0); } - if (op != M_BODY) + if (pat->op != M_BODY) mutt_copy_header (msg->fp, h, s.fpout, CH_FROM | CH_DECODE, NULL); - if (op != M_HEADER) + if (pat->op != 
M_HEADER) { mutt_parse_mime_message (ctx, h); @@ -197,14 +203,14 @@ { /* raw header / body */ fp = msg->fp; - if (op != M_BODY) + if (pat->op != M_BODY) { fseek (fp, h->offset, 0); lng = h->content->offset - h->offset; } - if (op != M_HEADER) + if (pat->op != M_HEADER) { - if (op == M_BODY) + if (pat->op == M_BODY) fseek (fp, h->content->offset, 0); lng += h->content->length; } @@ -216,14 +222,14 @@ /* search the file "fp" */ while (lng > 0) { - if (op == M_HEADER) + if (pat->op == M_HEADER) { if (*(buf = mutt_read_rfc822_line (fp, buf, &blen)) == '\0') break; } else if (fgets (buf, blen - 1, fp) == NULL) break; /* don't loop forever */ - if (regexec (rx, buf, 0, NULL, 0) == 0) + if (patmatch (pat, buf) == 0) { match = 1; break; @@ -257,16 +263,26 @@ snprintf (err->data, err->dsize, _("Error in expression: %s"), s->dptr); return (-1); } - pat->rx = safe_malloc (sizeof (regex_t)); - r = REGCOMP (pat->rx, buf.data, REG_NEWLINE | REG_NOSUB | mutt_which_case (buf.data)); - FREE (&buf.data); - if (r) - { - regerror (r, pat->rx, err->data, err->dsize); - regfree (pat->rx); - FREE (&pat->rx); - return (-1); - } + + if (pat->stringmatch) + { + pat->str = safe_strdup (buf.data); + FREE (&buf.data); + } + else + { + pat->rx = safe_malloc (sizeof (regex_t)); + r = REGCOMP (pat->rx, buf.data, REG_NEWLINE | REG_NOSUB | mutt_which_case (buf.data)); + FREE (&buf.data); + if (r) + { + regerror (r, pat->rx, err->data, err->dsize); + regfree (pat->rx); + FREE (&pat->rx); + return (-1); + } + } + return 0; } @@ -666,6 +682,14 @@ return 0; } +static int patmatch (const pattern_t* pat, const char* buf) +{ + if (pat->stringmatch) + return !strstr (buf, pat->str); + else + return regexec (pat->rx, buf, 0, NULL, 0); +} + static struct pattern_flags *lookup_tag (char tag) { int i; @@ -708,6 +732,7 @@ regfree (tmp->rx); FREE (&tmp->rx); } + FREE (&tmp->str); if (tmp->child) mutt_pattern_free (&tmp->child); FREE (&tmp); @@ -721,6 +746,7 @@ pattern_t *last = NULL; int not = 0; int alladdr = 
0; + int stringmatch = 0; int or = 0; int implicit = 1; /* used to detect logical AND operator */ struct pattern_flags *entry; @@ -731,6 +757,9 @@ memset (&ps, 0, sizeof (ps)); ps.dptr = s; ps.dsize = mutt_strlen (s); + + if (flags & M_STRINGMATCH) + stringmatch = 1; while (*ps.dptr) { @@ -745,6 +774,10 @@ ps.dptr++; not = !not; break; + case '$': + ps.dptr++; + stringmatch = !stringmatch; + break; case '|': if (!or) { @@ -770,6 +803,7 @@ implicit = 0; not = 0; alladdr = 0; + stringmatch = flags & M_STRINGMATCH ? 1 : 0; break; case '~': if (implicit && or) @@ -786,8 +820,10 @@ tmp = new_pattern (); tmp->not = not; tmp->alladdr = alladdr; + tmp->stringmatch = stringmatch; not = 0; - alladdr=0; + alladdr = 0; + stringmatch = flags & M_STRINGMATCH ? 1 : 0; if (last) last->next = tmp; @@ -839,6 +875,10 @@ } /* compile the sub-expression */ buf = mutt_substrdup (ps.dptr + 1, p); + if (stringmatch) + flags |= M_STRINGMATCH; + else + flags &= ~M_STRINGMATCH; if ((tmp = mutt_pattern_comp (buf, flags, err)) == NULL) { FREE (&buf); @@ -896,8 +936,7 @@ return 0; } -static int match_adrlist (regex_t *rx, int match_personal, int alladdr, - int n, ...) +static int match_adrlist (pattern_t *pat, int match_personal, int n, ...) { va_list ap; ADDRESS *a; @@ -907,24 +946,22 @@ { for (a = va_arg (ap, ADDRESS *) ; a ; a = a->next) { - if (alladdr^ - ((a->mailbox && regexec (rx, a->mailbox, 0, NULL, 0) == 0) || - (match_personal && a->personal && - regexec (rx, a->personal, 0, NULL, 0) == 0))) + if (pat->alladdr ^ ((a->mailbox && patmatch (pat, a->mailbox) == 0) || + (match_personal && a->personal && patmatch (pat, a->personal) == 0))) { va_end (ap); - return (! alladdr); /* Found match, or non-match if alladdr */ + return (! 
pat->alladdr); /* Found match, or non-match if alladdr */ } } } va_end (ap); - return alladdr; /* No matches, or all matches if alladdr */ -} - -static int match_reference (regex_t *rx, LIST *refs) + return pat->alladdr; /* No matches, or all matches if alladdr */ +} + +static int match_reference (pattern_t *pat, LIST *refs) { for (; refs; refs = refs->next) - if (regexec (rx, refs->data, 0, NULL, 0) == 0) + if (patmatch (pat, refs->data) == 0) return 1; return 0; } @@ -1013,47 +1050,50 @@ case M_BODY: case M_HEADER: case M_WHOLE_MSG: - return (pat->not ^ msg_search (ctx, pat->rx, pat->op, h->msgno)); +#ifdef USE_IMAP + /* IMAP search sets h->matched at search compile time */ + if (Context->magic == M_IMAP && pat->stringmatch) + return (h->matched); +#endif + return (pat->not ^ msg_search (ctx, pat, h->msgno)); case M_SENDER: - return (pat->not ^ match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS, - pat->alladdr, 1, h->env->sender)); + return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 1, + h->env->sender)); case M_FROM: - return (pat->not ^ match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS, - pat->alladdr, 1, h->env->from)); + return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 1, + h->env->from)); case M_TO: - return (pat->not ^ match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS, - pat->alladdr, 1, h->env->to)); + return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 1, + h->env->to)); case M_CC: - return (pat->not ^ match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS, - pat->alladdr, 1, h->env->cc)); + return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 1, + h->env->cc)); case M_SUBJECT: - return (pat->not ^ (h->env && h->env->subject && regexec (pat->rx, h->env->subject, 0, NULL, 0) == 0)); + return (pat->not ^ (h->env->subject && patmatch (pat, h->env->subject) == 0)); case M_ID: - return (pat->not ^ (h->env && h->env->message_id && regexec (pat->rx, h->env->message_id, 0, NULL, 0) == 
0)); + return (pat->not ^ (h->env->message_id && patmatch (pat, h->env->message_id) == 0)); case M_SCORE: return (pat->not ^ (h->score >= pat->min && (pat->max == M_MAXRANGE || h->score <= pat->max))); case M_SIZE: return (pat->not ^ (h->content->length >= pat->min && (pat->max == M_MAXRANGE || h->content->length <= pat->max))); case M_REFERENCE: - return (pat->not ^ match_reference (pat->rx, h->env->references)); + return (pat->not ^ match_reference (pat, h->env->references)); case M_ADDRESS: - return (pat->not ^ (h->env && match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS, - pat->alladdr, 4, h->env->from, - h->env->sender, h->env->to, h->env->cc))); + return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 4, + h->env->from, h->env->sender, + h->env->to, h->env->cc)); case M_RECIPIENT: - return (pat->not ^ (h->env && match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS, - pat->alladdr, 2, h->env->to, h->env->cc))); + return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, + 2, h->env->to, h->env->cc)); case M_LIST: /* known list, subscribed or not */ - return (pat->not ^ (h->env - && mutt_is_list_cc (pat->alladdr, h->env->to, h->env->cc))); + return (pat->not ^ mutt_is_list_cc (pat->alladdr, h->env->to, h->env->cc)); case M_SUBSCRIBED_LIST: - return (pat->not ^ (h->env - && mutt_is_list_recipient (pat->alladdr, h->env->to, h->env->cc))); + return (pat->not ^ mutt_is_list_recipient (pat->alladdr, h->env->to, h->env->cc)); case M_PERSONAL_RECIP: - return (pat->not ^ (h->env && match_user (pat->alladdr, h->env->to, h->env->cc))); + return (pat->not ^ match_user (pat->alladdr, h->env->to, h->env->cc)); case M_PERSONAL_FROM: - return (pat->not ^ (h->env && match_user (pat->alladdr, h->env->from, NULL))); + return (pat->not ^ match_user (pat->alladdr, h->env->from, NULL)); case M_COLLAPSED: return (pat->not ^ (h->collapsed && h->num_hidden > 1)); case M_CRYPT_SIGN: @@ -1073,9 +1113,9 @@ break; return (pat->not ^ ((h->security & 
APPLICATION_PGP) && (h->security & PGPKEY))); case M_XLABEL: - return (pat->not ^ (h->env->x_label && regexec (pat->rx, h->env->x_label, 0, NULL, 0) == 0)); + return (pat->not ^ (h->env->x_label && patmatch (pat, h->env->x_label) == 0)); case M_HORMEL: - return (pat->not ^ (h->env->spam && h->env->spam->data && regexec (pat->rx, h->env->spam->data, 0, NULL, 0) == 0)); + return (pat->not ^ (h->env->spam && h->env->spam->data && patmatch (pat, h->env->spam->data) == 0)); case M_DUPLICATED: return (pat->not ^ (h->thread && h->thread->duplicate_thread)); case M_UNREFERENCED: @@ -1171,6 +1211,11 @@ return (-1); } +#ifdef USE_IMAP + if (Context->magic == M_IMAP && imap_search (Context, pat) < 0) + return -1; +#endif + mutt_message _("Executing command on matching messages..."); #define THIS_BODY Context->hdrs[i]->content @@ -1303,6 +1348,10 @@ { for (i = 0; i < Context->msgcount; i++) Context->hdrs[i]->searched = 0; +#ifdef USE_IMAP + if (Context->magic == M_IMAP && imap_search (Context, SearchPattern) < 0) + return -1; +#endif unset_option (OPTSEARCHINVALID); }
Attachment:
pgpE1tAEpjkOV.pgp
Description: PGP signature