On Monday, 05 September 2005 at 19:06, Oswald Buddenhagen wrote:
> On Mon, Sep 05, 2005 at 09:31:00AM -0700, Brendan Cully wrote:
> > This patch adds a '$' modifier to the pattern language that turns
> > regexp searches into simple string matches.
> >
> > Examples:
> > ~b mutt: client regexp match
> >
> i for one would change the ~ to = instead of prepending $ - less to type
> and more intuitive (to somebody with perl knowledge ;).

Good idea. It gets rid of the nasty nested modifier semantics too.
Here's a patch that adds a = operator, identical to ~ but produces a
string match instead of a regexp match. It still does an implicit string
match if the regexp contains no metacharacters.
diff -r 0946eb46a255 imap/command.c
--- a/imap/command.c Mon Sep 5 11:23:32 2005
+++ b/imap/command.c Mon Sep 5 10:47:50 2005
@@ -44,6 +44,7 @@
static void cmd_parse_lsub (IMAP_DATA* idata, char* s);
static void cmd_parse_fetch (IMAP_DATA* idata, char* s);
static void cmd_parse_myrights (IMAP_DATA* idata, char* s);
+static void cmd_parse_search (IMAP_DATA* idata, char* s);
static char *Capabilities[] = {
"IMAP4",
@@ -116,6 +117,9 @@
cmd->blen));
}
+ /* back up over '\0' */
+ if (len)
+ len--;
c = mutt_socket_readln (cmd->buf + len, cmd->blen - len, idata->conn);
if (c <= 0)
{
@@ -367,6 +371,8 @@
cmd_parse_lsub (idata, s);
else if (ascii_strncasecmp ("MYRIGHTS", s, 8) == 0)
cmd_parse_myrights (idata, s);
+ else if (ascii_strncasecmp ("SEARCH", s, 6) == 0)
+ cmd_parse_search (idata, s);
else if (ascii_strncasecmp ("BYE", s, 3) == 0)
{
dprint (2, (debugfile, "Handling BYE\n"));
@@ -624,3 +630,36 @@
s++;
}
}
+
+/* uid2msgno: find the index in idata->ctx->hdrs of the message whose
+ * HEADER_DATA uid equals uid. Returns that index, or -1 when no message
+ * in the mailbox carries that uid. The lookup is a plain linear scan and
+ * should be optimised (eg with a tree or hash). */
+static int uid2msgno (IMAP_DATA* idata, unsigned int uid)
+{
+  int msgno = 0;
+
+  while (msgno < idata->ctx->msgcount)
+  {
+    HEADER* hdr = idata->ctx->hdrs[msgno];
+
+    if (HEADER_DATA(hdr)->uid == uid)
+      return msgno;
+    msgno++;
+  }
+
+  return -1;
+}
+
+/* cmd_parse_search: store SEARCH response for later use.
+ *   idata: connection state; idata->ctx is the mailbox being searched.
+ *   s:     the response line, walked one word at a time with
+ *          imap_next_word; each word is parsed as a message UID.
+ * Every UID that uid2msgno can map to a message has that header's
+ * matched flag set. UIDs with no corresponding header are ignored. */
+static void cmd_parse_search (IMAP_DATA* idata, char* s)
+{
+  unsigned int uid;
+  int msgno;
+
+  dprint (2, (debugfile, "Handling SEARCH\n"));
+
+  while ((s = imap_next_word (s)) && *s != '\0')
+  {
+    /* UIDs are unsigned 32-bit numbers; atoi() overflows above INT_MAX */
+    uid = (unsigned int) strtoul (s, NULL, 10);
+    msgno = uid2msgno (idata, uid);
+
+    /* reuse the index found above rather than scanning the mailbox again */
+    if (msgno >= 0)
+      idata->ctx->hdrs[msgno]->matched = 1;
+  }
+}
diff -r 0946eb46a255 imap/imap.c
--- a/imap/imap.c Mon Sep 5 11:23:32 2005
+++ b/imap/imap.c Mon Sep 5 10:47:50 2005
@@ -1298,6 +1298,151 @@
return msgcount;
}
+/* do_search: count the patterns that should be evaluated server-side
+ * (eg are full-text).
+ *   search:  first pattern of a sibling list (may be NULL, giving 0).
+ *   allpats: non-zero to walk the whole list via ->next, zero to look at
+ *            the first pattern only.
+ * A body/header/whole-message pattern counts when its stringmatch flag is
+ * set; any other pattern counts once if it has children and at least one
+ * of them (recursively) counts. */
+static int do_search (const pattern_t* search, int allpats)
+{
+  const pattern_t* cur = search;
+  int count = 0;
+
+  while (cur)
+  {
+    if (cur->op == M_BODY || cur->op == M_HEADER || cur->op == M_WHOLE_MSG)
+    {
+      if (cur->stringmatch)
+        count++;
+    }
+    else if (cur->child && do_search (cur->child, 1))
+      count++;
+
+    if (!allpats)
+      break;
+
+    cur = cur->next;
+  }
+
+  return count;
+}
+
+/* imap_compile_search: convert a mutt pattern_t to IMAP SEARCH criteria
+ * containing only the elements that require full-text search (mutt already
+ * has what it needs for most match types, and does a better job, eg the
+ * server doesn't support regexps).
+ *   pat: pattern node to convert. Only this node and its children are
+ *        examined; pat->next is not followed here.
+ *   buf: buffer the search keys are appended to.
+ * Returns 0 on success (nothing is appended when do_search reports no
+ * server-side work for pat), or -1 when a header pattern lacks the
+ * "name:" prefix, after reporting it with mutt_error. */
+static int imap_compile_search (const pattern_t* pat, BUFFER* buf)
+{
+  char term[STRING];
+
+  if (! do_search (pat, 0))
+    return 0;
+
+  if (pat->not)
+    mutt_buffer_addstr (buf, "NOT ");
+
+  if (pat->child)
+  {
+    int clauses;
+
+    if ((clauses = do_search (pat->child, 1)) > 0)
+    {
+      const pattern_t* clause = pat->child;
+
+      mutt_buffer_addch (buf, '(');
+
+      while (clauses)
+      {
+        if (do_search (clause, 0))
+        {
+          /* OR is emitted before every clause but the last, so three
+           * clauses come out as: OR a OR b c */
+          if (pat->op == M_OR && clauses > 1)
+            mutt_buffer_addstr (buf, "OR ");
+          clauses--;
+
+          if (imap_compile_search (clause, buf) < 0)
+            return -1;
+
+          if (clauses)
+            mutt_buffer_addch (buf, ' ');
+        }
+
+        /* Always step to the next sibling. Advancing only after a
+         * server-side clause would loop forever on the first clause that
+         * needs no server-side search. */
+        clause = clause->next;
+      }
+
+      mutt_buffer_addch (buf, ')');
+    }
+  }
+  else
+  {
+    char *delim;
+
+    switch (pat->op)
+    {
+      case M_HEADER:
+        mutt_buffer_addstr (buf, "HEADER ");
+
+        /* extract header name */
+        if (! (delim = strchr (pat->str, ':')))
+        {
+          mutt_error (_("Header search without header name: %s"), pat->str);
+          return -1;
+        }
+        /* temporarily terminate pat->str at the colon to quote the name */
+        *delim = '\0';
+        imap_quote_string (term, sizeof (term), pat->str);
+        mutt_buffer_addstr (buf, term);
+        mutt_buffer_addch (buf, ' ');
+
+        /* and field */
+        *delim = ':';
+        delim++;
+        SKIPWS(delim);
+        imap_quote_string (term, sizeof (term), delim);
+        mutt_buffer_addstr (buf, term);
+        break;
+      case M_BODY:
+        mutt_buffer_addstr (buf, "BODY ");
+        imap_quote_string (term, sizeof (term), pat->str);
+        mutt_buffer_addstr (buf, term);
+        break;
+      case M_WHOLE_MSG:
+        mutt_buffer_addstr (buf, "TEXT ");
+        imap_quote_string (term, sizeof (term), pat->str);
+        mutt_buffer_addstr (buf, term);
+        break;
+    }
+  }
+
+  return 0;
+}
+
+/* imap_search: have the server evaluate the full-text parts of pat.
+ * Clears the matched flag on every header in ctx first. If do_search
+ * finds nothing to run server-side, returns 0 without sending anything.
+ * Otherwise builds a "UID SEARCH" command with imap_compile_search and
+ * runs it with imap_exec. Returns 0 on success, -1 if building or
+ * executing the command fails. The command buffer is freed either way. */
+int imap_search (CONTEXT* ctx, const pattern_t* pat)
+{
+  IMAP_DATA* idata = (IMAP_DATA*)ctx->data;
+  BUFFER cmd;
+  int rc = 0;
+  int msgno = 0;
+
+  while (msgno < ctx->msgcount)
+  {
+    ctx->hdrs[msgno]->matched = 0;
+    msgno++;
+  }
+
+  if (do_search (pat, 1))
+  {
+    memset (&cmd, 0, sizeof (cmd));
+    mutt_buffer_addstr (&cmd, "UID SEARCH ");
+
+    /* short-circuit: the command is only executed if it compiled */
+    if (imap_compile_search (pat, &cmd) < 0
+        || imap_exec (idata, cmd.data, 0) < 0)
+      rc = -1;
+
+    FREE (&cmd.data);
+  }
+
+  return rc;
+}
+
/* all this listing/browsing is a mess. I don't like that name is a pointer
* into idata->buf (used to be a pointer into the passed in buffer, just
* as bad), nor do I like the fact that the fetch is done here. This
diff -r 0946eb46a255 imap/imap.h
--- a/imap/imap.h Mon Sep 5 11:23:32 2005
+++ b/imap/imap.h Mon Sep 5 10:47:50 2005
@@ -41,6 +41,7 @@
void imap_close_mailbox (CONTEXT *ctx);
int imap_buffy_check (char *path);
int imap_mailbox_check (char *path, int new);
+int imap_search (CONTEXT* ctx, const pattern_t* pat);
int imap_subscribe (char *path, int subscribe);
int imap_complete (char* dest, size_t dlen, char* path);
diff -r 0946eb46a255 init.c
--- a/init.c Mon Sep 5 11:23:32 2005
+++ b/init.c Mon Sep 5 10:47:50 2005
@@ -123,7 +123,7 @@
(ch == '#' && !(flags & M_TOKEN_COMMENT)) ||
(ch == '=' && (flags & M_TOKEN_EQUAL)) ||
(ch == ';' && !(flags & M_TOKEN_SEMICOLON)) ||
- ((flags & M_TOKEN_PATTERN) && strchr ("~!|", ch)))
+ ((flags & M_TOKEN_PATTERN) && strchr ("~=!|", ch)))
break;
}
diff -r 0946eb46a255 mutt.h
--- a/mutt.h Mon Sep 5 11:23:32 2005
+++ b/mutt.h Mon Sep 5 10:47:50 2005
@@ -782,7 +782,7 @@
/* flag to mutt_pattern_comp() */
-#define M_FULL_MSG 1 /* enable body and header matching */
+#define M_FULL_MSG (1<<0) /* enable body and header matching */
typedef enum {
M_MATCH_FULL_ADDRESS = 1
@@ -791,12 +791,14 @@
typedef struct pattern_t
{
short op;
- short not;
- short alladdr;
+ unsigned int not : 1; /* invert the result of this pattern */
+ unsigned int alladdr : 1; /* address patterns: every address must match, not just one */
+ unsigned int stringmatch : 1; /* match str as a literal substring instead of running rx */
int min;
int max;
struct pattern_t *next;
struct pattern_t *child; /* arguments to logical op */
+ char *str; /* literal search text, set when stringmatch is on */
regex_t *rx;
} pattern_t;
diff -r 0946eb46a255 pattern.c
--- a/pattern.c Mon Sep 5 11:23:32 2005
+++ b/pattern.c Mon Sep 5 10:47:50 2005
@@ -35,9 +35,15 @@
#include "mutt_crypt.h"
+#ifdef USE_IMAP
+#include "mx.h"
+#include "imap/imap.h"
+#endif
+
static int eat_regexp (pattern_t *pat, BUFFER *, BUFFER *);
static int eat_date (pattern_t *pat, BUFFER *, BUFFER *);
static int eat_range (pattern_t *pat, BUFFER *, BUFFER *);
+static int patmatch (const pattern_t *pat, const char *buf);
struct pattern_flags
{
@@ -136,7 +142,7 @@
}
static int
-msg_search (CONTEXT *ctx, regex_t *rx, int op, int msgno)
+msg_search (CONTEXT *ctx, pattern_t* pat, int msgno)
{
char tempfile[_POSIX_PATH_MAX];
MESSAGE *msg = NULL;
@@ -164,10 +170,10 @@
return (0);
}
- if (op != M_BODY)
+ if (pat->op != M_BODY)
mutt_copy_header (msg->fp, h, s.fpout, CH_FROM | CH_DECODE, NULL);
- if (op != M_HEADER)
+ if (pat->op != M_HEADER)
{
mutt_parse_mime_message (ctx, h);
@@ -197,14 +203,14 @@
{
/* raw header / body */
fp = msg->fp;
- if (op != M_BODY)
+ if (pat->op != M_BODY)
{
fseek (fp, h->offset, 0);
lng = h->content->offset - h->offset;
}
- if (op != M_HEADER)
+ if (pat->op != M_HEADER)
{
- if (op == M_BODY)
+ if (pat->op == M_BODY)
fseek (fp, h->content->offset, 0);
lng += h->content->length;
}
@@ -216,14 +222,14 @@
/* search the file "fp" */
while (lng > 0)
{
- if (op == M_HEADER)
+ if (pat->op == M_HEADER)
{
if (*(buf = mutt_read_rfc822_line (fp, buf, &blen)) == '\0')
break;
}
else if (fgets (buf, blen - 1, fp) == NULL)
break; /* don't loop forever */
- if (regexec (rx, buf, 0, NULL, 0) == 0)
+ if (patmatch (pat, buf) == 0)
{
match = 1;
break;
@@ -257,16 +263,30 @@
snprintf (err->data, err->dsize, _("Error in expression: %s"), s->dptr);
return (-1);
}
- pat->rx = safe_malloc (sizeof (regex_t));
- r = REGCOMP (pat->rx, buf.data, REG_NEWLINE | REG_NOSUB | mutt_which_case
(buf.data));
- FREE (&buf.data);
- if (r)
- {
- regerror (r, pat->rx, err->data, err->dsize);
- regfree (pat->rx);
- FREE (&pat->rx);
- return (-1);
- }
+
+ /* If there are no RE metacharacters, use simple search anyway */
+ if (!pat->stringmatch && !strpbrk (buf.data, "|[{.*+?^$"))
+ pat->stringmatch = 1;
+
+ if (pat->stringmatch)
+ {
+ pat->str = safe_strdup (buf.data);
+ FREE (&buf.data);
+ }
+ else
+ {
+ pat->rx = safe_malloc (sizeof (regex_t));
+ r = REGCOMP (pat->rx, buf.data, REG_NEWLINE | REG_NOSUB | mutt_which_case
(buf.data));
+ FREE (&buf.data);
+ if (r)
+ {
+ regerror (r, pat->rx, err->data, err->dsize);
+ regfree (pat->rx);
+ FREE (&pat->rx);
+ return (-1);
+ }
+ }
+
return 0;
}
@@ -666,6 +686,14 @@
return 0;
}
+/* patmatch: test buf against pat. Follows the regexec convention: the
+ * result is 0 on a match and non-zero otherwise, so callers compare it
+ * against 0. When pat->stringmatch is set, a match means pat->str occurs
+ * somewhere in buf (strstr); otherwise pat->rx is run with regexec. */
+static int patmatch (const pattern_t* pat, const char* buf)
+{
+  if (!pat->stringmatch)
+    return regexec (pat->rx, buf, 0, NULL, 0);
+
+  return strstr (buf, pat->str) == NULL;
+}
+
static struct pattern_flags *lookup_tag (char tag)
{
int i;
@@ -708,6 +736,7 @@
regfree (tmp->rx);
FREE (&tmp->rx);
}
+ FREE (&tmp->str);
if (tmp->child)
mutt_pattern_free (&tmp->child);
FREE (&tmp);
@@ -721,6 +750,7 @@
pattern_t *last = NULL;
int not = 0;
int alladdr = 0;
+ int stringmatch = 0;
int or = 0;
int implicit = 1; /* used to detect logical AND operator */
struct pattern_flags *entry;
@@ -770,7 +800,10 @@
implicit = 0;
not = 0;
alladdr = 0;
+ stringmatch = 0;
break;
+ case '=':
+ stringmatch = 1;
case '~':
if (implicit && or)
{
@@ -786,8 +819,10 @@
tmp = new_pattern ();
tmp->not = not;
tmp->alladdr = alladdr;
+ tmp->stringmatch = stringmatch;
not = 0;
- alladdr=0;
+ alladdr = 0;
+ stringmatch = 0;
if (last)
last->next = tmp;
@@ -896,8 +931,7 @@
return 0;
}
-static int match_adrlist (regex_t *rx, int match_personal, int alladdr,
- int n, ...)
+static int match_adrlist (pattern_t *pat, int match_personal, int n, ...)
{
va_list ap;
ADDRESS *a;
@@ -907,24 +941,22 @@
{
for (a = va_arg (ap, ADDRESS *) ; a ; a = a->next)
{
- if (alladdr^
- ((a->mailbox && regexec (rx, a->mailbox, 0, NULL, 0) == 0) ||
- (match_personal && a->personal &&
- regexec (rx, a->personal, 0, NULL, 0) == 0)))
+ if (pat->alladdr ^ ((a->mailbox && patmatch (pat, a->mailbox) == 0) ||
+ (match_personal && a->personal && patmatch (pat, a->personal) == 0)))
{
va_end (ap);
- return (! alladdr); /* Found match, or non-match if alladdr */
+ return (! pat->alladdr); /* Found match, or non-match if alladdr */
}
}
}
va_end (ap);
- return alladdr; /* No matches, or all matches if alladdr */
-}
-
-static int match_reference (regex_t *rx, LIST *refs)
+ return pat->alladdr; /* No matches, or all matches if alladdr */
+}
+
+static int match_reference (pattern_t *pat, LIST *refs)
{
for (; refs; refs = refs->next)
- if (regexec (rx, refs->data, 0, NULL, 0) == 0)
+ if (patmatch (pat, refs->data) == 0)
return 1;
return 0;
}
@@ -1013,47 +1045,50 @@
case M_BODY:
case M_HEADER:
case M_WHOLE_MSG:
- return (pat->not ^ msg_search (ctx, pat->rx, pat->op, h->msgno));
+#ifdef USE_IMAP
+ /* IMAP search sets h->matched at search compile time */
+ if (Context->magic == M_IMAP && pat->stringmatch)
+ return (h->matched);
+#endif
+ return (pat->not ^ msg_search (ctx, pat, h->msgno));
case M_SENDER:
- return (pat->not ^ match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS,
- pat->alladdr, 1, h->env->sender));
+ return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 1,
+ h->env->sender));
case M_FROM:
- return (pat->not ^ match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS,
- pat->alladdr, 1, h->env->from));
+ return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 1,
+ h->env->from));
case M_TO:
- return (pat->not ^ match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS,
- pat->alladdr, 1, h->env->to));
+ return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 1,
+ h->env->to));
case M_CC:
- return (pat->not ^ match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS,
- pat->alladdr, 1, h->env->cc));
+ return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 1,
+ h->env->cc));
case M_SUBJECT:
- return (pat->not ^ (h->env && h->env->subject && regexec (pat->rx,
h->env->subject, 0, NULL, 0) == 0));
+ return (pat->not ^ (h->env->subject && patmatch (pat, h->env->subject)
== 0));
case M_ID:
- return (pat->not ^ (h->env && h->env->message_id && regexec (pat->rx,
h->env->message_id, 0, NULL, 0) == 0));
+ return (pat->not ^ (h->env->message_id && patmatch (pat,
h->env->message_id) == 0));
case M_SCORE:
return (pat->not ^ (h->score >= pat->min && (pat->max == M_MAXRANGE ||
h->score <= pat->max)));
case M_SIZE:
return (pat->not ^ (h->content->length >= pat->min && (pat->max ==
M_MAXRANGE || h->content->length <= pat->max)));
case M_REFERENCE:
- return (pat->not ^ match_reference (pat->rx, h->env->references));
+ return (pat->not ^ match_reference (pat, h->env->references));
case M_ADDRESS:
- return (pat->not ^ (h->env && match_adrlist (pat->rx, flags &
M_MATCH_FULL_ADDRESS,
- pat->alladdr, 4, h->env->from,
- h->env->sender, h->env->to,
h->env->cc)));
+ return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS, 4,
+ h->env->from, h->env->sender,
+ h->env->to, h->env->cc));
case M_RECIPIENT:
- return (pat->not ^ (h->env && match_adrlist (pat->rx, flags &
M_MATCH_FULL_ADDRESS,
- pat->alladdr, 2, h->env->to,
h->env->cc)));
+ return (pat->not ^ match_adrlist (pat, flags & M_MATCH_FULL_ADDRESS,
+ 2, h->env->to, h->env->cc));
case M_LIST: /* known list, subscribed or not */
- return (pat->not ^ (h->env
- && mutt_is_list_cc (pat->alladdr, h->env->to, h->env->cc)));
+ return (pat->not ^ mutt_is_list_cc (pat->alladdr, h->env->to,
h->env->cc));
case M_SUBSCRIBED_LIST:
- return (pat->not ^ (h->env
- && mutt_is_list_recipient (pat->alladdr, h->env->to, h->env->cc)));
+ return (pat->not ^ mutt_is_list_recipient (pat->alladdr, h->env->to,
h->env->cc));
case M_PERSONAL_RECIP:
- return (pat->not ^ (h->env && match_user (pat->alladdr, h->env->to,
h->env->cc)));
+ return (pat->not ^ match_user (pat->alladdr, h->env->to, h->env->cc));
case M_PERSONAL_FROM:
- return (pat->not ^ (h->env && match_user (pat->alladdr, h->env->from,
NULL)));
+ return (pat->not ^ match_user (pat->alladdr, h->env->from, NULL));
case M_COLLAPSED:
return (pat->not ^ (h->collapsed && h->num_hidden > 1));
case M_CRYPT_SIGN:
@@ -1073,9 +1108,9 @@
break;
return (pat->not ^ ((h->security & APPLICATION_PGP) && (h->security &
PGPKEY)));
case M_XLABEL:
- return (pat->not ^ (h->env->x_label && regexec (pat->rx,
h->env->x_label, 0, NULL, 0) == 0));
+ return (pat->not ^ (h->env->x_label && patmatch (pat, h->env->x_label)
== 0));
case M_HORMEL:
- return (pat->not ^ (h->env->spam && h->env->spam->data && regexec
(pat->rx, h->env->spam->data, 0, NULL, 0) == 0));
+ return (pat->not ^ (h->env->spam && h->env->spam->data && patmatch (pat,
h->env->spam->data) == 0));
case M_DUPLICATED:
return (pat->not ^ (h->thread && h->thread->duplicate_thread));
case M_UNREFERENCED:
@@ -1109,7 +1144,7 @@
* equivalences?
*/
- if (!strchr (s, '~')) /* yup, so spoof a real request */
+ if (!strchr (s, '~') && !strchr (s, '=')) /* yup, so spoof a real request */
{
/* convert old tokens into the new format */
if (ascii_strcasecmp ("all", s) == 0 ||
@@ -1171,6 +1206,11 @@
return (-1);
}
+#ifdef USE_IMAP
+ if (Context->magic == M_IMAP && imap_search (Context, pat) < 0)
+ return -1;
+#endif
+
mutt_message _("Executing command on matching messages...");
#define THIS_BODY Context->hdrs[i]->content
@@ -1303,6 +1343,10 @@
{
for (i = 0; i < Context->msgcount; i++)
Context->hdrs[i]->searched = 0;
+#ifdef USE_IMAP
+ if (Context->magic == M_IMAP && imap_search (Context, SearchPattern) < 0)
+ return -1;
+#endif
unset_option (OPTSEARCHINVALID);
}
Attachment:
pgpeI6VParcaF.pgp
Description: PGP signature