On Saturday, 03 September 2005 at 23:54, Derek Martin wrote: > On Sat, Sep 03, 2005 at 10:53:09AM -0700, Brendan Cully wrote: > > 1. Silently treat the arguments to ~(b|h|H) as simple strings and pass > > them to the server. You'd lose the ability to do full-text regular > > expression searches on IMAP folders. On the other hand, client-side > > searches are currently painful enough that probably no one does > > them anyway. > > This is probably the easiest to code... but the down side is it > introduces yet another folder-format-specific inconsistency in mutt's > UI, which is something that I personally don't like. I strongly > believe that the data format should have no impact on the behavior of > the UI, ever. I would like to agree, but in practice some formats make some operations much cheaper or more expensive than others. To really preserve consistency we'd just keep the status quo - super slow client-side full text searches. When performance gets bad enough I think it's probably worth sacrificing some consistency. > > 2. Three more pattern commands. Since there aren't many letters left, > > these would end up being pretty obscure, and normal body searches > > would still have to fetch every message. I don't really like this > > option. > > But, from the standpoint of UI consistency, this is the best option. > I also think it would be useful to have a substing search feature for > other formats... It is generally less expensive than regex, and I > think more often than not substrings are what people want to search > for. Thus it is an all-around win, IMO. I'm surprised this one got a vote. It was far and away my least favourite. > Note that in truth, I never use this option whatever, so my advocacy > is based purely on what I think The Right Way is. Of course, > different opinions can be valid too. > > > 3. A modifier for ~b..., eg $~b or $~h, indicating that the parameters > > are substrings rather than regular expressions. Would people > > actually remember to use it or is it just a nuisance? > > To my eye, this seems the least desireable of the options you present. > It seems needlessly complex for the user. This option got a couple votes, and I prefer it to three new letters, so I'll probably hack it up. In the mean time, if anyone wants to play with it, here's a patch that implements option 1 (silently using strings instead of regexps), just because, as you guessed, it was the simplest to code. I haven't done a whole lot of testing with it yet, and I have my doubts about whether it interacts properly with complex server/client mixed clauses. But it seems to do the trick for the few simple cases I tried.
# HG changeset patch
# User Brendan Cully <brendan@xxxxxxxxxx>
# Node ID 9e7f6e2fd959d28e0f84b925fb4d831f5f52a037
# Parent 796908c4398ba505e4d0c2c8d48257c506aeabeb
First draft of server-side searching
diff -r 796908c4398b -r 9e7f6e2fd959 imap/command.c
--- a/imap/command.c Sun Sep 4 21:18:50 2005
+++ b/imap/command.c Mon Sep 5 03:16:32 2005
@@ -44,6 +44,7 @@
static void cmd_parse_lsub (IMAP_DATA* idata, char* s);
static void cmd_parse_fetch (IMAP_DATA* idata, char* s);
static void cmd_parse_myrights (IMAP_DATA* idata, char* s);
+static void cmd_parse_search (IMAP_DATA* idata, char* s);
static char *Capabilities[] = {
"IMAP4",
@@ -367,6 +368,8 @@
cmd_parse_lsub (idata, s);
else if (ascii_strncasecmp ("MYRIGHTS", s, 8) == 0)
cmd_parse_myrights (idata, s);
+ else if (ascii_strncasecmp ("SEARCH", s, 6) == 0)
+ cmd_parse_search (idata, s);
else if (ascii_strncasecmp ("BYE", s, 3) == 0)
{
dprint (2, (debugfile, "Handling BYE\n"));
@@ -624,3 +627,33 @@
s++;
}
}
+
+/* This should be optimised (eg with a tree or hash) */
+static int uid2msgno (IMAP_DATA* idata, unsigned int uid)
+{
+ int i;
+
+ for (i = 0; i < idata->ctx->msgcount; i++)
+ {
+ HEADER* h = idata->ctx->hdrs[i];
+ if (HEADER_DATA(h)->uid == uid)
+ break;
+ }
+
+ return i;
+}
+
+/* cmd_parse_search: store SEARCH response for later use */
+static void cmd_parse_search (IMAP_DATA* idata, char* s)
+{
+ unsigned int uid;
+
+ dprint (2, (debugfile, "Handling SEARCH\n"));
+
+ while ((s = imap_next_word (s)) && *s != '\0')
+ {
+ uid = atoi (s);
+
+ idata->ctx->hdrs[uid2msgno (idata, uid)]->matched = 1;
+ }
+}
diff -r 796908c4398b -r 9e7f6e2fd959 imap/imap.c
--- a/imap/imap.c Sun Sep 4 21:18:50 2005
+++ b/imap/imap.c Mon Sep 5 03:16:32 2005
@@ -1298,6 +1298,149 @@
return msgcount;
}
+/* returns number of patterns in the search that should be done server-side
+ * (eg are full-text) */
+static int do_search (const pattern_t* search, int allpats)
+{
+ int rc = 0;
+ const pattern_t* pat;
+
+ for (pat = search; pat; pat = pat->next)
+ {
+ switch (pat->op)
+ {
+ case M_BODY:
+ case M_HEADER:
+ case M_WHOLE_MSG:
+ rc++;
+ break;
+ default:
+ if (pat->child && do_search (pat->child, 1))
+ rc++;
+ }
+
+ if (!allpats)
+ break;
+ }
+
+ return rc;
+}
+
+/* convert mutt pattern_t to IMAP SEARCH command containing only elements
+ * that require full-text search (mutt already has what it needs for most
+ * match types, and does a better job (eg server doesn't support regexps). */
+static int imap_compile_search (const pattern_t* search, BUFFER* buf)
+{
+ char term[STRING];
+
+ if (do_search (search, 0) && search->not)
+ mutt_buffer_addstr (buf, "NOT ");
+
+ if (search->child)
+ {
+ int clauses;
+
+ if ((clauses = do_search (search->child, 1)) > 0)
+ {
+ const pattern_t* clause = search->child;
+
+ mutt_buffer_addch (buf, '(');
+
+ while (clauses)
+ {
+ if (do_search (clause, 0))
+ {
+ if (search->op == M_OR && clauses > 1)
+ mutt_buffer_addstr (buf, "OR ");
+ clauses--;
+
+ if (imap_compile_search (clause, buf) < 0)
+ return -1;
+
+ if (clauses)
+ mutt_buffer_addch (buf, ' ');
+
+ clause = clause->next;
+ }
+ }
+
+ mutt_buffer_addch (buf, ')');
+ }
+ }
+ else
+ {
+ char *delim;
+
+ switch (search->op)
+ {
+ case M_HEADER:
+ mutt_buffer_addstr (buf, "HEADER ");
+
+ /* extract header name */
+ if (! (delim = strchr (search->substr, ':')))
+ {
+ mutt_error (_("Header search without header name: %s"),
search->substr);
+ return -1;
+ }
+ *delim = '\0';
+ imap_quote_string (term, sizeof (term), search->substr);
+ mutt_buffer_addstr (buf, term);
+ mutt_buffer_addch (buf, ' ');
+
+ /* and field */
+ *delim = ':';
+ delim++;
+ SKIPWS(delim);
+ imap_quote_string (term, sizeof (term), delim);
+ mutt_buffer_addstr (buf, term);
+ break;
+ case M_BODY:
+ mutt_buffer_addstr (buf, "BODY ");
+ imap_quote_string (term, sizeof (term), search->substr);
+ mutt_buffer_addstr (buf, term);
+ break;
+ case M_WHOLE_MSG:
+ mutt_buffer_addstr (buf, "TEXT ");
+ imap_quote_string (term, sizeof (term), search->substr);
+ mutt_buffer_addstr (buf, term);
+ break;
+ default:
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+int imap_search (CONTEXT* ctx, const pattern_t* search)
+{
+ BUFFER buf;
+ IMAP_DATA* idata = (IMAP_DATA*)ctx->data;
+ int i;
+
+ for (i = 0; i < ctx->msgcount; i++)
+ ctx->hdrs[i]->matched = 0;
+
+ if (!do_search (search, 1))
+ return 0;
+
+ memset (&buf, 0, sizeof (buf));
+ mutt_buffer_addstr (&buf, "UID SEARCH ");
+ if (imap_compile_search (search, &buf) < 0)
+ {
+ FREE (&buf.data);
+ return -1;
+ }
+ if (imap_exec (idata, buf.data, 0) < 0)
+ {
+ FREE (&buf.data);
+ return -1;
+ }
+
+ FREE (&buf.data);
+ return 0;
+}
+
/* all this listing/browsing is a mess. I don't like that name is a pointer
* into idata->buf (used to be a pointer into the passed in buffer, just
* as bad), nor do I like the fact that the fetch is done here. This
diff -r 796908c4398b -r 9e7f6e2fd959 imap/imap.h
--- a/imap/imap.h Sun Sep 4 21:18:50 2005
+++ b/imap/imap.h Mon Sep 5 03:16:32 2005
@@ -41,6 +41,7 @@
void imap_close_mailbox (CONTEXT *ctx);
int imap_buffy_check (char *path);
int imap_mailbox_check (char *path, int new);
+int imap_search (CONTEXT* ctx, const pattern_t* search);
int imap_subscribe (char *path, int subscribe);
int imap_complete (char* dest, size_t dlen, char* path);
diff -r 796908c4398b -r 9e7f6e2fd959 mutt.h
--- a/mutt.h Sun Sep 4 21:18:50 2005
+++ b/mutt.h Mon Sep 5 03:16:32 2005
@@ -798,6 +798,10 @@
struct pattern_t *next;
struct pattern_t *child; /* arguments to logical op */
regex_t *rx;
+#ifdef USE_IMAP
+ /* server-side search only understands substring searches, not regexp */
+ char *substr;
+#endif
} pattern_t;
typedef struct
diff -r 796908c4398b -r 9e7f6e2fd959 pattern.c
--- a/pattern.c Sun Sep 4 21:18:50 2005
+++ b/pattern.c Mon Sep 5 03:16:32 2005
@@ -34,6 +34,13 @@
#include <stdarg.h>
#include "mutt_crypt.h"
+
+#ifdef USE_IMAP
+#include "mx.h"
+#include "imap/imap.h"
+
+static int eat_substr (pattern_t *pat, BUFFER *, BUFFER *);
+#endif
static int eat_regexp (pattern_t *pat, BUFFER *, BUFFER *);
static int eat_date (pattern_t *pat, BUFFER *, BUFFER *);
@@ -245,6 +252,23 @@
return match;
}
+static int eat_substr (pattern_t* pat, BUFFER *s, BUFFER *err)
+{
+ BUFFER buf;
+
+ memset (&buf, 0, sizeof (buf));
+ if (mutt_extract_token (&buf, s, M_TOKEN_PATTERN) != 0 || !buf.data)
+ {
+ snprintf (err->data, err->dsize, _("Error in expression: %s"), s->dptr);
+ return -1;
+ }
+
+ pat->substr = safe_strdup (buf.data);
+ FREE (&buf.data);
+
+ return 0;
+}
+
int eat_regexp (pattern_t *pat, BUFFER *s, BUFFER *err)
{
BUFFER buf;
@@ -708,6 +732,9 @@
regfree (tmp->rx);
FREE (&tmp->rx);
}
+#ifdef USE_IMAP
+ FREE (&tmp->substr);
+#endif
if (tmp->child)
mutt_pattern_free (&tmp->child);
FREE (&tmp);
@@ -821,6 +848,16 @@
mutt_pattern_free (&curlist);
return NULL;
}
+#ifdef USE_IMAP
+ if (Context && Context->magic == M_IMAP && entry->class ==
M_FULL_MSG)
+ {
+ if (eat_substr (tmp, &ps, err) == -1)
+ {
+ mutt_pattern_free (&curlist);
+ return NULL;
+ }
+ } else
+#endif
if (entry->eat_arg (tmp, &ps, err) == -1)
{
mutt_pattern_free (&curlist);
@@ -1013,6 +1050,10 @@
case M_BODY:
case M_HEADER:
case M_WHOLE_MSG:
+#ifdef USE_IMAP
+ /* IMAP search sets h->matched at search compile time */
+ return (h->matched);
+#endif
return (pat->not ^ msg_search (ctx, pat->rx, pat->op, h->msgno));
case M_SENDER:
return (pat->not ^ match_adrlist (pat->rx, flags & M_MATCH_FULL_ADDRESS,
@@ -1171,6 +1212,11 @@
return (-1);
}
+#ifdef USE_IMAP
+ if (Context->magic == M_IMAP && imap_search (Context, pat) < 0)
+ return -1;
+#endif
+
mutt_message _("Executing command on matching messages...");
#define THIS_BODY Context->hdrs[i]->content
@@ -1303,6 +1349,10 @@
{
for (i = 0; i < Context->msgcount; i++)
Context->hdrs[i]->searched = 0;
+#ifdef USE_IMAP
+ if (Context->magic == M_IMAP && imap_search (Context, SearchPattern) < 0)
+ return -1;
+#endif
unset_option (OPTSEARCHINVALID);
}
Attachment:
pgpdV0LGk2Pke.pgp
Description: PGP signature