summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> 2010-11-29 21:29:43 +0200
committer: Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> 2010-11-29 21:29:43 +0200
commit: 3d3a4f8308038ebaadaae1599106ef2b2c6deb33 (patch)
tree: 2fbe0df7c3fb068700794cd1bac77fedc46ed4d1 /src
parent: bb5b1304e545e168d8619f026737eb2ba24f0abd (diff)
* mu-query.cc, mu-store.cc: use the escaping for some fields; update test case
Diffstat (limited to 'src')
-rw-r--r-- src/mu-query.cc 89
-rw-r--r-- src/mu-store.cc 49
-rw-r--r-- src/tests/test-mu-query.c 8
3 files changed, 37 insertions, 109 deletions
diff --git a/src/mu-query.cc b/src/mu-query.cc
index 08a97af..0640ff1 100644
--- a/src/mu-query.cc
+++ b/src/mu-query.cc
@@ -288,97 +288,22 @@ mu_query_destroy (MuQuery *self)
g_free (self);
}
-struct _CheckPrefix {
- const char *pfx;
- guint len;
- gboolean match;
-};
-typedef struct _CheckPrefix CheckPrefix;
-
-static void
-each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
-{
- const char *field_name;
- char field_shortcut;
-
- if (!cpfx || cpfx->match)
- return;
-
- field_shortcut = mu_msg_field_shortcut (mfid);
- if (field_shortcut == cpfx->pfx[0]) {
- cpfx->match = TRUE;
- return;
- }
-
- field_name = mu_msg_field_name (mfid);
- if (field_name &&
- strncmp (cpfx->pfx, field_name, cpfx->len) == 0) {
- cpfx->match = TRUE;
- return;
- }
-}
-
-/* colon is a position inside q pointing at a ':' character. function
- * determines whether the prefix is a registered prefix (like
- * 'subject' or 'from' or 's') */
-static gboolean
-is_xapian_prefix (const char *q, const char *colon)
-{
- const char *cur;
-
- if (colon == q)
- return FALSE; /* : at beginning, not a prefix */
-
- /* track back from colon until a boundary or beginning of the
- * str */
- for (cur = colon - 1; cur >= q; --cur) {
-
- if (cur == q || !isalpha (*(cur-1))) {
-
- CheckPrefix cpfx;
- memset (&cpfx, 0, sizeof(CheckPrefix));
-
- cpfx.pfx = cur;
- cpfx.len = (colon - cur);
- cpfx.match = FALSE;
-
- mu_msg_field_foreach ((MuMsgFieldForEachFunc)
- each_check_prefix,
- &cpfx);
-
- return (cpfx.match);
- }
- }
-
- return FALSE;
-}
-
/* preprocess a query to make them a bit more permissive */
char*
mu_query_preprocess (const char *query)
{
gchar *my_query;
- gchar *cur;
g_return_val_if_fail (query, NULL);
+ my_query = g_strdup (query);
- /* translate the the searchexpr to all lowercase; this
- * will fixes some of the false-negatives. A full fix
- * probably requires some custom query parser.
- */
- my_query = mu_str_normalize(query, TRUE);
-
- for (cur = my_query; *cur; ++cur) {
- if (*cur == ':') /* we found a ':' */
- /* if there's a registered xapian prefix before the
- * ':', don't touch it. Otherwise replace ':' with
- * a space'... ugly...
- */
- if (!is_xapian_prefix (my_query, cur))
- *cur = ' ';
- }
-
+ /* remove accents and turn to lower-case */
+ mu_str_normalize_in_place (my_query, TRUE);
+ /* escape '@', single '_' and ':' if it's not following a
+ * xapian-pfx with '_' */
+ mu_str_ascii_xapian_escape_in_place (my_query);
+
return my_query;
}
diff --git a/src/mu-store.cc b/src/mu-store.cc
index 1d56975..376022d 100644
--- a/src/mu-store.cc
+++ b/src/mu-store.cc
@@ -314,36 +314,36 @@ static void
add_terms_values_string (Xapian::Document& doc, MuMsg *msg,
MuMsgFieldId mfid)
{
- const char* str;
-
- str = mu_msg_get_field_string (msg, mfid);
- if (!str)
+ const char *orig;
+ char *val;
+
+ orig = mu_msg_get_field_string (msg, mfid);
+ if (!orig)
return;
-
- const std::string value (str);
+ val = g_strdup (orig);
+
const std::string prefix (1, mu_msg_field_xapian_prefix(mfid));
+
+ /* the value is what we'll display; the unchanged original */
+ if (mu_msg_field_xapian_value(mfid))
+ doc.add_value ((Xapian::valueno)mfid, val);
+
+ /* now, let's create some search terms... */
+ if (mu_msg_field_normalize (mfid))
+ mu_str_normalize_in_place (val, TRUE);
+ if (mu_msg_field_xapian_escape (mfid))
+ mu_str_ascii_xapian_escape_in_place (val);
if (mu_msg_field_xapian_index (mfid)) {
Xapian::TermGenerator termgen;
- gchar *norm (mu_str_normalize(str, TRUE));
termgen.set_document (doc);
- termgen.index_text_without_positions (norm, 1, prefix);
- g_free(norm);
- }
-
- if (mu_msg_field_xapian_term(mfid)) {
- /* add a normalized version (accents removed,
- * lowercase) */
- gchar *norm = mu_str_normalize(str, TRUE);
- doc.add_term (std::string (prefix + std::string(norm), 0,
- MU_STORE_MAX_TERM_LENGTH));
- g_free (norm);
+ termgen.index_text_without_positions (val, 1, prefix);
}
-
- /* the value is what we'll display; the unchanged original */
- if (mu_msg_field_xapian_value(mfid))
- doc.add_value ((Xapian::valueno)mfid,
- value);
+
+ if (mu_msg_field_xapian_term(mfid))
+ doc.add_term (prefix + std::string(val, 0, MU_STORE_MAX_TERM_LENGTH));
+
+ g_free (val);
}
static void
@@ -443,6 +443,9 @@ each_contact_info (MuMsgContact *contact, MsgDoc *data)
/* don't normalize e-mail address, but do lowercase it */
if (contact->address && strlen (contact->address)) {
char *lower = g_utf8_strdown (contact->address, -1);
+
+ g_strdelimit (lower, "@.", '_'); /* FIXME */
+
data->_doc->add_term
(std::string (*pfxp + lower, 0,
MU_STORE_MAX_TERM_LENGTH));
diff --git a/src/tests/test-mu-query.c b/src/tests/test-mu-query.c
index a72e301..9b15423 100644
--- a/src/tests/test-mu-query.c
+++ b/src/tests/test-mu-query.c
@@ -66,14 +66,14 @@ run_and_count_matches (const char *xpath, const char *query)
mquery = mu_query_new (xpath, NULL);
g_assert (query);
+
+ /* g_printerr ("\n=>'%s'\n", query); */
iter = mu_query_run (mquery, query, MU_MSG_FIELD_ID_NONE,
FALSE, 1, NULL);
mu_query_destroy (mquery);
g_assert (iter);
- /* g_printerr ("\n=> %s\n", query); */
-
for (count = 0; !mu_msg_iter_is_done(iter);
mu_msg_iter_next(iter), ++count);
@@ -179,10 +179,10 @@ test_mu_query_04 (void)
int i;
QResults queries[] = {
-// { "frodo@example.com", 1}, /* does not match: see mu-find (1) */
+ { "frodo@example.com", 1}, /* does not match: see mu-find (1) */
{ "f:frodo@example.com", 1},
{ "f:Frodo Baggins", 1},
-// { "bilbo@anotherexample.com", 1}, /* same things */
+ { "bilbo@anotherexample.com", 1}, /* same things */
{ "t:bilbo@anotherexample.com", 1},
{ "t:bilbo", 1},
{ "f:bilbo", 0},