summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDirk-Jan C. Binnema <djcb@djcbsoftware.nl>2010-11-29 21:21:55 +0200
committerDirk-Jan C. Binnema <djcb@djcbsoftware.nl>2010-11-29 21:21:55 +0200
commitbb5b1304e545e168d8619f026737eb2ba24f0abd (patch)
treefae6e6ddf4cf30b8c6d48a070f07eec38fe67000 /src
parentc6dadad9788bd2a159014cb2ce865f6c4bb1b164 (diff)
* mu-str.[ch]: add mu_str_ascii_xapian_escape_in_place, for escaping some
Xapian fields; also add some tests
Diffstat (limited to 'src')
-rw-r--r--src/mu-str.c103
-rw-r--r--src/mu-str.h23
-rw-r--r--src/tests/test-mu-str.c58
3 files changed, 180 insertions, 4 deletions
diff --git a/src/mu-str.c b/src/mu-str.c
index fd134af..fcb9de6 100644
--- a/src/mu-str.c
+++ b/src/mu-str.c
@@ -30,7 +30,7 @@
#include "mu-str.h"
#include "mu-msg-flags.h"
-
+#include "mu-msg-fields.h"
const char*
mu_str_date_s (const char* frm, time_t t)
@@ -226,3 +226,104 @@ mu_date_parse_hdwmy (const char* str)
return delta <= now ? now - delta : never;
}
+
+/* bookkeeping for checking one candidate prefix (the text in front of
+ * a ':') against the registered message fields */
+typedef struct _CheckPrefix {
+	const char	*pfx;	/* start of the candidate prefix */
+	guint		 len;	/* number of chars between pfx and the ':' */
+	gboolean	 match;	/* set to TRUE once a field matched */
+} CheckPrefix;
+
+/* callback for mu_msg_field_foreach: set cpfx->match to TRUE when the
+ * candidate prefix in cpfx is either the one-character shortcut or
+ * the complete name of field mfid. once a match has been recorded (or
+ * when cpfx is NULL) the call is a no-op.
+ *
+ * cpfx->pfx must point at cpfx->len non-NUL characters (the text
+ * between the prefix start and the ':'). */
+static void
+each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
+{
+	const char *field_name;
+	char field_shortcut;
+
+	if (!cpfx || cpfx->match)
+		return;
+
+	/* shortcut form, e.g. 's:' */
+	field_shortcut = mu_msg_field_shortcut (mfid);
+	if (field_shortcut == cpfx->pfx[0] && cpfx->pfx[1] == ':') {
+		cpfx->match = TRUE;
+		return;
+	}
+
+	/* full-name form, e.g. 'subject:'. the field name must end
+	 * right after the compared characters; otherwise any leading
+	 * fragment of a field name (like 'sub:') would be accepted as
+	 * a registered prefix. reading field_name[cpfx->len] is safe:
+	 * the first cpfx->len chars compared equal to non-NUL chars */
+	field_name = mu_msg_field_name (mfid);
+	if (field_name &&
+	    strncmp (cpfx->pfx, field_name, cpfx->len) == 0 &&
+	    field_name[cpfx->len] == '\0')
+		cpfx->match = TRUE;
+}
+
+/* colon is a position inside q pointing at a ':' character. function
+ * determines whether the word directly in front of it is a registered
+ * prefix (like 'subject' or 'from' or 's').
+ *
+ * returns FALSE when the ':' is the very first character of q;
+ * otherwise returns whether one of the fields visited by
+ * mu_msg_field_foreach matched the word. */
+static gboolean
+is_xapian_prefix (const char *q, const char *colon)
+{
+	const char *cur;
+	CheckPrefix cpfx;
+
+	if (colon == q)
+		return FALSE; /* : at beginning, not a prefix */
+
+	/* track back from the colon until the character in front of
+	 * cur is not a letter, or until the beginning of the str. the
+	 * cast is needed because passing a negative char value to
+	 * isalpha is undefined */
+	cur = colon - 1;
+	while (cur > q && isalpha ((unsigned char)*(cur - 1)))
+		--cur;
+
+	cpfx.pfx   = cur;
+	cpfx.len   = (guint)(colon - cur);
+	cpfx.match = FALSE;
+
+	mu_msg_field_foreach ((MuMsgFieldForEachFunc)each_check_prefix,
+			      &cpfx);
+
+	return cpfx.match;
+}
+
+/* escape an ascii query string in place, see mu-str.h:
+ *  - '@' becomes '_'
+ *  - if the string contains an '@', a single '.' becomes '_'; runs
+ *    of two or more dots are left alone
+ *  - ':' becomes '_' unless it follows a registered xapian prefix
+ *  - all other characters are lowercased
+ *
+ * returns query, or NULL if query is NULL */
+char*
+mu_str_ascii_xapian_escape_in_place (char *query)
+{
+	gchar *cur;
+	gboolean replace_dot;
+
+	g_return_val_if_fail (query, NULL);
+
+	/* only replace the '.' if the string looks like an e-mail
+	 * address or msg-id */
+	replace_dot = (g_strstr_len(query, -1, "@") != NULL);
+
+	for (cur = query; *cur; ++cur) {
+		if (*cur == '@')
+			*cur = '_';
+
+		else if (replace_dot && *cur == '.') {
+			if (cur[1] == '.') {
+				/* don't replace '..'; stop on the last
+				 * dot of the run so that the loop's
+				 * ++cur lands on the character right
+				 * after it. advancing any further would
+				 * skip that character, or step over the
+				 * terminating NUL when the dots end the
+				 * string */
+				while (cur[1] == '.')
+					++cur;
+			} else
+				*cur = '_';
+		} else if (*cur == ':') {
+			/* if there's a registered xapian prefix before the
+			 * ':', don't touch it. Otherwise replace ':' with
+			 * a '_'... ugly...
+			 */
+			if (!is_xapian_prefix (query, cur))
+				*cur = '_';
+		} else /* negative char values are undefined for tolower */
+			*cur = (char)tolower ((unsigned char)*cur);
+	}
+
+	return query;
+}
+
diff --git a/src/mu-str.h b/src/mu-str.h
index c8e3dc4..6b1ce98 100644
--- a/src/mu-str.h
+++ b/src/mu-str.h
@@ -138,7 +138,7 @@ char* mu_str_summarize (const char* str,
* 'Latin-1 Supplement' and 'Latin Extended-A'
*
* @param str a valid utf8 string or NULL
- * @param downcase if TRUE, convert the string to lowercase
+ * @param downcase if TRUE, convert the string to lowercase
*
* @return the normalize string, or NULL in case of error or str was NULL
*/
@@ -153,13 +153,30 @@ char* mu_str_normalize (const char *str, gboolean downcase);
*
* @param str a valid utf8 string or NULL
* @param downcase if TRUE, convert the string to lowercase
- *
- * @return the normalize string, or NULL in case of error or str was NULL
+ *
+ * @return the normalized string, or NULL in case of error or str was
+ * NULL
*/
char* mu_str_normalize_in_place (char *str, gboolean downcase);
/**
+ * escape the string for use with xapian matching. in practice, if the
+ * string contains an '@', replace '@' and each single '.' with '_'.
+ * Also, replace ':' with '_' if it does not follow a xapian-prefix
+ * (such as 'subject:', 't:' etc., as defined in mu-msg-fields.[ch]).
+ * the change is made in-place (by modifying the argument string). in
+ * any case, the string will be downcased.
+ *
+ * works for ascii strings, like e-mail addresses and message-id.
+ *
+ * @param query a query string
+ *
+ * @return the escaped string or NULL in case of error
+ */
+char* mu_str_ascii_xapian_escape_in_place (char *query);
+
+/**
*
* parse strings like 1h, 3w, 2m to mean '1 hour before now', '3 weeks
* before now' and '2 * 30 days before now'
diff --git a/src/tests/test-mu-str.c b/src/tests/test-mu-str.c
index da782f6..36d1303 100644
--- a/src/tests/test-mu-str.c
+++ b/src/tests/test-mu-str.c
@@ -152,6 +152,59 @@ test_mu_str_normalize_01 (void)
}
+/* mu_str_normalize with downcase == FALSE: each input must come back
+ * as the listed ascii form, with the case of the letters preserved */
+static void
+test_mu_str_normalize_02 (void)
+{
+	static const struct {
+		const char *input;
+		const char *expected;
+	} cases [] = {
+		{ "DantèS", "DanteS"},
+		{ "foo", "foo" },
+		{ "Föö", "Foo" },
+		{ "číslO", "cislO" },
+		{ "hÆvý mëÐal ümláõt", "hAevy meDal umlaot"}
+	};
+	guint idx;
+
+	for (idx = 0; idx < G_N_ELEMENTS(cases); ++idx) {
+		/* mu_str_normalize returns a newly allocated copy */
+		gchar *normalized = mu_str_normalize (cases[idx].input, FALSE);
+
+		g_assert_cmpstr (normalized, ==, cases[idx].expected);
+		g_free (normalized);
+	}
+}
+
+
+/* mu_str_ascii_xapian_escape_in_place: each input, escaped in a
+ * private copy, must equal the listed result */
+static void
+test_mu_str_ascii_xapian_escape (void)
+{
+	static const struct {
+		const char *input;
+		const char *expected;
+	} cases [] = {
+		{ "aap@noot.mies", "aap_noot_mies"},
+		{ "Foo..Bar", "foo..bar" },
+		{ "subject:test@foo", "subject:test_foo" },
+		{ "xxx:test@bar", "xxx_test_bar" },
+	};
+	guint idx;
+
+	for (idx = 0; idx < G_N_ELEMENTS(cases); ++idx) {
+		/* the function modifies its argument, so work on a copy */
+		gchar *copy = g_strdup (cases[idx].input);
+
+		mu_str_ascii_xapian_escape_in_place (copy);
+		g_assert_cmpstr (copy, ==, cases[idx].expected);
+		g_free (copy);
+	}
+}
+
+
+
+
+
+
+
#if 0
static void
@@ -233,6 +286,11 @@ main (int argc, char *argv[])
/* mu_str_normalize */
g_test_add_func ("/mu-str/mu-str-normalize-01",
test_mu_str_normalize_01);
+ g_test_add_func ("/mu-str/mu-str-normalize-02",
+ test_mu_str_normalize_02);
+
+ g_test_add_func ("/mu-str/mu-str-ascii-xapian-escape",
+ test_mu_str_ascii_xapian_escape);
/* mu_str_complete_iso_date_(begin|end) */
/* g_test_add_func ("/mu-str/mu-str-complete-iso-date-begin", */