summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>, 2022-06-26 21:55:14 +0300
committer: Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>, 2022-06-29 08:00:43 +0300
commit: df80935c2e22b7f5207b4b1bb43bbd694cf9cb74 (patch)
tree: dcd8382d8a9ac13d78910629b3394ea0658a08bb
parent: 6cb38c8125b7ebfb49996c8ad267a73eda36cde9 (diff)
document: index some sub-parts as well
1. Also add 'normal' terms for some indexable fields
2. Add terms for e-mail address components

And add some tests.

This helps for some corner-case queries (see tests).

Fixes #2278
Fixes #2281
-rw-r--r-- lib/message/mu-document.cc | 20
-rw-r--r-- lib/tests/test-mu-store-query.cc | 49
2 files changed, 63 insertions, 6 deletions
diff --git a/lib/message/mu-document.cc b/lib/message/mu-document.cc
index 983c3cc..b0b9389 100644
--- a/lib/message/mu-document.cc
+++ b/lib/message/mu-document.cc
@@ -30,7 +30,6 @@
#include <string>
#include <utils/mu-utils.hh>
-
using namespace Mu;
constexpr uint8_t SepaChar1 = 0xfe;
@@ -46,7 +45,13 @@ add_search_term(Xapian::Document& doc, const Field& field, const std::string& va
} else if (field.is_indexable_term()) {
Xapian::TermGenerator termgen;
termgen.set_document(doc);
- termgen.index_text(utf8_flatten(val),1,field.xapian_term());
+ termgen.index_text(utf8_flatten(val), 1, field.xapian_term());
+ /* also add as 'normal' term, so some queries where the indexer
+ * eats special chars also match */
+ if (field.id != Field::Id::BodyText &&
+ field.id != Field::Id::EmbeddedText) {
+ doc.add_term(field.xapian_term(val));
+ }
} else
throw std::logic_error("not a search term");
}
@@ -143,12 +148,19 @@ Document::add(Field::Id id, const Contacts& contacts)
if (!cfield_id || *cfield_id != id)
continue;
- xdoc_.add_term(field.xapian_term(contact.email));
+ const auto e{contact.email};
+ xdoc_.add_term(field.xapian_term(e));
+
+ /* allow searching for address components, too */
+ const auto atpos = e.find('@');
+ if (atpos != std::string::npos && atpos < e.size() - 1) {
+ xdoc_.add_term(field.xapian_term(e.substr(0, atpos)));
+ xdoc_.add_term(field.xapian_term(e.substr(atpos + 1)));
+ }
if (!contact.name.empty())
termgen.index_text(utf8_flatten(contact.name), 1,
field.xapian_term());
-
cvec.emplace_back(contact.email + sepa2 + contact.name);
}
diff --git a/lib/tests/test-mu-store-query.cc b/lib/tests/test-mu-store-query.cc
index 369bce0..f2f2581 100644
--- a/lib/tests/test-mu-store-query.cc
+++ b/lib/tests/test-mu-store-query.cc
@@ -81,7 +81,6 @@ make_test_store(const std::string& test_path, const TestMap& test_map,
static void
test_simple()
{
-
const TestMap test_msgs = {{
// "sqlite-msg" "Simple mailing list message.
@@ -157,13 +156,59 @@ I said: "Aujourd'hui!"
//g_assert_cmpuint(qr->begin().date().value_or(0), ==, 123454);
}
+static void
+test_spam_address_components()
+{
+ const TestMap test_msgs = {{
+
+// the only message in this store: "***SPAM***" subject, From bar@example.com
+{
+"spam/cur/spam-msg:2,S",
+R"(Message-Id: <abcde@foo.bar>
+From: "Foo Example" <bar@example.com>
+To: example@example.com
+Subject: ***SPAM*** this is a test
+
+Boo!
+)"},
+}};
+ TempDir tdir;
+ auto store{make_test_store(tdir.path(), test_msgs, {})};
+
+ g_test_bug("2278");
+ g_test_bug("2281");
+
+ // every expression below must match the single test message exactly once
+ for (auto&& expr: {
+ "SPAM",
+ "spam",
+ "/.*SPAM.*/",
+ "subject:SPAM",
+ "from:bar@example.com",
+ "subject:\\*\\*\\*SPAM\\*\\*\\*",
+ "bar",
+ "example.com"
+ }) {
+
+ if (g_test_verbose())
+ g_message("query: '%s'", expr);
+ auto qr = store.run_query(expr);
+ assert_valid_result(qr);
+ g_assert_false(qr->empty());
+ g_assert_cmpuint(qr->size(), ==, 1);
+ }
+}
int
main(int argc, char* argv[])
{
g_test_init(&argc, &argv, NULL);
- g_test_add_func("/store/query/simple", test_simple);
+ g_test_bug_base("https://github.com/djcb/mu/issues/");
+
+ g_test_add_func("/store/query/simple", test_simple);
+ g_test_add_func("/store/query/spam-address-components",
+ test_spam_address_components);
return g_test_run();
}