summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDirk-Jan C. Binnema <djcb@djcbsoftware.nl>2024-04-01 20:51:40 +0300
committerDirk-Jan C. Binnema <djcb@djcbsoftware.nl>2024-04-02 07:40:20 +0300
commitd13feb2d992d52e36eb3e767619945ac02163326 (patch)
tree9376ebe38248ef610a590432f95a268a068fc0d3
parent2e40852abc5d443a331a726fc2ae7f0dfa13f38e (diff)
mu-contact: move email validation to contacts cache
So we can be sure the regexp is initialized. This _may_ help for https://bugzilla.opensuse.org/show_bug.cgi?id=1221861 though it is very hard to tell!
-rw-r--r--lib/message/mu-contact.cc46
-rw-r--r--lib/mu-contacts-cache.cc56
-rw-r--r--lib/mu-contacts-cache.hh10
-rw-r--r--mu/mu-cmd-cfind.cc2
4 files changed, 58 insertions, 56 deletions
diff --git a/lib/message/mu-contact.cc b/lib/message/mu-contact.cc
index 4ce9a5c..c6439b0 100644
--- a/lib/message/mu-contact.cc
+++ b/lib/message/mu-contact.cc
@@ -1,5 +1,5 @@
/*
-** Copyright (C) 2022-2023 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
+** Copyright (C) 2022-2024 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@@ -20,7 +20,6 @@
#include "mu-contact.hh"
#include "mu-message.hh"
#include "utils/mu-utils.hh"
-#include "utils/mu-regex.hh"
#include "mu-mime-object.hh"
#include <gmime/gmime.h>
@@ -47,34 +46,6 @@ Contact::display_name() const
return Mu::quote(name) + " <" + email + '>';
}
-static Regex email_rx;
-
-bool
-Contact::has_valid_email() const
-{
- /* regexp as per:
- * https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
- *
- * "This requirement is a willful violation of RFC 5322, which defines a
- * syntax for email addresses that is simultaneously too strict (before
- * the "@" character), too vague (after the "@" character), and too lax
- * (allowing comments, whitespace characters, and quoted strings in
- * manners unfamiliar to most users) to be of practical use here."
- */
- constexpr auto email_rx_str =
- R"(^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$)";
- /* avoid std::regex here, since speed matters. PCRE is faster */
- if (!email_rx) {
- if (auto&& res = Regex::make(email_rx_str, G_REGEX_OPTIMIZE); !res) {
- g_error("BUG: error in regex: %s", res.error().what()); /* terminates process */
- } else
- email_rx = res.value();
- }
-
- return email_rx.matches(email);
-}
-
-
std::string
Mu::to_string(const Mu::Contacts& contacts)
{
@@ -120,7 +91,6 @@ test_ctor_foo()
assert_equal(c.email, "foo@example.com");
assert_equal(c.name, "Foo Bar");
- g_assert_true(c.has_valid_email());
g_assert_true(*c.field_id() == Field::Id::Bcc);
g_assert_cmpuint(c.message_date,==,1645214647);
@@ -142,7 +112,6 @@ test_ctor_blinky()
assert_equal(c.email, "bar@example.com");
assert_equal(c.name, "Blinky");
- g_assert_true(c.has_valid_email());
g_assert_true(c.personal);
g_assert_cmpuint(c.frequency,==,13);
g_assert_cmpuint(c.tstamp,==,12345);
@@ -166,7 +135,6 @@ test_ctor_cleanup()
assert_equal(c.email, "bar@example.com");
assert_equal(c.name, "Bli ky");
g_assert_true(c.personal);
- g_assert_true(c.has_valid_email());
g_assert_cmpuint(c.frequency,==,13);
g_assert_cmpuint(c.tstamp,==,12345);
g_assert_cmpuint(c.message_date,==,1645215014);
@@ -188,7 +156,6 @@ test_encode()
assert_equal(c.email, "cassius@example.com");
assert_equal(c.name, "Ali, Muhammad \"The Greatest\"");
- g_assert_true(c.has_valid_email());
g_assert_false(c.personal);
g_assert_cmpuint(c.frequency,==,333);
g_assert_cmpuint(c.tstamp,==,768);
@@ -207,7 +174,6 @@ test_sender()
assert_equal(c.email, "aa@example.com");
assert_equal(c.name, "Anders Ångström");
- g_assert_true(c.has_valid_email());
g_assert_false(c.personal);
g_assert_cmpuint(c.frequency,==,1);
g_assert_cmpuint(c.message_date,==,54321);
@@ -222,15 +188,6 @@ test_misc()
g_assert_false(!!contact_type_from_field_id(Field::Id::Subject));
}
-static void
-test_broken_email()
-{
- Contact c{"a***@@booa@example..com", "abcdef",
- Contact::Type::Sender, 54321};
-
- g_assert_false(c.has_valid_email());
-}
-
int
main(int argc, char* argv[])
{
@@ -244,7 +201,6 @@ main(int argc, char* argv[])
g_test_add_func("/message/contact/sender", test_sender);
g_test_add_func("/message/contact/misc", test_misc);
- g_test_add_func("/message/contact/broken-email", test_broken_email);
return g_test_run();
}
diff --git a/lib/mu-contacts-cache.cc b/lib/mu-contacts-cache.cc
index 4e60ce6..b9b9b50 100644
--- a/lib/mu-contacts-cache.cc
+++ b/lib/mu-contacts-cache.cc
@@ -1,5 +1,5 @@
/*
-** Copyright (C) 2019-2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
+** Copyright (C) 2019-2024 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@@ -53,13 +53,19 @@ struct ContactsCache::Private {
personal_rx_{make_rx_matchers<Config::Id::PersonalAddresses>()},
ignored_plain_{make_matchers<Config::Id::IgnoredAddresses>()},
ignored_rx_{make_rx_matchers<Config::Id::IgnoredAddresses>()},
- dirty_{0} {}
+ dirty_{0},
+ email_rx_{unwrap(Regex::make(email_rx_str, G_REGEX_OPTIMIZE))}
+ {}
~Private() { serialize(); }
ContactUMap deserialize(const std::string&) const;
void serialize() const;
+ bool is_valid_email(const std::string& email) const {
+ return email_rx_.matches(email);
+ }
+
Config& config_db_;
ContactUMap contacts_;
mutable std::mutex mtx_;
@@ -70,7 +76,8 @@ struct ContactsCache::Private {
const StringVec ignored_plain_;
const std::vector<Regex> ignored_rx_;
- mutable size_t dirty_;
+ mutable size_t dirty_;
+ Regex email_rx_;
private:
static bool is_rx(const std::string& p) {
@@ -100,6 +107,19 @@ private:
}
return rxvec;
}
+
+ /* regexp as per:
+ * https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
+ *
+ * "This requirement is a willful violation of RFC 5322, which defines a
+ * syntax for email addresses that is simultaneously too strict (before
+ * the "@" character), too vague (after the "@" character), and too lax
+ * (allowing comments, whitespace characters, and quoted strings in
+ * manners unfamiliar to most users) to be of practical use here."
+ */
+ static constexpr auto email_rx_str =
+ R"(^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$)";
+
};
ContactUMap
@@ -176,7 +196,7 @@ ContactsCache::add(Contact&& contact)
{
/* we do _not_ cache invalid email addresses, so we won't offer them in completions etc. It
* should be _rare_, but we've seen cases ( broken local messages) */
- if (!contact.has_valid_email()) {
+ if (!is_valid(contact.email)) {
mu_warning("not caching invalid e-mail address '{}'", contact.email);
return;
}
@@ -346,6 +366,13 @@ ContactsCache::is_ignored(const std::string& addr) const
return address_matches(addr, priv_->ignored_plain_, priv_->ignored_rx_);
}
+bool
+ContactsCache::is_valid(const std::string& addr) const
+{
+ return priv_->is_valid_email(addr);
+}
+
+
#ifdef BUILD_TESTS
/*
* Tests.
@@ -554,17 +581,28 @@ test_mu_contacts_cache_sort()
}
}
+static void
+test_mu_contacts_valid_address()
+{
+ MemDb xdb{};
+ Config cdb{xdb};
+ ContactsCache ccache(cdb);
+
+ g_assert_true(ccache.is_valid("a@example.com"));
+ g_assert_false(ccache.is_valid("a***@@booa@example..com"));
+}
int
main(int argc, char* argv[])
{
mu_test_init(&argc, &argv);
- g_test_add_func("/lib/contacts-cache/base", test_mu_contacts_cache_base);
- g_test_add_func("/lib/contacts-cache/personal", test_mu_contacts_cache_personal);
- g_test_add_func("/lib/contacts-cache/ignored", test_mu_contacts_cache_ignored);
- g_test_add_func("/lib/contacts-cache/for-each", test_mu_contacts_cache_foreach);
- g_test_add_func("/lib/contacts-cache/sort", test_mu_contacts_cache_sort);
+ g_test_add_func("/contacts-cache/base", test_mu_contacts_cache_base);
+ g_test_add_func("/contacts-cache/personal", test_mu_contacts_cache_personal);
+ g_test_add_func("/contacts-cache/ignored", test_mu_contacts_cache_ignored);
+ g_test_add_func("/contacts-cache/for-each", test_mu_contacts_cache_foreach);
+ g_test_add_func("/contacts-cache/sort", test_mu_contacts_cache_sort);
+ g_test_add_func("/contacts-cache/valid-address", test_mu_contacts_valid_address);
return g_test_run();
}
diff --git a/lib/mu-contacts-cache.hh b/lib/mu-contacts-cache.hh
index 090195b..d31c9dc 100644
--- a/lib/mu-contacts-cache.hh
+++ b/lib/mu-contacts-cache.hh
@@ -116,7 +116,6 @@ public:
*/
bool is_personal(const std::string& addr) const;
-
/**
* Does this look like an email-address that should be ignored?
*
@@ -127,6 +126,15 @@ public:
bool is_ignored(const std::string& addr) const;
/**
+ * Does this look like a valid email-address?
+ *
+ * @param addr some e-mail address
+ *
+ * @return true or false
+ */
+ bool is_valid(const std::string& addr) const;
+
+ /**
* Find a contact based on the email address. This is not safe, since
* the returned ptr can be invalidated at any time; only for unit-tests.
*
diff --git a/mu/mu-cmd-cfind.cc b/mu/mu-cmd-cfind.cc
index 5385c9c..9c61595 100644
--- a/mu/mu-cmd-cfind.cc
+++ b/mu/mu-cmd-cfind.cc
@@ -278,7 +278,7 @@ Mu::mu_cmd_cfind(const Mu::Store& store, const Mu::Options& opts)
if (opts.cfind.maxnum && num > *opts.cfind.maxnum)
return false; /* stop the loop */
- if (!contact.has_valid_email())
+ if (!store.contacts_cache().is_valid(contact.email))
return true; /* next */
// filter for maxnum, personal & "after"