summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDirk-Jan C. Binnema <djcb@djcbsoftware.nl>2020-10-13 23:38:26 +0300
committerDirk-Jan C. Binnema <djcb@djcbsoftware.nl>2020-10-17 10:36:25 +0300
commitdbff5671dd5cc3d01f6d3e7644166a4db0208a8f (patch)
tree91cdb308cd770bdbfe4ffc8eb0fc4f09193a3808
parent5cd6226ebd2c152e79e6c06a7e9a96c25048ed5f (diff)
lib: support 'personal' regexp, move to mu-contacts
Move the determination of "personal" to MuContacts; add support for regexps (POSIX-basic, in //)
-rw-r--r--lib/mu-contacts.cc133
-rw-r--r--lib/mu-contacts.hh42
-rw-r--r--lib/mu-store.cc52
-rw-r--r--lib/mu-store.hh4
-rw-r--r--lib/test-mu-contacts.cc35
-rw-r--r--lib/test-mu-store.cc1
-rw-r--r--man/mu-init.116
7 files changed, 185 insertions, 98 deletions
diff --git a/lib/mu-contacts.cc b/lib/mu-contacts.cc
index e5bb21f..b59e059 100644
--- a/lib/mu-contacts.cc
+++ b/lib/mu-contacts.cc
@@ -25,6 +25,7 @@
#include <sstream>
#include <functional>
#include <algorithm>
+#include <regex>
#include <utils/mu-utils.hh>
#include <glib.h>
@@ -34,7 +35,21 @@ using namespace Mu;
ContactInfo::ContactInfo (const std::string& _full_address,
const std::string& _email,
const std::string& _name,
- bool _personal, time_t _last_seen, size_t _freq):
+ time_t _last_seen):
+ full_address{_full_address},
+ email{_email},
+ name{_name},
+ last_seen{_last_seen},
+ freq{1},
+ tstamp{g_get_monotonic_time()} {}
+
+
+ContactInfo::ContactInfo (const std::string& _full_address,
+ const std::string& _email,
+ const std::string& _name,
+ bool _personal,
+ time_t _last_seen,
+ size_t _freq):
full_address{_full_address},
email{_email},
name{_name},
@@ -43,7 +58,6 @@ ContactInfo::ContactInfo (const std::string& _full_address,
freq{_freq},
tstamp{g_get_monotonic_time()} {}
-
struct EmailHash {
std::size_t operator()(const std::string& email) const {
std::size_t djb = 5381; // djb hash
@@ -95,19 +109,55 @@ using ContactUMap = std::unordered_map<const std::string, ContactInfo, EmailHash
using ContactSet = std::set<std::reference_wrapper<const ContactInfo>, ContactInfoLessThan>;
struct Contacts::Private {
- Private(const std::string& serialized):
- contacts_{deserialize(serialized)}
- {}
+ Private(const std::string& serialized,
+ const StringVec& personal):
+ contacts_{deserialize(serialized)} {
+ make_personal(personal);
+ }
+ void make_personal(const StringVec& personal);
ContactUMap deserialize(const std::string&) const;
std::string serialize() const;
ContactUMap contacts_;
std::mutex mtx_;
+
+ StringVec personal_plain_;
+ std::vector<std::regex> personal_rx_;
};
constexpr auto Separator = "\xff"; // Invalid in UTF-8
+void
+Contacts::Private::make_personal (const StringVec& personal)
+{
+ for (auto&& p: personal) {
+
+ if (p.empty())
+ continue; // invalid
+
+ if (p.size() < 2 || p.at(0) != '/' || p.at(p.length() - 1) != '/')
+ personal_plain_.emplace_back(p); // normal address
+ else {
+ // a regex pattern.
+ try {
+ const auto rxstr{p.substr(1, p.length()-2)};
+ personal_rx_.emplace_back(
+ std::regex(rxstr,
+ std::regex::basic |
+ std::regex::optimize |
+ std::regex::icase));
+
+ } catch (const std::regex_error& rex) {
+ g_warning ("invalid personal address regexp '%s': %s",
+ p.c_str(), rex.what());
+ }
+ }
+ }
+}
+
+
+
ContactUMap
Contacts::Private::deserialize(const std::string& serialized) const
{
@@ -131,15 +181,14 @@ Contacts::Private::deserialize(const std::string& serialized) const
(std::size_t)g_ascii_strtoll(parts[5].c_str(), NULL, 10)); // freq
contacts.emplace(std::move(parts[1]), std::move(ci));
-
}
return contacts;
}
-Contacts::Contacts (const std::string& serialized) :
- priv_{std::make_unique<Private>(serialized)}
+Contacts::Contacts (const std::string& serialized, const StringVec& personal) :
+ priv_{std::make_unique<Private>(serialized, personal)}
{}
Contacts::~Contacts() = default;
@@ -170,44 +219,42 @@ Contacts::serialize() const
}
-// for now, we only care about _not_ having newlines.
static void
wash (std::string& str)
{
str.erase(std::remove(str.begin(), str.end(), '\n'), str.end());
}
-
void
Contacts::add (ContactInfo&& ci)
{
std::lock_guard<std::mutex> l_{priv_->mtx_};
- auto down = g_ascii_strdown (ci.email.c_str(), -1);
- std::string email{down};
- g_free(down);
-
- auto it = priv_->contacts_.find(email);
- if (it != priv_->contacts_.end()) {
- auto& ci2 = it->second;
- ++ci2.freq;
- if (ci.last_seen > ci2.last_seen) {
- ci2.last_seen = ci.last_seen;
- wash(ci.email);
- ci2.email = std::move(ci.email);
- if (!ci.name.empty()) {
- wash(ci.name);
- ci2.name = std::move(ci.name);
- }
+ auto it = priv_->contacts_.find(ci.email);
+
+ if (it == priv_->contacts_.end()) { // completely new contact
+ wash(ci.name);
+ wash(ci.full_address);
+ ci.freq = 1;
+ ci.personal = is_personal(ci.email);
+ auto email{ci.email};
+ priv_->contacts_.emplace(ContactUMap::value_type(email, std::move(ci)));
+ } else { // existing contact.
+ auto& ci_existing{it->second};
+ ++ci_existing.freq;
+
+ if (ci.last_seen > ci_existing.last_seen) {
+ // update.
+ wash(ci.name);
+ ci_existing.name = std::move(ci.name);
+
+ ci_existing.email = std::move(ci.email);
+
+ wash(ci.full_address);
+ ci_existing.full_address = std::move(ci.full_address);
+ ci_existing.tstamp = g_get_monotonic_time();
}
}
-
- wash(ci.name);
- wash(ci.email);
- wash(ci.full_address);
-
- priv_->contacts_.emplace(
- ContactUMap::value_type(std::move(email), std::move(ci)));
}
@@ -216,8 +263,7 @@ Contacts::_find (const std::string& email) const
{
std::lock_guard<std::mutex> l_{priv_->mtx_};
- ContactInfo ci{"", email, "", false, 0};
- const auto it = priv_->contacts_.find(ci.email);
+ const auto it = priv_->contacts_.find(email);
if (it == priv_->contacts_.end())
return {};
else
@@ -260,6 +306,23 @@ Contacts::for_each(const EachContactFunc& each_contact) const
each_contact (ci);
}
+bool
+Contacts::is_personal(const std::string& addr) const
+{
+ for (auto&& p: priv_->personal_plain_)
+ if (g_ascii_strcasecmp(addr.c_str(), p.c_str()) == 0)
+ return true;
+
+ for (auto&& rx: priv_->personal_rx_) {
+ std::smatch m; // perhaps cache addr in personal_plain_?
+ if (std::regex_match(addr, m, rx))
+ return true;
+ }
+
+ return false;
+}
+
+
/// C binding
size_t
diff --git a/lib/mu-contacts.hh b/lib/mu-contacts.hh
index 7873cd6..c6e4b01 100644
--- a/lib/mu-contacts.hh
+++ b/lib/mu-contacts.hh
@@ -34,6 +34,7 @@ typedef struct _MuContacts MuContacts;
#include <string>
#include <time.h>
#include <inttypes.h>
+#include <utils/mu-utils.hh>
namespace Mu {
@@ -46,25 +47,38 @@ struct ContactInfo {
* @param _full_address the full email address + name.
* @param _email email address
* @param _name name or empty
+ * @param _last_seen when was this contact last seen?
+ */
+ ContactInfo (const std::string& _full_address,
+ const std::string& _email,
+ const std::string& _name,
+ time_t _last_seen);
+
+ /**
+ * Construct a new ContactInfo
+ *
+ * @param _full_address the full email address + name.
+ * @param _email email address
+ * @param _name name or empty
* @param _personal is this a personal contact?
* @param _last_seen when was this contact last seen?
* @param _freq how often was this contact seen?
- *
- * @return
*/
ContactInfo (const std::string& _full_address,
const std::string& _email,
const std::string& _name,
- bool _personal, time_t _last_seen, size_t _freq=1);
+ bool personal,
+ time_t _last_seen,
+ size_t freq);
std::string full_address; /**< Full name <email> */
std::string email; /**< email address */
std::string name; /**< name (or empty) */
- bool personal; /**< is this a personal contact? */
- time_t last_seen; /**< when was this contact last seen? */
- std::size_t freq; /**< how often was this contact seen? */
+ bool personal{}; /**< is this a personal contact? */
+ time_t last_seen{}; /**< when was this contact last seen? */
+ std::size_t freq{}; /**< how often was this contact seen? */
- int64_t tstamp; /**< Time-stamp, as per g_get_monotonic_time */
+ int64_t tstamp{}; /**< Time-stamp, as per g_get_monotonic_time */
};
/// All contacts
@@ -74,8 +88,10 @@ public:
* Construct a new contacts objects
*
* @param serialized serialized contacts
+ * @param personal personal addresses
*/
- Contacts (const std::string& serialized = "");
+ Contacts (const std::string& serialized = "",
+ const StringVec& personal={});
/**
* DTOR
@@ -118,6 +134,16 @@ public:
*/
std::string serialize() const;
+
+ /**
+ * Does this look like a 'personal' address?
+ *
+ * @param addr some e-mail address
+ *
+ * @return true or false
+ */
+ bool is_personal(const std::string& addr) const;
+
/**
* Find a contact based on the email address. This is not safe, since
* the returned ptr can be invalidated at any time; only for unit-tests.
diff --git a/lib/mu-store.cc b/lib/mu-store.cc
index cc3b9ea..d0e7cc7 100644
--- a/lib/mu-store.cc
+++ b/lib/mu-store.cc
@@ -114,7 +114,7 @@ struct Store::Private {
Private (const std::string& path, bool readonly):
db_{make_xapian(path, readonly ? XapianOpts::ReadOnly : XapianOpts::Open)},
mdata_{make_metadata(path)},
- contacts_{db()->get_metadata(ContactsKey)} {
+ contacts_{db()->get_metadata(ContactsKey), mdata_.personal_addresses} {
if (!readonly)
wdb()->begin_transaction();
@@ -123,7 +123,8 @@ struct Store::Private {
Private (const std::string& path, const std::string& root_maildir,
const StringVec& personal_addresses, const Store::Config& conf):
db_{make_xapian(path, XapianOpts::CreateOverwrite)},
- mdata_{init_metadata(conf, path, root_maildir, personal_addresses)} {
+ mdata_{init_metadata(conf, path, root_maildir, personal_addresses)},
+ contacts_{"", mdata_.personal_addresses} {
wdb()->begin_transaction();
}
@@ -307,7 +308,6 @@ Store::metadata() const
const Contacts&
Store::contacts() const
{
- LOCKED;
return priv_->contacts_;
}
@@ -1045,32 +1045,11 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc)
contacts.add(Mu::ContactInfo(contact->full_address,
contact->email,
contact->name ? contact->name : "",
- msgdoc->_personal,
mu_msg_get_date(msgdoc->_msg)));
}
return TRUE;
}
-
-
-static gboolean
-each_contact_check_if_personal (MuMsgContact *contact, MsgDoc *msgdoc)
-{
- if (msgdoc->_personal || !contact->email)
- return TRUE;
-
- for (const auto& cur : *msgdoc->_my_addresses) {
- if (g_ascii_strcasecmp
- (contact->email,
- (const char*)cur.c_str()) == 0) {
- msgdoc->_personal = TRUE;
- break;
- }
- }
-
- return TRUE;
-}
-
static Xapian::Document
new_doc_from_message (MuStore *store, MuMsg *msg)
{
@@ -1079,17 +1058,20 @@ new_doc_from_message (MuStore *store, MuMsg *msg)
mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_terms_values, &docinfo);
- /* determine whether this is 'personal' email, ie. one of my
- * e-mail addresses is explicitly mentioned -- it's not a
- * mailing list message. Callback will update docinfo->_personal */
- const auto& personal_addresses = self(store)->metadata().personal_addresses;
- if (personal_addresses.size()) {
- docinfo._my_addresses = &personal_addresses;
- mu_msg_contact_foreach
- (msg,
- (MuMsgContactForeachFunc)each_contact_check_if_personal,
- &docinfo);
- }
+ mu_msg_contact_foreach
+ (msg, [](auto contact, gpointer msgdocptr)->gboolean {
+ auto msgdoc{reinterpret_cast<MsgDoc*>(msgdocptr)};
+
+ if (!contact->email)
+ return FALSE; // invalid contact
+ else if (msgdoc->_personal)
+ return TRUE; // already deemed personal
+
+ if (msgdoc->_store->contacts().is_personal(contact->email))
+ msgdoc->_personal = true; // this one's personal.
+
+ return TRUE;
+ }, &docinfo);
/* also store the contact-info as separate terms, and add it
* to the cache */
diff --git a/lib/mu-store.hh b/lib/mu-store.hh
index 7641352..1794298 100644
--- a/lib/mu-store.hh
+++ b/lib/mu-store.hh
@@ -96,8 +96,6 @@ public:
* @return the metadata
*/
const Metadata& metadata() const;
-
-
/**
* Get the Contacts object for this store
*
@@ -105,7 +103,6 @@ public:
*/
const Contacts& contacts() const;
-
/**
* Get the Indexer associated with this store. It is an error
* to call this on a read-only store.
@@ -177,7 +174,6 @@ public:
*/
bool contains_message (const std::string& path) const;
-
/**
* Prototype for the ForEachFunc
*
diff --git a/lib/test-mu-contacts.cc b/lib/test-mu-contacts.cc
index 141f8b9..0d598c2 100644
--- a/lib/test-mu-contacts.cc
+++ b/lib/test-mu-contacts.cc
@@ -33,25 +33,21 @@ test_mu_contacts_01()
g_assert_cmpuint (contacts.size(), ==, 0);
contacts.add(std::move(Mu::ContactInfo ("Foo <foo.bar@example.com>",
- "foo.bar@example.com", "Foo",
- false, 12345)));
+ "foo.bar@example.com", "Foo", 12345)));
g_assert_false (contacts.empty());
g_assert_cmpuint (contacts.size(), ==, 1);
contacts.add(std::move(Mu::ContactInfo ("Cuux <cuux.fnorb@example.com>",
- "cuux@example.com", "Cuux", true,
- 54321)));
+ "cuux@example.com", "Cuux", 54321)));
g_assert_cmpuint (contacts.size(), ==, 2);
contacts.add(std::move(Mu::ContactInfo ("foo.bar@example.com",
- "foo.bar@example.com", "Foo",
- false, 77777)));
+ "foo.bar@example.com", "Foo", 77777)));
g_assert_cmpuint (contacts.size(), ==, 2);
contacts.add(std::move(Mu::ContactInfo ("Foo.Bar@Example.Com",
- "Foo.Bar@Example.Com", "Foo",
- false, 88888)));
+ "Foo.Bar@Example.Com", "Foo", 88888)));
g_assert_cmpuint (contacts.size(), ==, 2);
// note: replaces first.
@@ -60,7 +56,6 @@ test_mu_contacts_01()
g_assert_false (info);
}
-
{
const auto info = contacts._find("foo.BAR@example.com");
g_assert_true (info);
@@ -73,6 +68,27 @@ test_mu_contacts_01()
g_assert_cmpuint (contacts.size(), ==, 0);
}
+static void
+test_mu_contacts_02()
+{
+ Mu::StringVec personal = {
+ "foo@example.com",
+ "bar@cuux.org",
+ "/bar-.*@fnorb.f./"
+ };
+ Mu::Contacts contacts{"", personal};
+
+ g_assert_true (contacts.is_personal("foo@example.com"));
+ g_assert_true (contacts.is_personal("Bar@CuuX.orG"));
+ g_assert_true (contacts.is_personal("bar-123abc@fnorb.fi"));
+ g_assert_true (contacts.is_personal("bar-zzz@fnorb.fr"));
+
+ g_assert_false (contacts.is_personal("foo@bar.com"));
+ g_assert_false (contacts.is_personal("BÂr@CuuX.orG"));
+ g_assert_false (contacts.is_personal("bar@fnorb.fi"));
+ g_assert_false (contacts.is_personal("bar-zzz@fnorb.xr"));
+}
+
int
@@ -81,6 +97,7 @@ main (int argc, char *argv[])
g_test_init (&argc, &argv, NULL);
g_test_add_func ("/mu-contacts/01", test_mu_contacts_01);
+ g_test_add_func ("/mu-contacts/02", test_mu_contacts_02);
g_log_set_handler (NULL,
(GLogLevelFlags)
diff --git a/lib/test-mu-store.cc b/lib/test-mu-store.cc
index cdafd5f..9b1a1d2 100644
--- a/lib/test-mu-store.cc
+++ b/lib/test-mu-store.cc
@@ -79,7 +79,6 @@ test_store_add_count_remove ()
}
-
int
main (int argc, char *argv[])
{
diff --git a/man/mu-init.1 b/man/mu-init.1
index 34f95d8..66aea7f 100644
--- a/man/mu-init.1
+++ b/man/mu-init.1
@@ -1,4 +1,4 @@
-.TH MU-INIT 1 "February 2020" "User Manuals"
+.TH MU-INIT 1 "October 2020" "User Manuals"
.SH NAME
@@ -10,13 +10,14 @@ mu init \- initialize the mu message database
.SH DESCRIPTION
-\fBmu init\fR is the \fBmu\fR command for setting up the mu message
-database. After \fBmu init\fR has completed, you can run \fBmu index\fR
+\fBmu init\fR is the subcommand for setting up the mu message
+database. After \fBmu init\fR has completed, you can run \fBmu
+index\fR
.SH OPTIONS
-Note, some of the general options are described in the \fBmu(1)\fR man-page and
-not here, as they apply to multiple mu commands.
+Note, some of the general options are described in the \fBmu(1)\fR
+man-page and not here, as they apply to multiple mu commands.
.TP
\fB\-\-muhome\fR
@@ -34,7 +35,6 @@ are not supported.
.TP
\fB\-\-my-address\fR=\fI<my-email-address>\fR
-
specifies that some e-mail addresses are 'my-address' (\fB\-\-my-address\fR can
be used multiple times). This is used by \fBmu cfind\fR -- any e-mail address
found in the address fields of a message which also has \fI<my-email-address>\fR
@@ -42,6 +42,10 @@ in one of its address fields is considered a \fIpersonal\fR e-mail address. This
allows you, for example, to filter out (\fBmu cfind --personal\fR) addresses
which were merely seen in mailing list messages.
+\fI<my-email-address>\fR can be either a plain e-mail address (such as
+\fBfoo@example.com\fR), or a regular expression (of the 'Basic POSIX'
+flavor), wrapped in \fB/\fR (such as \fB/foo-.*@example\\.com/\fR).
+
.SH ENVIRONMENT
\fBmu init\fR uses \fBMAILDIR\fR to find the user's Maildir if it has not been