diff options
| author | Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> | 2021-03-16 16:51:01 +0200 |
|---|---|---|
| committer | Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> | 2021-03-16 16:51:01 +0200 |
| commit | 725826231fda0f736157f087355501fbf61fcc4e (patch) | |
| tree | f127367f0d33f4cf0ffecd49af5b72f1df24baf8 /lib/utils | |
| parent | b7660ed33de0612dbf695ee07605c01efa9c7b6e (diff) | |
utils: Add remove_ctrl
Add a helper function that replaces control characters with spaces and
collapses runs of spaces/control characters into a single space, and a test.
Diffstat (limited to 'lib/utils')
| -rw-r--r-- | lib/utils/mu-utils.cc | 16 | ||||
| -rw-r--r-- | lib/utils/test-utils.cc | 16 |
2 files changed, 32 insertions, 0 deletions
diff --git a/lib/utils/mu-utils.cc b/lib/utils/mu-utils.cc index 59ece80..d80dfb2 100644 --- a/lib/utils/mu-utils.cc +++ b/lib/utils/mu-utils.cc @@ -147,6 +147,22 @@ Mu::utf8_clean (const std::string& dirty) clean.erase (clean.find_last_not_of(" ") + 1); // remove trailing space return clean; +std::string +Mu::remove_ctrl (const std::string& str) +{ + char prev{'\0'}; + std::string result; + result.reserve(str.length()); + + for (auto&& c: str) { + if (::iscntrl(c) || c == ' ') { + if (prev != ' ') + result += prev = ' '; + } else + result += prev = c; + } + + return result; } std::vector<std::string> diff --git a/lib/utils/test-utils.cc b/lib/utils/test-utils.cc index b1f666e..020135f 100644 --- a/lib/utils/test-utils.cc +++ b/lib/utils/test-utils.cc @@ -137,6 +137,22 @@ test_flatten () } static void +test_remove_ctrl () +{ + CaseVec cases = { + { "Foo\n\nbar", true, "Foo bar" }, + { "", false, "" }, + { " ", false, " " }, + { "Hello World ", false, "Hello World " }, + { "Ångström", false, "Ångström" }, + }; + + test_cases (cases, [](auto s, auto f){ return remove_ctrl(s); }); +} + + + +static void test_clean () { CaseVec cases = { |
