summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> 2010-12-03 20:53:25 +0200
committer Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> 2010-12-03 20:53:25 +0200
commit 13b1e87cc4e13f9ccd86b2086883033fb6339ec0 (patch)
tree 3819fc1ddf9f6e50ce28d72a45727f54b55dddf4 /src
parent 10b0f321c88a813a2d72244fe14d2dbde8ab6911 (diff)
* mu-str-normalize.c: add note about alternative implementation
Diffstat (limited to 'src')
-rw-r--r-- src/mu-str-normalize.c 12
1 files changed, 12 insertions, 0 deletions
diff --git a/src/mu-str-normalize.c b/src/mu-str-normalize.c
index d5fa801..a6d6a75 100644
--- a/src/mu-str-normalize.c
+++ b/src/mu-str-normalize.c
@@ -28,6 +28,7 @@
#include "mu-str.h"
+
char*
mu_str_normalize (const char *str, gboolean downcase)
{
@@ -36,6 +37,17 @@ mu_str_normalize (const char *str, gboolean downcase)
return mu_str_normalize_in_place (g_strdup(str), downcase);
}
+
+/*
+ * this implementation works for accented chars in the Unicode blocks
+ * 'Latin-1 Supplement' and 'Latin Extended-A'. An alternative (slower
+ * but much simpler) implementation would be to use g_utf8_normalize
+ * to decompose each character into its accent part and its character
+ * part, and then get rid of the accent part. That would be slower than
+ * what we do here, but also more *complete*. It's unclear whether it
+ * would be slower *in practice* => needs checking
+ */
+
/* we can normalize in-place, as the normalized string will never be
* longer than the original. even for replacements that are 2 chars
* wide (e.g. German ß => ss), the replacement is 2 bytes, like the