aboutsummaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorLukas Fürmetz <fuermetz@mailbox.org>2017-11-15 22:06:06 +0100
committerLukas Fürmetz <fuermetz@mailbox.org>2017-11-16 12:05:24 +0100
commit865528b34398ee4ad9d7dcfa86e54e27106bd4d5 (patch)
treea284a217be7bfc42fcceb37c26552f87ac8bd974 /utf8.c
parentbcf25fa15a086ac8c083835e4d2d3cde1642740e (diff)
Extract utf8.c
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c69
1 files changed, 69 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
new file mode 100644
index 0000000..49e1477
--- /dev/null
+++ b/utf8.c
@@ -0,0 +1,69 @@
+#include "utf8.h"
+
/**
 * Encode a Unicode scalar value as a UTF-8 byte sequence.
 *
 * @param codepoint value to encode; must be in U+0000..U+10FFFF and must not
 *                  be a UTF-16 surrogate (U+D800..U+DFFF)
 * @param buffer    receives the encoded bytes; must have room for 4 bytes.
 *                  Only the first N bytes are written, where N is the return
 *                  value. No terminating NUL is appended.
 * @return number of bytes written (1..4), or 0 if codepoint cannot be
 *         represented in UTF-8; buffer is left untouched in that case.
 */
size_t codepoint_to_utf8(const uint32_t codepoint, unsigned char buffer[4]) {
    if (codepoint <= 0x7F) {
        /* 0xxxxxxx */
        buffer[0] = (unsigned char)codepoint;
        return 1;
    }
    if (codepoint <= 0x07FF) {
        /* 110xxxxx 10xxxxxx */
        buffer[0] = (unsigned char)(0xC0 | (codepoint >> 6));
        buffer[1] = (unsigned char)(0x80 | (codepoint & 0x3F));
        return 2;
    }
    if (codepoint <= 0xFFFF) {
        /* Surrogates are reserved for UTF-16 and have no valid UTF-8 form. */
        if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
            return 0;
        }
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        buffer[0] = (unsigned char)(0xE0 | (codepoint >> 12));
        buffer[1] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
        buffer[2] = (unsigned char)(0x80 | (codepoint & 0x3F));
        return 3;
    }
    if (codepoint <= 0x10FFFF) {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        buffer[0] = (unsigned char)(0xF0 | (codepoint >> 18));
        buffer[1] = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3F));
        buffer[2] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
        buffer[3] = (unsigned char)(0x80 | (codepoint & 0x3F));
        return 4;
    }
    /* Beyond the Unicode code space. */
    return 0;
}
+
/**
 * Decode a single UTF-8 encoded character.
 *
 * The sequence must be well-formed: the lead byte has to match len, every
 * trailing byte has to be a continuation byte (10xxxxxx), and the decoded
 * value has to be the shortest possible encoding of a Unicode scalar value.
 * Overlong encodings, encoded surrogates (U+D800..U+DFFF) and values above
 * U+10FFFF are rejected.
 *
 * @param buffer    bytes of the sequence; only the first len bytes are read
 * @param len       number of bytes in the sequence (valid range 1..4)
 * @param codepoint receives the decoded value on success; set to 0 on failure
 * @return true if buffer held exactly one well-formed sequence of len bytes,
 *         false otherwise
 */
bool utf8_to_codepoint(const unsigned char buffer[4], const size_t len,
                       uint32_t *codepoint) {
    uint32_t value;
    uint32_t minimum; /* smallest value that legitimately needs len bytes */

    *codepoint = 0;

    switch (len) {
    case 1:
        if (buffer[0] > 0x7F) {
            return false;
        }
        *codepoint = buffer[0];
        return true;
    case 2:
        if ((buffer[0] & 0xE0) != 0xC0) {
            return false;
        }
        value = buffer[0] & 0x1F;
        minimum = 0x80;
        break;
    case 3:
        if ((buffer[0] & 0xF0) != 0xE0) {
            return false;
        }
        value = buffer[0] & 0x0F;
        minimum = 0x0800;
        break;
    case 4:
        if ((buffer[0] & 0xF8) != 0xF0) {
            return false;
        }
        value = buffer[0] & 0x07;
        minimum = 0x10000;
        break;
    default:
        return false;
    }

    /* Fold in the 6 payload bits of each continuation byte. */
    for (size_t i = 1; i < len; i++) {
        if ((buffer[i] & 0xC0) != 0x80) {
            return false;
        }
        value = (value << 6) | (buffer[i] & 0x3F);
    }

    /* Overlong form: the value would have fit in a shorter sequence. */
    if (value < minimum) {
        return false;
    }
    /* Outside the Unicode code space (lead bytes F5..F7, or F4 90..). */
    if (value > 0x10FFFF) {
        return false;
    }
    /* UTF-16 surrogates must never appear in UTF-8. */
    if (value >= 0xD800 && value <= 0xDFFF) {
        return false;
    }

    *codepoint = value;
    return true;
}