[mw-devel] [Git][arthur/mw][master] Convert duktape's cesu-8 strings to utf-8 in js_print
Andrew Price
welshbyte at sucs.org
Fri Jul 28 01:16:21 BST 2017
Andrew Price pushed to branch master at Justin Mitchell / mw
Commits:
95c3f705 by Andrew Price at 2017-07-28T01:10:35+01:00
Convert duktape's cesu-8 strings to utf-8 in js_print
🔥🔥🤷🔥🔥
- - - - -
1 changed file:
- src/client/js-duk.c
Changes:
=====================================
src/client/js-duk.c
=====================================
--- a/src/client/js-duk.c
+++ b/src/client/js-duk.c
@@ -79,14 +79,76 @@ static void start_timeout(void)
timeout_event = alarm_after(3, 0, NULL, &timeout);
}
+/* Convert a NUL-terminated CESU-8 string to UTF-8.
+ *
+ * Duktape stores strings as CESU-8: a code point above U+FFFF is held as a
+ * UTF-16 surrogate pair in which each surrogate is itself encoded as a
+ * 3-byte sequence (6 bytes in total), in order to be kinda-sorta compatible
+ * with ecmascript's UTF-16 requirements. This function copies the string,
+ * rewriting every high surrogate that is immediately followed by a low
+ * surrogate as the equivalent 4-byte UTF-8 sequence.
+ *
+ * Everything else is copied through unchanged. That includes unpaired
+ * surrogates and malformed or truncated sequences, which are passed on
+ * byte for byte rather than dropped, so no input is silently lost and
+ * nothing is ever read beyond the terminating NUL.
+ *
+ * Returns a newly allocated string which the caller must free(), or NULL if
+ * cesu8 is NULL or the allocation fails.
+ */
+static char *cesu8_to_utf8(const char *cesu8)
+{
+	if (cesu8 == NULL)
+		return NULL;
+
+	/* A surrogate pair shrinks from 6 bytes to 4 and every other byte is
+	   copied as-is, so the output can never be longer than the input. */
+	char *utf8 = malloc(strlen(cesu8) + 1);
+	if (utf8 == NULL)
+		return NULL;
+
+	const unsigned char *cc = (const unsigned char *)cesu8;
+	unsigned char *cu = (unsigned char *)utf8;
+
+	while (*cc != '\0') {
+		/* ASCII, stray continuation bytes and invalid lead bytes are
+		   all handled as single bytes. */
+		size_t len = 1;
+
+		if (cc[0] >= 0xF0 && cc[0] <= 0xF7)
+			len = 4;
+		else if (cc[0] >= 0xE0 && cc[0] <= 0xEF)
+			len = 3;
+		else if (cc[0] >= 0xC0 && cc[0] <= 0xDF)
+			len = 2;
+
+		/* Only consume continuation bytes that are really there. The
+		   terminating NUL is not a continuation byte, so this scan
+		   stops at the end of the string at the latest. */
+		size_t have = 1;
+		while (have < len && (cc[have] & 0xC0) == 0x80)
+			have++;
+		if (have < len)
+			len = 1; /* truncated or malformed: pass on the lead byte alone */
+
+		if (len == 3) {
+			/* Surrogates are encoded in 3 chars so convert back to a
+			   single UTF-16 value */
+			uint32_t hi = ((uint32_t)cc[0] & 0x0F) << 12 |
+			              ((uint32_t)cc[1] & 0x3F) << 6 |
+			              ((uint32_t)cc[2] & 0x3F);
+
+			/* cc[0..2] are known to be non-NUL, so cc[3] is in bounds;
+			   each later byte is only inspected once the one before it
+			   has been shown to be non-NUL. */
+			if (hi >= 0xD800 && hi <= 0xDBFF &&
+			    (cc[3] & 0xF0) == 0xE0 &&
+			    (cc[4] & 0xC0) == 0x80 &&
+			    (cc[5] & 0xC0) == 0x80) {
+				uint32_t lo = ((uint32_t)cc[3] & 0x0F) << 12 |
+				              ((uint32_t)cc[4] & 0x3F) << 6 |
+				              ((uint32_t)cc[5] & 0x3F);
+
+				if (lo >= 0xDC00 && lo <= 0xDFFF) {
+					/* Have high and low surrogates - convert to
+					   code point then back to UTF-8 */
+					uint32_t u = 0x10000 +
+					             (((hi & 0x3FF) << 10) | (lo & 0x3FF));
+					*cu++ = (unsigned char)(0xF0 | (u >> 18));
+					*cu++ = (unsigned char)(0x80 | ((u >> 12) & 0x3F));
+					*cu++ = (unsigned char)(0x80 | ((u >> 6) & 0x3F));
+					*cu++ = (unsigned char)(0x80 | (u & 0x3F));
+					cc += 6;
+					continue;
+				}
+			}
+		}
+
+		memcpy(cu, cc, len);
+		cu += len;
+		cc += len;
+	}
+	*cu = '\0';
+	return utf8;
+}
+
static duk_ret_t js_print(duk_context *cx)
{
int argc = duk_get_top(cx);
if (argc < 1)
return 0;
- for (int i = 0; i < argc; i++)
- display_message(duk_to_string(cx, i - argc), 0, 1);
+ for (int i = 0; i < argc; i++) {
+ const char *cesu8 = duk_to_string(cx, i - argc);
+ char *utf8 = cesu8_to_utf8(cesu8);
+ display_message(utf8, 0, 1);
+ free(utf8);
+ }
return 0;
}
View it on GitLab: https://projects.sucs.org/arthur/mw/commit/95c3f70527013693c0860357c871f74b53786c9b
---
View it on GitLab: https://projects.sucs.org/arthur/mw/commit/95c3f70527013693c0860357c871f74b53786c9b
You're receiving this email because of your account on projects.sucs.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.sucs.org/pipermail/mw-devel/attachments/20170728/8a797591/attachment-0001.html>
More information about the mw-devel
mailing list