[mw-devel] MW3 r953 - trunk/src
psycodom at sucs.org
psycodom at sucs.org
Mon Apr 23 08:29:17 BST 2007
Author: psycodom
Date: 2007-04-23 08:29:16 +0100 (Mon, 23 Apr 2007)
New Revision: 953
Modified:
trunk/src/js.c
Log:
Adds UTF-8 support to javascript. Fixes #23
Modified: trunk/src/js.c
===================================================================
--- trunk/src/js.c 2007-04-22 20:00:35 UTC (rev 952)
+++ trunk/src/js.c 2007-04-23 07:29:16 UTC (rev 953)
@@ -51,58 +51,139 @@
JSCLASS_NO_OPTIONAL_MEMBERS
};
-char *
-utf16tolocal(char * utf16, size_t len) {
- char * local;
- char * utf16cpy;
- char * charset;
+// Currently assumes all scripts and script arguments are in UTF-8.
+// If the user's client is not UTF-8, that issue needs to be dealt with elsewhere.
+char *jsstring_to_utf8(jschar *the_jsstring, size_t len)
+{
+ char * utf8_string;
+ char * utf8_string_tmp;
+
+ char * jsstring_ptr;
+ char * utf8_ptr;
+
iconv_t conv;
size_t nconv;
- size_t localbytesleft;
- size_t utf16bytesleft;
- char * localcpy;
- /* TODO: charset should be replaced with the charset of the locale */
- charset = "UTF-8";
- conv = iconv_open(charset, "UTF16");
+ size_t utf8_bytes_left;
+ size_t jsstring_bytes_left;
+ size_t utf8_length;
+
+ conv = iconv_open("UTF-8", "UCS-2");
if (conv == (iconv_t)-1) {
- fprintf(stderr, "utf16tolocal bombed.\n");
+ fprintf(stderr, "jsstring_to_utf8: iconv_open() failed.\n");
return NULL;
}
- localbytesleft = (len) * sizeof(char) * 2; /* Urgh, x2 is kludge.. but how else? */
- utf16bytesleft = (len) * sizeof(jschar);
- local = (char *)malloc(localbytesleft);
+ utf8_bytes_left = (len) * sizeof(char) * 4; /* Urgh, x4 is kludge (x2 isn't big enough, some utf8 chars require 4 bytes and in a worst case scenario we'd run out) */
+ jsstring_bytes_left = (len) * sizeof(jschar);
+ utf8_string_tmp = (char *)malloc(utf8_bytes_left);
+
+ if (utf8_string_tmp == NULL) {
+ fprintf(stderr, "jsstring_to_utf8: Could not allocate memory for iconv\n");
+ return NULL;
+ }
+
+ utf8_ptr = utf8_string_tmp;
+ jsstring_ptr = (char *)the_jsstring;
- if (local == NULL) {
- fprintf(stderr, "Could not allocate memory for iconv\n");
+ while (jsstring_bytes_left > 0) {
+/* printf("Before: localbytesleft: %d utf16bytesleft: %d\n",
+ (int)localbytesleft, (int)utf16bytesleft); */
+ nconv = iconv(conv,
+ &jsstring_ptr, &jsstring_bytes_left,
+ &utf8_ptr, &utf8_bytes_left);
+/* printf("After: localbytesleft: %d utf16bytesleft: %d\n",
+ (int)localbytesleft, (int)jsstring_bytes_left); */
+ if (nconv == (size_t)-1) {
+ fprintf(stderr, "jsstring_to_utf8: iconv() barfed with error %d - ", errno);
+ /* iconv barfed, but why? */
+ if (errno == EILSEQ || errno == EINVAL) {
+ /* invalid input sequence, skip it */
+ fprintf(stderr, "Invalid input sequence\n");
+ jsstring_ptr++;
+ jsstring_bytes_left--;
+ errno = 0;
+ continue;
+ } else {
+ /* some other error, recover what we can */
+ *(char *)utf8_ptr = '\0';
+ perror("iconv");
+ errno = 0;
+ break;
+ }
+ }
+ }
+ iconv_close(conv);
+ utf8_length=(len*4)-utf8_bytes_left;
+
+ utf8_string=(char *)malloc(sizeof(char)*(utf8_length+1));
+ if(utf8_string==NULL) {
+ fprintf(stderr, "jsstring_to_utf8: Could not allocate memory for the utf8_string\n");
return NULL;
}
+ strncpy(utf8_string, utf8_string_tmp, utf8_length);
+ utf8_string[utf8_length]='\0';
+ free(utf8_string_tmp);
- localcpy = local;
- utf16cpy = utf16;
+ return utf8_string;
+}
- while (utf16bytesleft > 0) {
+jschar *utf8_to_jsstring(char *utf8_string, size_t *length, int *utferror)
+{
+ char * the_jsstring; // iconv uses char*, we'll cast to jschar* at the end.
+
+ char * jsstring_ptr;
+ char * utf8_ptr;
+
+ iconv_t conv;
+ size_t nconv;
+
+ size_t utf8_bytes_left;
+ size_t jsstring_bytes_left;
+
+ *utferror=0;
+ conv = iconv_open("UCS-2", "UTF-8");
+ if (conv == (iconv_t)-1) {
+ fprintf(stderr, "utf8_to_jsstring: iconv_open() failed.\n");
+ return NULL;
+ }
+
+ utf8_bytes_left = (strlen(utf8_string)+1) * sizeof(char);
+ jsstring_bytes_left = (strlen(utf8_string)+1) * sizeof(jschar);
+ the_jsstring = (char *)malloc(jsstring_bytes_left);
+
+ if (the_jsstring == NULL) {
+ fprintf(stderr, "utf8_to_jsstring: Could not allocate memory for iconv\n");
+ return NULL;
+ }
+
+ utf8_ptr = utf8_string;
+ jsstring_ptr = the_jsstring;
+
+ while (utf8_bytes_left > 0) {
/* printf("Before: localbytesleft: %d utf16bytesleft: %d\n",
(int)localbytesleft, (int)utf16bytesleft); */
nconv = iconv(conv,
- &utf16cpy, &utf16bytesleft,
- &localcpy, &localbytesleft);
+ &utf8_ptr, &utf8_bytes_left,
+ &jsstring_ptr, &jsstring_bytes_left);
/* printf("After: localbytesleft: %d utf16bytesleft: %d\n",
- (int)localbytesleft, (int)utf16bytesleft); */
+ (int)localbytesleft, (int)jsstring_bytes_left); */
if (nconv == (size_t)-1) {
- fprintf(stderr, "utf16tolocal barfed (%d) ", errno);
+ fprintf(stderr, "utf8_to_jsstring: iconv() barfed with error %d - ", errno);
/* iconv barfed, but why? */
if (errno == EILSEQ || errno == EINVAL) {
/* invalid input sequence, skip it */
fprintf(stderr, "Invalid input sequence\n");
- utf16++;
- utf16bytesleft--;
+ utf8_ptr++;
+ utf8_bytes_left--;
errno = 0;
+ *utferror=1;
continue;
} else {
/* some other error, recover what we can */
- *(char *)localcpy = '\0';
+ /**(char *)jsstring_ptr = '\0';
+ jsstring_ptr++;
+ *(char *)jsstring_ptr = '\0';*/
perror("iconv");
errno = 0;
break;
@@ -110,15 +191,21 @@
}
}
iconv_close(conv);
- return local;
+
+ *length=strlen(utf8_string)-(jsstring_bytes_left/(sizeof(jschar)));
+
+ return (jschar *)the_jsstring;
}
+
+
/* Function for printing to standard out from javascript (helpful for
* debugging and demonstrates how to call C from js)
*/
static JSBool
js_print(JSContext *cx, JSObject __attribute__((unused)) *obj, uintN argc, jsval *argv, jsval __attribute__((unused)) *rval) {
JSString *jsmsg;
+ jschar *ucmsg;
size_t len;
uintN i;
@@ -132,10 +219,15 @@
if (JSVAL_IS_STRING(argv[i])) {
jsmsg = JS_ValueToString(cx,argv[i]);
len = JS_GetStringLength(jsmsg);
- //ucmsg = JS_GetStringChars(jsmsg);
- //msg = utf16tolocal((char *)ucmsg, len);
- msg = JS_GetStringBytes(jsmsg);
+ ucmsg = JS_GetStringChars(jsmsg);
+ msg = jsstring_to_utf8(ucmsg, len);
+ if(msg==NULL) {
+ printf("js_print: failed to convert jsstring to utf8\n");
+ return JS_FALSE;
+ }
+ //msg = JS_GetStringBytes(jsmsg);
display_message(msg, 0, 1);
+ free(msg);
//printf("%s",msg);
} else
if (JSVAL_IS_NULL(argv[i])) {
@@ -155,6 +247,8 @@
static JSBool
js_mwexec(JSContext *cx, JSObject __attribute__((unused)) *obj, uintN argc, jsval *argv, jsval __attribute__((unused)) *rval) {
JSString *jsmsg;
+ jschar *ucmsg;
+ size_t len;
char * msg;
if (argc < 1) {
return JS_FALSE;
@@ -162,7 +256,14 @@
if (JSVAL_IS_STRING(argv[0])) {
jsmsg = JS_ValueToString(cx,argv[0]);
- msg = strdup(JS_GetStringBytes(jsmsg));
+ len = JS_GetStringLength(jsmsg);
+ ucmsg = JS_GetStringChars(jsmsg);
+ msg = jsstring_to_utf8(ucmsg, len);
+ if(msg==NULL) {
+ printf("js_mwexec: failed to convert jsstring to utf8\n");
+ return JS_FALSE;
+ }
+ //msg = strdup(JS_GetStringBytes(jsmsg));
DoCommand(msg, chattable);
free(msg);
return JS_TRUE;
@@ -174,7 +275,11 @@
static JSBool
js_say(JSContext *cx, JSObject __attribute__((unused)) *obj, uintN argc, jsval *argv, jsval __attribute__((unused)) *rval) {
JSString *jsmsg;
+ jschar *ucmsg;
+ size_t len;
char * msg;
+ char saymsg[MAXTEXTLENGTH];
+
if (argc < 1) {
return JS_FALSE;
}
@@ -187,8 +292,20 @@
if (JSVAL_IS_STRING(argv[0])) {
jsmsg = JS_ValueToString(cx,argv[0]);
- msg = strdup(JS_GetStringBytes(jsmsg));
- chat_say(msg);
+ len = JS_GetStringLength(jsmsg);
+ ucmsg = JS_GetStringChars(jsmsg);
+
+ msg = jsstring_to_utf8(ucmsg, len);
+ if(msg==NULL) {
+ printf("js_say: failed to convert jsstring to utf8\n");
+ return JS_FALSE;
+ }
+ //msg = strdup(JS_GetStringBytes(jsmsg));
+
+ // Things passed to chat_say may end up being passed to apply_gag, which expects a buffer MAXTEXTLENGTH long.
+ strncpy(saymsg,msg,MAXTEXTLENGTH-1);
+ saymsg[MAXTEXTLENGTH]='\0';
+ chat_say(saymsg);
free(msg);
return JS_TRUE;
}
@@ -389,74 +506,7 @@
return JS_TRUE;
}
-/* Convert a string from local charset to a string of jschar which
- JS_NewUCString() needs to create a new JSString with unicode
- characters in. An appropriate jschar* is created by casting
- UTF-16 data to jschar*, which is why we encode to UTF-16 here. */
-jschar *
-local2jschars(char * local) {
- char * utf16;
- char * utf16cpy;
- char * charset;
- iconv_t conv;
- size_t nconv;
- size_t localbytesleft;
- size_t utf16bytesleft;
- char * localcpy;
-
- /* TODO: charset should be replaced with the charset of the locale */
- charset = "UTF-8";
- /* Little endian UTF-16 seems to be the correct encoding. */
- conv = iconv_open("UTF-16LE", charset);
- if (conv == (iconv_t)-1) {
- fprintf(stderr, "local2jschars bombed.\n");
- return NULL;
- }
-
- localbytesleft = (strlen(local)) * sizeof(char);
- utf16bytesleft = (strlen(local)) * sizeof(jschar);
- utf16 = (char *)malloc(utf16bytesleft);
-
- if (utf16 == NULL) {
- fprintf(stderr, "Could not allocate memory for iconv\n");
- return NULL;
- }
-
- localcpy = local;
- utf16cpy = utf16;
-
- while (localbytesleft > 0) {
- /* printf("Before: localbytesleft: %d utf16bytesleft: %d\n",
- localbytesleft, utf16bytesleft); */
- nconv = iconv(conv,
- &localcpy, &localbytesleft,
- &utf16cpy, &utf16bytesleft);
- /* printf("After: localbytesleft: %d utf16bytesleft: %d\n",
- localbytesleft, utf16bytesleft); */
- if (nconv == (size_t)-1) {
- fprintf(stderr, "local2jschars barfed (%d)\n", errno);
- /* iconv barfed, but why? */
- if (errno == EILSEQ || errno == EINVAL) {
- /* invalid input sequence, skip it */
- fprintf(stderr, "Invalid input sequence\n");
- local++;
- localbytesleft--;
- errno = 0;
- continue;
- } else {
- /* some other error, recover what we can */
- fprintf(stderr, "Some other error\n");
- *(char *)utf16cpy = '\0';
- perror("iconv");
- errno = 0;
- break;
- }
- }
- }
- iconv_close(conv);
- return (jschar *)utf16;
-}
-
+/* prints the type of a jsval */
void show_type(char *name, jsval j)
{
printf("%s is:",name);
@@ -480,14 +530,16 @@
}
/* Execute some javascript commands */
-int
-js_exec(char * name, int argc, char **argvc) {
+int js_exec(char * name, int argc, char **argvc) {
int i;
jschar * js_string;
jsval rval;
jsval *argv;
JSBool ret;
-
+ jschar *ucarg;
+ size_t uclen;
+ int utferror;
+
js_string = NULL;
argv=calloc(argc,sizeof(jsval));
@@ -503,8 +555,9 @@
}
// js_string = local2jschars(argvc[i]);
if (js_string != NULL) {
- // argv[i] = STRING_TO_JSVAL(JS_NewUCString(jscx, js_string, strlen(argvc[i])));
- argv[i] = STRING_TO_JSVAL(JS_NewStringCopyZ(jscx, argvc[i]));
+ ucarg=utf8_to_jsstring(argvc[i], &uclen, &utferror);
+ argv[i] = STRING_TO_JSVAL(JS_NewUCStringCopyZ(jscx, ucarg));
+ // argv[i] = STRING_TO_JSVAL(JS_NewStringCopyZ(jscx, argvc[i]));
} else {
argv[i] = STRING_TO_JSVAL(JS_NewStringCopyZ(jscx, "(Garbled string)"));
}
@@ -564,14 +617,20 @@
free(line);
}
+/* Load and execute a file in javascript */
+/* Files are assumed to be in UTF-8; an error is reported if they are not. */
+/* Non-UTF-8 characters are stripped. */
int load_jsfile(FILE *f, char *filename)
{
char *body;
+ jschar *unicode_body;
int where, len;
+ size_t length;
JSBool success;
JSScript *script = NULL;
jsval retval;
uintN lineno=0;
+ int utferror;
where = ftell(f);
fseek(f, 0, SEEK_END);
@@ -581,36 +640,32 @@
printf("Loading %d bytes from %s\n", len, filename);
body = malloc(len+1);
- fread(body, 1, len, f);
- body[len]=0;
-
- /* Compile the js file specified */
- script = JS_CompileScript(jscx, jsroot, body, len, filename, lineno);
- free(body);
- if (script == NULL) {
- printf("Failed to compile js script: %s\n", filename);
+ if(body==NULL) {
+ fprintf(stderr, "load_jsfile: could not allocate memory for javascript file\n");
return 0;
}
- /* Execute the compiled script */
- success = JS_ExecuteScript(jscx, jsroot, script, &retval);
- if (success == JS_FALSE) {
- printf("Failed to execute js script: %s\n", filename);
+ fread(body, 1, len, f);
+ body[len]=0;
+ /*convert the script into jsstring, scripts assumed to be utf8*/
+ unicode_body=utf8_to_jsstring(body, &length, &utferror);
+
+ if(unicode_body==NULL)
+ {
+ fprintf(stderr, "load_jsfile: failed to convert script into javascript compatible unicode\n");
return 0;
}
+ if(utferror)
+ {
+ printf("The script '%s' does not appear to be utf-8. Some characters may have been discared. Please ensure this file is saved as UTF-8\n", filename);
+ }
+ /* Compile the js file specified */
+ /* script = JS_CompileScript(jscx, jsroot, body, len, filename, lineno); */
+ script = JS_CompileUCScript(jscx, jsroot, unicode_body, length, filename, lineno);
- return 1;
-}
-
-/* Load and execute a js file */
-int
-load_js(char *filename) {
- JSBool success;
- JSScript *script = NULL;
- jsval retval;
+ free(body);
+ free(unicode_body);
- /* Compile the js file specified */
- script = JS_CompileFile(jscx, jsroot, filename);
if (script == NULL) {
printf("Failed to compile js script: %s\n", filename);
return 0;
@@ -622,7 +677,7 @@
printf("Failed to execute js script: %s\n", filename);
return 0;
}
-
+
return 1;
}
@@ -685,14 +740,14 @@
JS_DefineFunction(jscx, jsroot, "exec", js_mwexec, 1, 0);
JS_DefineFunction(jscx, jsroot, "say", js_say, 1, 0);
JS_DefineFunction(jscx, jsroot, "wholist", js_wholist, 0, 1);
-
+
JS_DefineProperty(jscx, jsroot, "whoami", STRING_TO_JSVAL(JS_NewStringCopyZ(jscx,user->name)), NULL, NULL, JSPROP_READONLY|JSPROP_PERMANENT);
/* not for bbs user */
if (is_local) {
JS_DefineFunction(jscx, jsroot, "dbquery", js_doquery, 2, 1);
}
-
+
/* need additional functions :
* - one to bind functions to events (bind?) - yes, saves needing two files per javascript. code written outside of a function is executed when the script loads
* - one to load another script (include?) - possibly although most scripts are loaded from the .mwrc or using .load
More information about the mw-devel
mailing list