[mw-devel] MW3 r953 - trunk/src

psycodom at sucs.org psycodom at sucs.org
Mon Apr 23 08:29:17 BST 2007


Author: psycodom
Date: 2007-04-23 08:29:16 +0100 (Mon, 23 Apr 2007)
New Revision: 953

Modified:
   trunk/src/js.c
Log:
Adds UTF-8 support to javascript. Fixes #23

Modified: trunk/src/js.c
===================================================================
--- trunk/src/js.c	2007-04-22 20:00:35 UTC (rev 952)
+++ trunk/src/js.c	2007-04-23 07:29:16 UTC (rev 953)
@@ -51,58 +51,139 @@
 	JSCLASS_NO_OPTIONAL_MEMBERS
 };
 
-char *
-utf16tolocal(char * utf16, size_t len) {
-	char * local;
-	char * utf16cpy;
-	char * charset;
+// Convert len jschar units starting at the_jsstring from UCS-2 to UTF-8 with iconv; returns a malloc()ed NUL-terminated string, or NULL when iconv_open() or a malloc() fails. Currently assuming all scripts and script arguments will be in utf8;
+// if a user has a non-utf8 client then that issue needs to be dealt with elsewhere. The caller owns the returned buffer (js_print, js_mwexec and js_say free() it).
+char *jsstring_to_utf8(jschar *the_jsstring, size_t len)
+{
+	char * utf8_string; /* exact-size copy that is returned */
+	char * utf8_string_tmp; /* oversized scratch buffer that iconv writes into */
+
+	char * jsstring_ptr; /* iconv input cursor */
+	char * utf8_ptr; /* iconv output cursor */
+
 	iconv_t conv;
 	size_t nconv;
-	size_t localbytesleft;
-	size_t utf16bytesleft;
-	char * localcpy;
 
-	/* TODO: charset should be replaced with the charset of the locale */
-	charset = "UTF-8";
-	conv = iconv_open(charset, "UTF16");
+	size_t utf8_bytes_left; /* free bytes remaining in utf8_string_tmp */
+	size_t jsstring_bytes_left; /* unconverted input bytes remaining */
+	size_t utf8_length; /* bytes of UTF-8 actually produced */
+		
+	conv = iconv_open("UTF-8", "UCS-2"); /* NOTE(review): "UCS-2" names no byte order, while the removed local2jschars() used "UTF-16LE" - confirm iconv picks the order jschar uses */
 	if (conv == (iconv_t)-1) {
-		fprintf(stderr, "utf16tolocal bombed.\n");
+		fprintf(stderr, "jsstring_to_utf8: iconv_open() failed.\n");
 		return NULL;
 	}
 	
-	localbytesleft = (len) * sizeof(char) * 2; /* Urgh, x2 is kludge.. but how else? */
-	utf16bytesleft = (len) * sizeof(jschar);
-	local = (char *)malloc(localbytesleft);
+	utf8_bytes_left = (len) * sizeof(char) * 4; /* Urgh, x4 is kludge (x2 isn't big enough, some utf8 chars require 4 bytes and in a worst case scenario we'd run out) */
+	jsstring_bytes_left = (len) * sizeof(jschar);
+	utf8_string_tmp = (char *)malloc(utf8_bytes_left); /* NOTE(review): for len == 0 this is malloc(0); where that returns NULL the check below reports failure for an empty string */
+	
+	if (utf8_string_tmp == NULL) { /* NOTE(review): this path returns without iconv_close(conv) */
+		fprintf(stderr, "jsstring_to_utf8: Could not allocate memory for iconv\n");
+		return NULL;
+	}
+	
+	utf8_ptr = utf8_string_tmp;
+	jsstring_ptr = (char *)the_jsstring; /* iconv takes char pointers, so the jschar array is walked as bytes */
 
-	if (local == NULL) {
-		fprintf(stderr, "Could not allocate memory for iconv\n");
+	while (jsstring_bytes_left > 0) { /* call iconv until all input is consumed or an unrecoverable error breaks out */
+/*		printf("Before: localbytesleft: %d utf16bytesleft: %d\n",
+			(int)localbytesleft, (int)utf16bytesleft); */
+		nconv = iconv(conv, 
+			&jsstring_ptr, &jsstring_bytes_left, 
+			&utf8_ptr, &utf8_bytes_left);
+/*		printf("After: localbytesleft: %d utf16bytesleft: %d\n", 
+			(int)localbytesleft, (int)jsstring_bytes_left); */
+		if (nconv == (size_t)-1) {
+			fprintf(stderr, "jsstring_to_utf8: iconv() barfed with error %d - ", errno);
+			/* iconv barfed, but why? */
+			if (errno == EILSEQ || errno == EINVAL) {
+				/* invalid input sequence, skip it */
+				fprintf(stderr, "Invalid input sequence\n");
+				jsstring_ptr++; /* NOTE(review): steps one byte, not one jschar; if jschar is wider than a byte the next iconv call starts mid code unit - confirm intended */
+				jsstring_bytes_left--;
+				errno = 0;
+				continue;
+			} else {
+				/* some other error, recover what we can */
+				*(char *)utf8_ptr = '\0'; /* NOTE(review): if the failure was E2BIG with the buffer completely full, utf8_ptr would be one past the end of utf8_string_tmp here - verify */
+				perror("iconv");
+				errno = 0;
+				break;
+			}
+		}
+	}
+	iconv_close(conv);
+	utf8_length=(len*4)-utf8_bytes_left; /* bytes written = initial buffer size (len*4) minus the space still free */
+	
+	utf8_string=(char *)malloc(sizeof(char)*(utf8_length+1)); /* +1 for the terminating NUL */
+	if(utf8_string==NULL) { /* NOTE(review): this path returns without freeing utf8_string_tmp */
+		fprintf(stderr, "jsstring_to_utf8: Could not allocate memory for the utf8_string\n");
 		return NULL;
 	}
+	strncpy(utf8_string, utf8_string_tmp, utf8_length); /* NOTE(review): strncpy stops at the first NUL byte in the source and zero-pads, so output after a converted U+0000 is dropped - confirm acceptable */
+	utf8_string[utf8_length]='\0';
+	free(utf8_string_tmp);
 	
-	localcpy = local;
-	utf16cpy = utf16;
+	return utf8_string;
+}
 
-	while (utf16bytesleft > 0) {
+jschar *utf8_to_jsstring(char *utf8_string, size_t *length, int *utferror) /* Convert the NUL-terminated utf8_string from UTF-8 to UCS-2 with iconv and return it in a malloc()ed buffer cast to jschar* */
+{ /* Returns NULL (leaving *length unset) if iconv_open() or malloc() fails; *utferror ends up 1 if any invalid input byte was skipped, otherwise 0 */
+	char * the_jsstring; // iconv uses char*, we'll cast to jschar* at the end.
+
+	char * jsstring_ptr; /* iconv output cursor */
+	char * utf8_ptr; /* iconv input cursor */
+
+	iconv_t conv;
+	size_t nconv;
+
+	size_t utf8_bytes_left; /* unconverted input bytes remaining */
+	size_t jsstring_bytes_left; /* free bytes remaining in the_jsstring */
+	
+	*utferror=0;
+	conv = iconv_open("UCS-2", "UTF-8");
+	if (conv == (iconv_t)-1) {
+		fprintf(stderr, "utf8_to_jsstring: iconv_open() failed.\n");
+		return NULL;
+	}
+	
+	utf8_bytes_left = (strlen(utf8_string)+1) * sizeof(char); /* +1 so the terminating NUL is converted too */
+	jsstring_bytes_left = (strlen(utf8_string)+1) * sizeof(jschar); /* output buffer sized at one jschar per input byte, terminator included */
+	the_jsstring = (char *)malloc(jsstring_bytes_left);
+
+	if (the_jsstring == NULL) { /* NOTE(review): this path returns without iconv_close(conv) */
+		fprintf(stderr, "utf8_to_jsstring: Could not allocate memory for iconv\n");
+		return NULL;
+	}
+	
+	utf8_ptr = utf8_string;
+	jsstring_ptr = the_jsstring;
+
+	while (utf8_bytes_left > 0) {
 /*		printf("Before: localbytesleft: %d utf16bytesleft: %d\n",
 			(int)localbytesleft, (int)utf16bytesleft); */
 		nconv = iconv(conv, 
-			&utf16cpy, &utf16bytesleft, 
-			&localcpy, &localbytesleft);
+			&utf8_ptr, &utf8_bytes_left, 
+			&jsstring_ptr, &jsstring_bytes_left);
 /*		printf("After: localbytesleft: %d utf16bytesleft: %d\n", 
-			(int)localbytesleft, (int)utf16bytesleft); */
+			(int)localbytesleft, (int)jsstring_bytes_left); */
 		if (nconv == (size_t)-1) {
-			fprintf(stderr, "utf16tolocal barfed (%d) ", errno);
+			fprintf(stderr, "utf8_to_jsstring: iconv() barfed with error %d - ", errno);
 			/* iconv barfed, but why? */
 			if (errno == EILSEQ || errno == EINVAL) {
 				/* invalid input sequence, skip it */
 				fprintf(stderr, "Invalid input sequence\n");
-				utf16++;
-				utf16bytesleft--;
+				utf8_ptr++; /* drop one offending byte and retry from the next */
+				utf8_bytes_left--;
 				errno = 0;
+				*utferror=1; /* tell the caller the input was not clean UTF-8 */
 				continue;
 			} else {
 				/* some other error, recover what we can */
-				*(char *)localcpy = '\0';
+				/**(char *)jsstring_ptr = '\0';
+				jsstring_ptr++;
+				*(char *)jsstring_ptr = '\0';*/ /* NOTE(review): with the terminator write disabled, the buffer may lack a trailing NUL when the loop exits via break - verify callers cope */
 				perror("iconv");
 				errno = 0;
 				break;
@@ -110,15 +191,21 @@
 		}
 	}
 	iconv_close(conv);
-	return local;
+	
+	*length=strlen(utf8_string)-(jsstring_bytes_left/(sizeof(jschar))); /* jschars written, not counting the converted terminator (assumes the loop consumed all input) */
+	
+	return (jschar *)the_jsstring; /* load_jsfile() releases this buffer with free() */
 }
 
+
+
 /* Function for printing to standard out from javascript (helpful for
  * debugging and demonstrates how to call C from js)
  */
 static JSBool
 js_print(JSContext *cx, JSObject __attribute__((unused)) *obj, uintN argc, jsval *argv, jsval __attribute__((unused)) *rval) {
 	JSString *jsmsg;
+	jschar *ucmsg;
 	size_t len;
 	uintN i;
 	
@@ -132,10 +219,15 @@
 		if (JSVAL_IS_STRING(argv[i])) {
 			jsmsg = JS_ValueToString(cx,argv[i]);
 			len = JS_GetStringLength(jsmsg);
-			//ucmsg = JS_GetStringChars(jsmsg);
-			//msg = utf16tolocal((char *)ucmsg, len);
-			msg = JS_GetStringBytes(jsmsg);
+			ucmsg = JS_GetStringChars(jsmsg);
+			msg = jsstring_to_utf8(ucmsg, len);
+			if(msg==NULL) {
+				printf("js_print: failed to convert jsstring to utf8\n");
+				return JS_FALSE;
+			}
+			//msg = JS_GetStringBytes(jsmsg);
 			display_message(msg, 0, 1);
+			free(msg);
 			//printf("%s",msg);
 		} else 
 		if (JSVAL_IS_NULL(argv[i])) {
@@ -155,6 +247,8 @@
 static JSBool
 js_mwexec(JSContext *cx, JSObject __attribute__((unused)) *obj, uintN argc, jsval *argv, jsval __attribute__((unused)) *rval) {
 	JSString *jsmsg;
+	jschar *ucmsg;
+	size_t len;
 	char * msg;
 	if (argc < 1) {
 		return JS_FALSE;
@@ -162,7 +256,14 @@
 	
 	if (JSVAL_IS_STRING(argv[0])) {
 		jsmsg = JS_ValueToString(cx,argv[0]);
-		msg = strdup(JS_GetStringBytes(jsmsg));
+		len = JS_GetStringLength(jsmsg);
+		ucmsg = JS_GetStringChars(jsmsg);
+		msg = jsstring_to_utf8(ucmsg, len);
+		if(msg==NULL) {
+			printf("js_mwexec: failed to convert jsstring to utf8\n");
+			return JS_FALSE;
+		}
+		//msg = strdup(JS_GetStringBytes(jsmsg));
 		DoCommand(msg, chattable);
 		free(msg);
 		return JS_TRUE;
@@ -174,7 +275,11 @@
 static JSBool
 js_say(JSContext *cx, JSObject __attribute__((unused)) *obj, uintN argc, jsval *argv, jsval __attribute__((unused)) *rval) {
 	JSString *jsmsg;
+	jschar *ucmsg;
+	size_t len;
 	char * msg;
+	char saymsg[MAXTEXTLENGTH];
+	
 	if (argc < 1) {
 		return JS_FALSE;
 	}
@@ -187,8 +292,20 @@
 	
 	if (JSVAL_IS_STRING(argv[0])) {
 		jsmsg = JS_ValueToString(cx,argv[0]);
-		msg = strdup(JS_GetStringBytes(jsmsg));
-		chat_say(msg);
+		len = JS_GetStringLength(jsmsg);
+		ucmsg = JS_GetStringChars(jsmsg);
+		
+		msg = jsstring_to_utf8(ucmsg, len);
+		if(msg==NULL) {
+			printf("js_say: failed to convert jsstring to utf8\n");
+			return JS_FALSE;
+		}
+		//msg = strdup(JS_GetStringBytes(jsmsg));
+		
+		// things passed to chatsay may end up being passed to apply_gag which expects a buffer MAXTEXTLENGTH long.
+		strncpy(saymsg,msg,MAXTEXTLENGTH-1);
+		saymsg[MAXTEXTLENGTH]='\0';
+		chat_say(saymsg);
 		free(msg);
 		return JS_TRUE;
 	}
@@ -389,74 +506,7 @@
 	return JS_TRUE;
 }
 
-/* Convert a string from local charset to a string of jschar which
-   JS_NewUCString() needs to create a new JSString with unicode 
-   characters in. An appropriate jschar* is created by casting 
-   UTF-16 data to jschar*, which is why we encode to UTF-16 here. */
-jschar *
-local2jschars(char * local) {
-	char * utf16;
-	char * utf16cpy;
-	char * charset;
-	iconv_t conv;
-	size_t nconv;
-	size_t localbytesleft;
-	size_t utf16bytesleft;
-	char * localcpy;
-
-	/* TODO: charset should be replaced with the charset of the locale */
-	charset = "UTF-8";
-	/* Little endian UTF-16 seems to be the correct encoding. */
-	conv = iconv_open("UTF-16LE", charset);
-	if (conv == (iconv_t)-1) {
-		fprintf(stderr, "local2jschars bombed.\n");
-		return NULL;
-	}
-	
-	localbytesleft = (strlen(local)) * sizeof(char);
-	utf16bytesleft = (strlen(local)) * sizeof(jschar);
-	utf16 = (char *)malloc(utf16bytesleft);
-
-	if (utf16 == NULL) {
-		fprintf(stderr, "Could not allocate memory for iconv\n");
-		return NULL;
-	}
-	
-	localcpy = local;
-	utf16cpy = utf16;
-
-	while (localbytesleft > 0) {
-		/* printf("Before: localbytesleft: %d utf16bytesleft: %d\n",
-			localbytesleft, utf16bytesleft); */
-		nconv = iconv(conv, 
-			&localcpy, &localbytesleft, 
-			&utf16cpy, &utf16bytesleft);
-		/* printf("After: localbytesleft: %d utf16bytesleft: %d\n",
-			localbytesleft, utf16bytesleft); */
-		if (nconv == (size_t)-1) {
-			fprintf(stderr, "local2jschars barfed (%d)\n", errno);
-			/* iconv barfed, but why? */
-			if (errno == EILSEQ || errno == EINVAL) {
-				/* invalid input sequence, skip it */
-				fprintf(stderr, "Invalid input sequence\n");
-				local++;
-				localbytesleft--;
-				errno = 0;
-				continue;
-			} else {
-				/* some other error, recover what we can */
-				fprintf(stderr, "Some other error\n");
-				*(char *)utf16cpy = '\0';
-				perror("iconv");
-				errno = 0;
-				break;
-			}
-		}
-	}
-	iconv_close(conv);
-	return (jschar *)utf16;
-}
-
+/* prints the type of a jsval */
 void show_type(char *name, jsval j)
 {
 	printf("%s is:",name);
@@ -480,14 +530,16 @@
 }
 
 /* Execute some javascript commands */
-int
-js_exec(char * name, int argc, char **argvc) {
+int js_exec(char * name, int argc, char **argvc) {
 	int i;
 	jschar * js_string;
 	jsval rval;
 	jsval *argv;
 	JSBool ret;
-
+	jschar *ucarg;
+	size_t uclen;
+	int utferror;
+	
 	js_string = NULL;
 	argv=calloc(argc,sizeof(jsval));
 
@@ -503,8 +555,9 @@
 			}
 		//	js_string = local2jschars(argvc[i]);
 			if (js_string != NULL) {
-		//		argv[i] = STRING_TO_JSVAL(JS_NewUCString(jscx, js_string, strlen(argvc[i])));
-				argv[i] = STRING_TO_JSVAL(JS_NewStringCopyZ(jscx, argvc[i]));
+				ucarg=utf8_to_jsstring(argvc[i], &uclen, &utferror);
+				argv[i] = STRING_TO_JSVAL(JS_NewUCStringCopyZ(jscx, ucarg));
+		//		argv[i] = STRING_TO_JSVAL(JS_NewStringCopyZ(jscx, argvc[i]));
 			} else {
 				argv[i] = STRING_TO_JSVAL(JS_NewStringCopyZ(jscx, "(Garbled string)"));
 			}
@@ -564,14 +617,20 @@
 	free(line);
 }
 
+/* Load a javascript file from the open stream f, compile it and execute it; returns 1 on success and 0 on the failure paths */
+/* Files are assumed to be in utf-8; a message is printed (and loading continues) if the conversion found invalid bytes */
+/* Non utf-8 bytes are skipped by utf8_to_jsstring() */
 int load_jsfile(FILE *f, char *filename)
 {
 	char *body;
+	jschar *unicode_body; /* script text after conversion by utf8_to_jsstring() */
 	int where, len;
+	size_t length; /* length reported by utf8_to_jsstring(), passed on to the compiler */
 	JSBool success;
 	JSScript *script = NULL;
 	jsval retval;
 	uintN lineno=0;
+	int utferror; /* set non-zero by utf8_to_jsstring() when invalid bytes were skipped */
 
 	where = ftell(f);
 	fseek(f, 0, SEEK_END);
@@ -581,36 +640,32 @@
 	printf("Loading %d bytes from %s\n", len, filename);
 
 	body = malloc(len+1);
-	fread(body, 1, len, f);
-	body[len]=0;
-
-	/* Compile the js file specified */
-	script = JS_CompileScript(jscx, jsroot, body, len, filename, lineno);
-	free(body);
-	if (script == NULL) {
-		printf("Failed to compile js script: %s\n", filename);
+	if(body==NULL) {
+		fprintf(stderr, "load_jsfile: could not allocate memory for javascript file\n");
 		return 0;
 	}
 	
-	/* Execute the compiled script */
-	success = JS_ExecuteScript(jscx, jsroot, script, &retval);
-	if (success == JS_FALSE) {
-		printf("Failed to execute js script: %s\n", filename);
+	fread(body, 1, len, f); /* NOTE(review): the fread() result is not checked, so a short read goes unnoticed */
+	body[len]=0; /* terminate so the buffer can be treated as a C string */
+	/*convert the script into jsstring, scripts assumed to be utf8*/
+	unicode_body=utf8_to_jsstring(body, &length, &utferror); /* NOTE(review): the converter measures its input with strlen(), so a NUL byte inside the file would end the script early - confirm acceptable */
+	
+	if(unicode_body==NULL) /* NOTE(review): body is not freed on this failure path */
+	{
+		fprintf(stderr, "load_jsfile: failed to convert script into javascript compatible unicode\n");
 		return 0;
 	}
+	if(utferror) /* conversion skipped at least one invalid byte; warn but carry on */
+	{
+		printf("The script '%s' does not appear to be utf-8.  Some characters may have been discared.  Please ensure this file is saved as UTF-8\n", filename);
+	}
+	/* Compile the js file specified */
+	/* script = JS_CompileScript(jscx, jsroot, body, len, filename, lineno); */
+	script = JS_CompileUCScript(jscx, jsroot, unicode_body, length, filename, lineno); /* compile from the converted jschar buffer rather than the raw bytes */
 
-	return 1;
-}
-
-/* Load and execute a js file */
-int
-load_js(char *filename) {
-	JSBool success;
-	JSScript *script = NULL;
-	jsval retval;
+	free(body); /* neither buffer is used again after the compile attempt */
+	free(unicode_body);
 	
-	/* Compile the js file specified */
-	script = JS_CompileFile(jscx, jsroot, filename);
 	if (script == NULL) {
 		printf("Failed to compile js script: %s\n", filename);
 		return 0;
@@ -622,7 +677,7 @@
 		printf("Failed to execute js script: %s\n", filename);
 		return 0;
 	}
-	
+
 	return 1;
 }
 
@@ -685,14 +740,14 @@
 	JS_DefineFunction(jscx, jsroot, "exec", js_mwexec, 1, 0);
 	JS_DefineFunction(jscx, jsroot, "say", js_say, 1, 0);
 	JS_DefineFunction(jscx, jsroot, "wholist", js_wholist, 0, 1);
-
+	
 	JS_DefineProperty(jscx, jsroot, "whoami", STRING_TO_JSVAL(JS_NewStringCopyZ(jscx,user->name)), NULL, NULL, JSPROP_READONLY|JSPROP_PERMANENT);
 
 	/* not for bbs user */
 	if (is_local) {
 		JS_DefineFunction(jscx, jsroot, "dbquery", js_doquery, 2, 1);
 	}
-
+	
 	/* need additional functions : 
 	 * - one to bind functions to events (bind?) - yes, saves needing two files per javascript.  code written outside of a function is executed when the script loads
 	 * - one to load another script (include?) - possibly although most scripts are loaded from the .mwrc or using .load





More information about the mw-devel mailing list