[mw-devel] MW3 r958 - trunk/src

psycodom at sucs.org psycodom at sucs.org
Tue May 1 10:13:26 BST 2007


Author: psycodom
Date: 2007-05-01 10:13:26 +0100 (Tue, 01 May 2007)
New Revision: 958

Modified:
   trunk/src/iconv.c
   trunk/src/js.c
   trunk/src/main.c
   trunk/src/strings.c
Log:
Improves locale support and charset conversions.
  Input converts from local to utf8
  Output converts from utf8 to local
  If invalid chars do sneak into the output before conversion to local, they are marked and a warning is issued
    (some mw-script and gags may cause this to happen at the moment)
     


Modified: trunk/src/iconv.c
===================================================================
--- trunk/src/iconv.c	2007-04-29 20:15:14 UTC (rev 957)
+++ trunk/src/iconv.c	2007-05-01 09:13:26 UTC (rev 958)
@@ -31,10 +31,10 @@
 	output_buffer: pointer to the memory to put the converted string into (must be allocated)
 	output_charset: the iconv encoding description string of the output string
 	output_buffer_length: the number of bytes allocated for output_buffer
+	output_bytes_used: on success/toolong/invalchars the number of bytes used in output_buffer
+	irreversible_count: on _success_ the number of chars that were converted in an irreversible way
 	unconverted_bytes: on _toolong_ the number of bytes in input buffer that were not converted.
-	output_bytes_used: on success/toolong/invalchars the number of bytes used in output_buffer
 	invalid_count: on invalchars the number of invalid chars that were discovered in the string
-	irreversible_count: on _success_ the number of chars that were converted in an irreversible way
 	substitute: if an invalid char is discovered replace it with this ***UTF-8*** string
 					(the utf8 string is converted into your desired charset)
 	
@@ -59,8 +59,8 @@
 	size_t nconv;
 	int local_invalid_count=0;
 	int i, too_big=0, iconv_failed=0, retval=0;
+	char output_charset_ignore[50];
 	
-	
 	if(output_charset==NULL) {
 		return ENOOUTPUTCS;
 	}
@@ -73,9 +73,9 @@
 	if(input_buffer==NULL) {
 		return ENOINPUTBUFF;
 	}
-	if(input_length<1) {
+/*	if(input_length<1) {
 		return EZEROLENTGHINPUT;
-	}
+	}*/ //thinking about it this isn't really an error.  if you do this you will still get a null string back in your chosen encoding.
 	
 	if( strcmp(output_charset,"LOCAL")==0 ) {
 		if(local_charset!=NULL) {
@@ -84,6 +84,16 @@
 			output_charset = null_string;
 		}
 	}
+	if( strcmp(output_charset,"LOCAL//TRANSLIT")==0 ) {
+		if(local_charset!=NULL) {
+			output_charset = output_charset_ignore;
+			snprintf(output_charset_ignore, 50, "%s//TRANSLIT", local_charset);
+			output_charset_ignore[50]='\0';
+			
+		} else {
+			output_charset = null_string;
+		}
+	}
 	
 	if( strcmp(input_charset,"LOCAL")==0 ) {
 		if(local_charset!=NULL) {
@@ -92,7 +102,6 @@
 			input_charset = null_string;
 		}
 	}
-	
 	conversion_descriptor = iconv_open(output_charset, "UTF-8");
 	if(conversion_descriptor == (iconv_t)-1) {
 		if(errno == EINVAL) {

Modified: trunk/src/js.c
===================================================================
--- trunk/src/js.c	2007-04-29 20:15:14 UTC (rev 957)
+++ trunk/src/js.c	2007-05-01 09:13:26 UTC (rev 958)
@@ -83,14 +83,14 @@
 {
 	uintN i;
 	int conversion_result;
-	char msg[MAXTEXTLENGTH];
+	char msg[MAXTEXTLENGTH*4];
 	if (argc < 1) {
 		return JS_TRUE;
 	}
 	
 	for (i = 0; i<argc; i++) {
 		if (JSVAL_IS_STRING(argv[i])) {
-			conversion_result=jsval_to_utf8string(cx, argv[0], msg, MAXTEXTLENGTH);
+			conversion_result=jsval_to_utf8string(cx, argv[0], msg, MAXTEXTLENGTH*4);
 			if( conversion_result >= 0) {
 				if( conversion_result & WOUTPUTTOOSHORT ) {
 					printf("JavaScript print() command produced too much text.  It was truncated\n");
@@ -123,7 +123,7 @@
 	}
 	
 	if (JSVAL_IS_STRING(argv[0])) {
-		conversion_result=jsval_to_utf8string(cx, argv[0], msg, MAXTEXTLENGTH);
+		conversion_result=jsval_to_utf8string(cx, argv[0], msg, MAXTEXTLENGTH-100);
 		if( conversion_result >= 0) {
 			if( conversion_result & WOUTPUTTOOSHORT ) {
 				printf("JavaScript exec() command produced too much text.  It was truncated\n");
@@ -151,7 +151,7 @@
 	}
 	
 	if (JSVAL_IS_STRING(argv[0])) {
-		conversion_result=jsval_to_utf8string(cx, argv[0], msg, MAXTEXTLENGTH);
+		conversion_result=jsval_to_utf8string(cx, argv[0], msg, MAXTEXTLENGTH-100);
 		if( conversion_result >= 0) {
 			if( conversion_result & WOUTPUTTOOSHORT ) {
 				printf("JavaScript say() command produced too much text.  It was truncated\n");

Modified: trunk/src/main.c
===================================================================
--- trunk/src/main.c	2007-04-29 20:15:14 UTC (rev 957)
+++ trunk/src/main.c	2007-05-01 09:13:26 UTC (rev 958)
@@ -50,6 +50,7 @@
 #include <pwd.h>
 #include <grp.h>
 
+
 extern struct room myroom;
 
 #include "alias.h"
@@ -788,6 +789,7 @@
 
 void accept_line(char *line)
 {
+	int conversion_result;
 	if (line == NULL)
 	{
 		*comm=0;
@@ -801,43 +803,58 @@
 		}
 	} else
 	{
-		strncpy(comm, line, MAXTEXTLENGTH-1);
-		comm[MAXTEXTLENGTH-1] = '\0';
-		eof_count=0;
-		strip_str(comm); 
-		if (new_mail_waiting==1)
+		
+		//strncpy(comm, line, MAXTEXTLENGTH-1);
+		//comm[MAXTEXTLENGTH-1] = '\0';
+		conversion_result=convert_string_charset(line, "LOCAL", strlen(line), comm, "UTF-8", MAXTEXTLENGTH-100, NULL, NULL, NULL, NULL, "?");
+		if(conversion_result >= 0)
 		{
-			display_message(_("\03305*** You have new mail.\n"), 1, 1);
-			new_mail_waiting=0;
-		}else
-		if (new_mail_waiting>1)
-		{
-			char tbuff[MAXTEXTLENGTH];
-			snprintf(tbuff, MAXTEXTLENGTH-1, _("\03305*** You have %d new mail messages.\n"),new_mail_waiting);
-			display_message(tbuff, 1, 1);
-			new_mail_waiting=0;
+			if( conversion_result & WINVALIDCHARS )
+			{
+				printf("Warning: Your input contained characters that are invalid in your current locale.\n  Please ensure your terminal and locale are using the same character set\n"); 
+			}
+			eof_count=0;
+			strip_str(comm); 
+			if (new_mail_waiting==1)
+			{
+				display_message(_("\03305*** You have new mail.\n"), 1, 1);
+				new_mail_waiting=0;
+			}else
+			if (new_mail_waiting>1)
+			{
+				char tbuff[MAXTEXTLENGTH];
+				snprintf(tbuff, MAXTEXTLENGTH-1, _("\03305*** You have %d new mail messages.\n"),new_mail_waiting);
+				display_message(tbuff, 1, 1);
+				new_mail_waiting=0;
+			}
+			user->idletime=time(0);
+			snprintf(user->doing, DOINGSIZE-1, "Nothing");
+			update_user(user,userposn);
+			if (*comm == 0) return;
+	#ifdef RWHO
+			rwho_update();
+	#endif 
+			disable_rl(0);
+			update_user(user,userposn);
+			stack_str(comm);
 		}
-		user->idletime=time(0);
-		snprintf(user->doing, DOINGSIZE-1, "Nothing");
-		update_user(user,userposn);
-		if (*comm == 0) return;
-#ifdef RWHO
-		rwho_update();
-#endif 
-		disable_rl(0);
-		update_user(user,userposn);
-		stack_str(comm);
 	}
 }
 
 void accept_command(char *comm)
 {
 	int c;
-
+	char history_comm[MAXTEXTLENGTH];
+	int conversion_result;
+	
 	if (UseRL)
 	{
-		add_history(comm);
-		saved_history++;
+		conversion_result=convert_string_charset(comm, "UTF-8", strlen(comm), history_comm, "LOCAL", MAXTEXTLENGTH, NULL, NULL, NULL, NULL, NULL);
+		if(conversion_result >= 0)
+		{
+			add_history(history_comm);
+			saved_history++;
+		}
 	}
 	if (cm_flags(user->chatmode,CM_ONCHAT,CM_MODE_ANY))
 	{	
@@ -1177,12 +1194,31 @@
 	else return 0;
 }
 
+void printline_in_local(char *line, int *warnings, size_t *not_in_local)
+{
+	char local_line[MAXTEXTLENGTH];
+	int		conversion_result;
+	size_t	irreversible=0, outbyte=0, inbyte=0;
+	
+	conversion_result=convert_string_charset(line, "UTF-8", strlen(line), local_line, "LOCAL//TRANSLIT", MAXTEXTLENGTH, &outbyte, &irreversible, &inbyte, NULL, "#");
+	if(conversion_result>=0)
+	{
+		*not_in_local += irreversible;
+		*warnings = *warnings | conversion_result;
+		write(1,local_line,strlen(local_line));
+
+	}
+	else
+	{
+		printf("Error: An error of type %d occured occured trying to convert a message into your local charset.\n", conversion_result);
+	}
+
+}
+
 void display_message(char *text, int beeps, int newline)
 {
-	wchar_t *	wtext;
 	static int	count = 0;
-	int		len, wlen;
-	int		bytes;
+	int		len;
 	int		ptr;
 	int		concealed = 0;
 	char		line[MAXTEXTLENGTH];	
@@ -1191,6 +1227,8 @@
 	int		screen_width = screen_w();
 	char		*colr = NULL;
 	int		endline;
+	int		convert_warnings=0;
+	size_t	not_in_local=0;
 
 	if (text==NULL || strlen(text)==0)
 	{
@@ -1206,34 +1244,54 @@
 	hascolour=0;
 	colrstart=-1;
 
-	wtext = malloc(sizeof(wchar_t)*(len+1));
-	wlen = mbstowcs(wtext, text, len);
-	if (wlen == -1) {
-	    printf("warning: display_message received invalid byte sequence\n");
-	    free(wtext);
-	    return;
-	}
-
-	while (wlen-ptr > 0)
+	while (len-ptr > 0)
 	{
-		if (wtext[ptr]==033)
+		if (text[ptr]==033)
 		{
 			char str[3];
-
+			ptr++;
+			if(len-ptr>0)
+			{
+				if( ((unsigned char)text[ptr] & 192) == 192 )
+				{
+					str[0]='-';
+					while( ((unsigned char)text[ptr] & 192) == 128 && len-ptr > 0)
+					{
+						ptr++;
+					}
+				}
+				else
+				{
+					str[0]=text[ptr];
+					ptr++;
+				}
+			}
+			if(len-ptr>0)
+			{
+				if(((unsigned char)text[ptr] & 192)==192 )
+				{
+					str[1]='-';
+					while( ((unsigned char)text[ptr] & 192) == 128 && len-ptr > 0)
+					{
+						ptr++;
+					}
+				}
+				else
+				{
+					str[1]=text[ptr];
+					ptr++;
+				}
+			}
+				
 			/* escape sequence, skip next two chars */
 			if (s_colouroff(user->special))
 			{
-				ptr+=3;
 				continue;
 			}
 			hascolour++;
 
-			/* note that these characters are always ASCII (unless the sequence
-			 * is invalid :)), so no data is lost in these casts. */
-			str[0]=(char)wtext[ptr+1];
-			str[1]=(char)wtext[ptr+2];	
 			str[2]=0;
-
+			
 			colr=colour(str, &concealed);
 
 			if (colr!=NULL)
@@ -1246,42 +1304,62 @@
 				for (j=0;j<strlen(colr);j++)
 					line[i++]=colr[j];
 			}
-			ptr+=3;
 		}else
-		if (iswprint(wtext[ptr]))
+		if (text[ptr]>=040 && text[ptr]<=0176)
 		{
-			if (!concealed) {
-				bytes = wcrtomb(&line[i], wtext[ptr], NULL);
-				if (bytes == -1) {
-					/* impossible, since this string came from the
-					 * (presumably infallible) library function mbstowcs */
-					printf("warning: iswprint created invalid character."
-						" This may be a bug in libc.\n");
-					/* write whatever we managed to salvage of it */
-					line[i] = 0;
-					write(1,line,i-1);
-					free(wtext);
-					return;
-				} else {
-					count += wcwidth(wtext[ptr]);
-					i += bytes;
+			if (concealed)
+			{
+				ptr++;
+			}
+			else
+			{
+				line[i++]=text[ptr++];
+				count++;
+				colrstart = -1;
+			}
+		}else
+		if ( (text[ptr] & 192) == 192 )
+		{
+			if (concealed)
+			{
+				ptr++;
+			}
+			else
+			{
+				line[i++]=text[ptr++];
+				count++;
+				colrstart = -1;
+			}
+			// stops us randomly spliting over a unicode multibyte character
+			while( ((unsigned char)text[ptr] & 192) == 128 && len-ptr > 0 )
+			{
+				if (concealed)
+				{
+					ptr++;
+				}
+				else
+				{
+					line[i++]=text[ptr++];
 					colrstart = -1;
 				}
 			}
+			
+		}
+		else
+		{
 			ptr++;
-		} else
-			ptr++;
-
+		}
 		if (i >= (MAXTEXTLENGTH-20))
 		{
-			write(1,line,strlen(line));
+			line[i]='\0';
+			printline_in_local(line, &convert_warnings, &not_in_local);
 			i=0;
 		}
 
 		if (s_nolinewrap(user->special))
-			endline = (ptr >= wlen);
+			endline = (ptr >= len);
 		else
-			endline = ((count >= screen_width) || (ptr >= wlen));
+			endline = ((count >= screen_width) || (ptr >= len));
 
 		if (endline)
 		{
@@ -1298,8 +1376,8 @@
 				count=0;
 			}
 
-			line[i]=0;
-			write(1,line,strlen(line));
+			line[i]='\0';
+			printline_in_local(line, &convert_warnings, &not_in_local);
 
 			if (ptr>=len)
 			{
@@ -1321,7 +1399,23 @@
 			}
 		}
 	}
-
+	if(convert_warnings & WOUTPUTTOOSHORT)
+	{
+		printf("Warning: The buffer provided for conversion of the last line to your local charset was not big enough(some chars might have been lost) please kick a developer to fix this.\n");
+	}
+	if(convert_warnings & WINVALIDCHARS)
+	{
+		printf("Warning: There were invalid characters in the message just printed that were replaced with '#'\n");
+	}
+	if(convert_warnings & WICONVFAIL)
+	{
+		printf("Warning: Something went wrong with iconv while printing the last message, some characters may have been lost\n");
+	}
+	if(not_in_local > 0)
+	{
+		printf("Warning: %d characters in the last message could not be displayed in your current character set.\n You might want to consider using UTF-8.\n", (int)not_in_local);
+	}
+	
 	/*if (UseRL && inreadline) rl_forced_update_display();*/
 	if (!u_beep(user->status) && beeps)
 	{

Modified: trunk/src/strings.c
===================================================================
--- trunk/src/strings.c	2007-04-29 20:15:14 UTC (rev 957)
+++ trunk/src/strings.c	2007-05-01 09:13:26 UTC (rev 958)
@@ -175,16 +175,10 @@
 	
 	len=strlen(string);
 	array=(char *)malloc(len+1);
-	for (i=0;i<len;)
+	for (i=0;i<len;i++)
 	{
-		wchar_t wc;
-		int width = mbtowc(&wc, &string[i], len-i);
-		int limit = i + width;
-		int printable = iswprint(wc) || string[i]==033;
-		if (width == -1) /* probably an invalid sequence at the end */
-		    break;
-		for (; i<limit; i++)
-		    if (printable) array[ptr++]=string[i];
+		if (string[i]==033 || (string[i]>=040 && string[i]<=0176) || (unsigned char)string[i]>0177)
+			array[ptr++]=string[i];
 	}
 	array[ptr]=0;
 	strcpy(string,array);
@@ -193,7 +187,6 @@
 
 
 
-
 /* common file functions */
 
 char *makepath(char *a, char *b, char *c)





More information about the mw-devel mailing list