Changeset - r21659:b8650d138a10
[Not reviewed]
master
0 1 0
rubidium - 10 years ago 2014-09-07 16:03:41
rubidium@openttd.org
(svn r26797) -Codechange: rewrite the UTF8 reading code to make use of already existing functions instead of partially trying to implement them
1 file changed with 12 insertions and 25 deletions:
0 comments (0 inline, 0 general)
src/script/squirrel.cpp
Show inline comments
 
@@ -389,33 +389,20 @@ static WChar _io_file_lexfeed_ASCII(SQUs
 

	
 
/**
 * Read one UTF-8 encoded character from the given file.
 *
 * The lead byte is read first and Utf8EncodedCharLen() determines how many
 * bytes the whole sequence occupies. The remaining bytes are then read and
 * the complete sequence is handed to Utf8Decode().
 *
 * @param file User pointer; it is cast to SQFile * and read from.
 * @return The decoded character on success.
 *         0 when the file cannot supply the lead byte or the remaining bytes.
 *         -1 (converted to WChar) when Utf8EncodedCharLen() reports a length
 *         of 0 for the lead byte, or when Utf8Decode() reports a length that
 *         differs from the expected one.
 */
static WChar _io_file_lexfeed_UTF8(SQUserPointer file)
{
	/* NOTE(review): presumably sized for the longest sequence Utf8EncodedCharLen()
	 * can report plus one spare byte; no terminator is written here -- confirm. */
	char buffer[5];

	/* Read the first character, and get the length based on UTF-8 specs. If invalid, bail out. */
	if (((SQFile *)file)->Read(buffer, sizeof(buffer[0]), 1) != 1) return 0;
	uint len = Utf8EncodedCharLen(buffer[0]);
	/* NOTE(review): WChar's signedness is not visible here; if it is unsigned,
	 * -1 wraps to its maximum value -- confirm callers treat that as an error marker. */
	if (len == 0) return -1;

	/* Read the remaining bits. */
	if (len > 1 && ((SQFile *)file)->Read(buffer + 1, sizeof(buffer[0]), len - 1) != len - 1) return 0;

	/* Convert the character, and when definitely invalid, bail out as well. */
	WChar c;
	if (Utf8Decode(&c, buffer) != len) return -1;

	return c;
}
 

	
0 comments (0 inline, 0 general)