MAME SVN History

199869 Revisions

r26171 Friday 15th November, 2013 at 09:27:03 UTC by Jürgen Buchmüller
Move string.h like functions to unicode.c; change comments to Doxygen/Qt style.

[/branches/alto2/src/emu/debug]	debugvw.h dvdisasm.c dvtext.c textbuf.c
[/branches/alto2/src/lib/util]	unicode.c unicode.h

branches/alto2/src/emu/debug/debugvw.h
r26170	r26171
19	19	// CONSTANTS
20	20	//**************************************************************************
21	21
22		// types passed to debug_view_manager::alloc_view()
	22	//! types passed to debug_view_manager::alloc_view()
23	23	enum debug_view_type
24	24	{
25	25	DVT_NONE,
r26170	r26171
35	35	};
36	36
37	37
38		// notifications passed to view_notify()
	38	//! notifications passed to view_notify()
39	39	enum debug_view_notification
40	40	{
41	41	VIEW_NOTIFY_NONE,
r26170	r26171
46	46
47	47
48	48	// attribute bits for debug_view_char.attrib
49		const UINT8 DCA_NORMAL = 0x00; // black on white
50		const UINT8 DCA_CHANGED = 0x01; // red foreground
51		const UINT8 DCA_SELECTED = 0x02; // light red background
52		const UINT8 DCA_INVALID = 0x04; // dark blue foreground
53		const UINT8 DCA_DISABLED = 0x08; // darker foreground
54		const UINT8 DCA_ANCILLARY = 0x10; // grey background
55		const UINT8 DCA_CURRENT = 0x20; // yellow background
56		const UINT8 DCA_COMMENT = 0x40; // green foreground
57		const UINT8 DCA_VISITED = 0x80; // light blue background
	49	const UINT8 DCA_NORMAL = 0x00; //!< black on white
	50	const UINT8 DCA_CHANGED = 0x01; //!< red foreground
	51	const UINT8 DCA_SELECTED = 0x02; //!< light red background
	52	const UINT8 DCA_INVALID = 0x04; //!< dark blue foreground
	53	const UINT8 DCA_DISABLED = 0x08; //!< darker foreground
	54	const UINT8 DCA_ANCILLARY = 0x10; //!< grey background
	55	const UINT8 DCA_CURRENT = 0x20; //!< yellow background
	56	const UINT8 DCA_COMMENT = 0x40; //!< green foreground
	57	const UINT8 DCA_VISITED = 0x80; //!< light blue background
58	58
59	59
60	60	// special characters that can be passed to process_char()
61		const int DCH_UP = 1; // up arrow
62		const int DCH_DOWN = 2; // down arrow
63		const int DCH_LEFT = 3; // left arrow
64		const int DCH_RIGHT = 4; // right arrow
65		const int DCH_PUP = 5; // page up
66		const int DCH_PDOWN = 6; // page down
67		const int DCH_HOME = 7; // home
68		const int DCH_CTRLHOME = 8; // ctrl+home
69		const int DCH_END = 9; // end
70		const int DCH_CTRLEND = 10; // ctrl+end
71		const int DCH_CTRLRIGHT = 11; // ctrl+right
72		const int DCH_CTRLLEFT = 12; // ctrl+left
	61	const int DCH_UP = 1; //!< up arrow
	62	const int DCH_DOWN = 2; //!< down arrow
	63	const int DCH_LEFT = 3; //!< left arrow
	64	const int DCH_RIGHT = 4; //!< right arrow
	65	const int DCH_PUP = 5; //!< page up
	66	const int DCH_PDOWN = 6; //!< page down
	67	const int DCH_HOME = 7; //!< home
	68	const int DCH_CTRLHOME = 8; //!< ctrl+home
	69	const int DCH_END = 9; //!< end
	70	const int DCH_CTRLEND = 10; //!< ctrl+end
	71	const int DCH_CTRLRIGHT = 11; //!< ctrl+right
	72	const int DCH_CTRLLEFT = 12; //!< ctrl+left
73	73
74	74
75	75	// special characters that can be passed to process_click()
76		const int DCK_LEFT_CLICK = 1; // left instantaneous click
77		const int DCK_RIGHT_CLICK = 2; // right instantaneous click
78		const int DCK_MIDDLE_CLICK = 3; // middle instantaneous click
	76	const int DCK_LEFT_CLICK = 1; //!< left instantaneous click
	77	const int DCK_RIGHT_CLICK = 2; //!< right instantaneous click
	78	const int DCK_MIDDLE_CLICK = 3; //!< middle instantaneous click
79	79
80	80
81	81	//**************************************************************************
r26170	r26171
86	86	class debug_view;
87	87
88	88
89		// OSD callback function for a view
	89	//! OSD callback function for a view
90	90	typedef void (*debug_view_osd_update_func)(debug_view &view, void *osdprivate);
91	91
92	92
93		// a single "character" in the debug view has an Unicode value and an attribute byte
	93	//! a single "character" in the debug view has an Unicode value and an attribute byte
94	94	struct debug_view_char
95	95	{
96	96	unicode_char uchar;
r26170	r26171
98	98	};
99	99
100	100
101		// pair of X,Y coordinates for sizing
	101	//! pair of X,Y coordinates for sizing
102	102	class debug_view_xy
103	103	{
104	104	public:
r26170	r26171
109	109	};
110	110
111	111
112		// debug_view_sources select from multiple sources available within a view
	112	//! debug_view_sources select from multiple sources available within a view
113	113	class debug_view_source
114	114	{
115	115	DISABLE_COPYING(debug_view_source);
r26170	r26171
129	129
130	130	private:
131	131	// internal state
132		debug_view_source * m_next; // link to next item
133		astring m_name; // name of the source item
134		device_t * m_device; // associated device (if applicable)
135		bool m_is_octal; // is view in octal or hex
	132	debug_view_source * m_next; //!< link to next item
	133	astring m_name; //!< name of the source item
	134	device_t * m_device; //!< associated device (if applicable)
	135	bool m_is_octal; //!< is view in octal or hex
136	136	};
137	137
138	138
r26170	r26171
161	161
162	162	private:
163	163	// internal state
164		running_machine & m_machine; // reference to our machine
165		debug_view_source * m_head; // head of the list
166		debug_view_source * m_tail; // end of the tail
167		UINT32 m_count; // number of items in the list
	164	running_machine & m_machine; //!< reference to our machine
	165	debug_view_source * m_head; //!< head of the list
	166	debug_view_source * m_tail; //!< end of the tail
	167	UINT32 m_count; //!< number of items in the list
168	168	};
169	169
170	170
r26170	r26171
223	223
224	224	protected:
225	225	// core view data
226		debug_view * m_next; // link to the next view
227		debug_view_type m_type; // type of view
228		const debug_view_source *m_source; // currently selected data source
229		debug_view_source_list m_source_list; // list of available data sources
	226	debug_view * m_next; //!< link to the next view
	227	debug_view_type m_type; //!< type of view
	228	const debug_view_source *m_source; //!< currently selected data source
	229	debug_view_source_list m_source_list; //!< list of available data sources
230	230
231	231	// OSD data
232		debug_view_osd_update_func m_osdupdate; // callback for the update
233		void * m_osdprivate; // OSD-managed private data
	232	debug_view_osd_update_func m_osdupdate; //!< callback for the update
	233	void * m_osdprivate; //!< OSD-managed private data
234	234
235	235	// visibility info
236		debug_view_xy m_visible; // visible size (in rows and columns)
237		debug_view_xy m_total; // total size (in rows and columns)
238		debug_view_xy m_topleft; // top-left visible position (in rows and columns)
239		debug_view_xy m_cursor; // cursor position
240		bool m_supports_cursor; // does this view support a cursor?
241		bool m_cursor_visible; // is the cursor visible?
	236	debug_view_xy m_visible; //!< visible size (in rows and columns)
	237	debug_view_xy m_total; //!< total size (in rows and columns)
	238	debug_view_xy m_topleft; //!< top-left visible position (in rows and columns)
	239	debug_view_xy m_cursor; //!< cursor position
	240	bool m_supports_cursor; //!< does this view support a cursor?
	241	bool m_cursor_visible; //!< is the cursor visible?
242	242
243	243	// update info
244		bool m_recompute; // does this view require a recomputation?
245		UINT8 m_update_level; // update level; updates when this hits 0
246		bool m_update_pending; // true if there is a pending update
247		bool m_osd_update_pending; // true if there is a pending update
248		debug_view_char * m_viewdata; // current array of view data
249		int m_viewdata_size; // number of elements of the viewdata array
	244	bool m_recompute; //!< does this view require a recomputation?
	245	UINT8 m_update_level; //!< update level; updates when this hits 0
	246	bool m_update_pending; //!< true if there is a pending update
	247	bool m_osd_update_pending; //!< true if there is a pending update
	248	debug_view_char * m_viewdata; //!< current array of view data
	249	int m_viewdata_size; //!< number of elements of the viewdata array
250	250
251	251	private:
252		running_machine & m_machine; // machine associated with this view
	252	running_machine & m_machine; //!< machine associated with this view
253	253	};
254	254
255	255
r26170	r26171
277	277	debug_view *append(debug_view *view);
278	278
279	279	// internal state
280		running_machine & m_machine; // reference to our machine
281		debug_view * m_viewlist; // list of views
	280	running_machine & m_machine; //!< reference to our machine
	281	debug_view * m_viewlist; //!< list of views
282	282	};
283	283
284	284
r26170	r26171
308	308	bool recompute();
309	309
310	310	// internal state
311		running_machine & m_machine; // reference to the machine
312		bool m_dirty; // true if the expression needs to be re-evaluated
313		UINT64 m_result; // last result from the expression
314		parsed_expression m_parsed; // parsed expression data
315		astring m_string; // copy of the expression string
	311	running_machine & m_machine; //!< reference to the machine
	312	bool m_dirty; //!< true if the expression needs to be re-evaluated
	313	UINT64 m_result; //!< last result from the expression
	314	parsed_expression m_parsed; //!< parsed expression data
	315	astring m_string; //!< copy of the expression string
316	316	};
317	317
318	318

branches/alto2/src/emu/debug/dvdisasm.c
r26170	r26171
17	17	#define DISASM_BUFFSIZE 128
18	18
19	19	//**************************************************************************
20		// UNICODE HELPERS
21		//**************************************************************************
22		static int unicode_strlen(const unicode_char* src)
23		{
24		int len = 0;
25		while (*src++)
26		len++;
27		return len;
28		}
29
30		static int unicode_strncmp(const unicode_char* dst, const unicode_char* src, size_t len)
31		{
32		while (*src && *dst && *src == *dst && len > 0) {
33		src++;
34		dst++;
35		len--;
36		}
37		if (*src != *dst)
38		return *src < *dst ? -1 : +1;
39		return 0;
40		}
41
42		static int unicode_sprintf(unicode_char* dst, const char* format, ...)
43		{
44		va_list ap;
45		char buff[256];
46		va_start(ap, format);
47		int len = vsnprintf(buff, sizeof(buff), format, ap);
48		va_end(ap);
49		for (int i = 0; i < len; i++)
50		*dst++ = buff[i];
51		*dst = 0;
52		return len;
53		}
54
55		static unicode_char* unicode_strncpy(unicode_char* dst, const unicode_char* src, size_t len)
56		{
57		unicode_char* str = dst;
58		while (*src && len > 0) {
59		*dst++ = *src++;
60		len--;
61		}
62		if (len > 0)
63		*dst = 0;
64		return str;
65		}
66
67		//**************************************************************************
68	20	// DEBUG VIEW DISASM SOURCE
69	21	//**************************************************************************
70	22
r26170	r26171
439	391	unicode_char *destbuf = &m_dasm[instr * m_allocated.x];
440	392	unicode_char oldbuf[DISASM_BUFFSIZE];
441	393	if (lines == 1)
442		unicode_strncpy(oldbuf, destbuf, MIN(DISASM_BUFFSIZE, m_allocated.x));
	394	uchar_strncpy(oldbuf, destbuf, MIN(DISASM_BUFFSIZE, m_allocated.x));
443	395
444	396	// convert back and set the address of this instruction
445	397	m_byteaddress[instr] = pcbyte;
446		unicode_sprintf(&destbuf[0], " %s ", core_i64_format(source.m_space.byte_to_address(pcbyte), source.m_space.logaddrchars()/2*char_num, source.is_octal()));
	398	uchar_sprintf(&destbuf[0], " %s ", core_i64_format(source.m_space.byte_to_address(pcbyte), source.m_space.logaddrchars()/2*char_num, source.is_octal()));
447	399
448	400	// make sure we can translate the address, and then disassemble the result
449	401	char buffer[DISASM_BUFFSIZE];
r26170	r26171
484	436	// get the bytes
485	437	numbytes = source.m_space.address_to_byte(numbytes) & source.m_space.logbytemask();
486	438	generate_bytes(pcbyte, numbytes, minbytes, buffer, m_allocated.x - m_divider2, m_right_column == DASM_RIGHTCOL_ENCRYPTED);
487		unicode_sprintf(&destbuf[m_divider2], "%s", buffer);
	439	uchar_sprintf(&destbuf[m_divider2], "%s", buffer);
488	440	}
489	441	else if (m_right_column == DASM_RIGHTCOL_COMMENTS)
490	442	{
r26170	r26171
492	444	offs_t comment_address = source.m_space.byte_to_address(m_byteaddress[instr]);
493	445	const char *text = source.m_device.debug()->comment_text(comment_address);
494	446	if (text != NULL)
495		unicode_sprintf(&destbuf[m_divider2], "// %.*s", m_allocated.x - m_divider2 - 1, text);
	447	uchar_sprintf(&destbuf[m_divider2], "// %.*s", m_allocated.x - m_divider2 - 1, text);
496	448	}
497	449
498	450	// see if the line changed at all
499		if (lines == 1 && unicode_strncmp(oldbuf, destbuf, MIN(DISASM_BUFFSIZE, m_allocated.x)) != 0)
	451	if (lines == 1 && uchar_strncmp(oldbuf, destbuf, MIN(DISASM_BUFFSIZE, m_allocated.x)) != 0)
500	452	changed = true;
501	453	}
502	454
r26170	r26171
645	597
646	598	// get the effective string
647	599	const unicode_char *data = &m_dasm[effrow * m_allocated.x];
648		UINT32 len = unicode_strlen(data);
	600	UINT32 len = uchar_strlen(data);
649	601
650	602	// copy data
651	603	UINT32 effcol = m_topleft.x;

branches/alto2/src/emu/debug/dvtext.c
r26170	r26171
15	15
16	16
17	17	//**************************************************************************
18		// UNICODE HELPERS
19		//**************************************************************************
20		static int unicode_strlen(const unicode_char* src)
21		{
22		int len = 0;
23		while (*src++)
24		len++;
25		return len;
26		}
27
28
29		//**************************************************************************
30	18	// DEBUG VIEW TEXTBUF
31	19	//**************************************************************************
32	20
r26170	r26171
92	80	// if this visible row is valid, add it to the buffer
93	81	if (line != NULL)
94	82	{
95		size_t len = unicode_strlen(line);
	83	size_t len = uchar_strlen(line);
96	84	UINT32 effcol = m_topleft.x;
97	85
98	86	// copy data

branches/alto2/src/emu/debug/textbuf.c
r26170	r26171
46	46	***************************************************************************/
47	47
48	48	/*-------------------------------------------------
49		utf8_strlen - return the number of unicode
50		characters in UTF-8 encoded string
51		-------------------------------------------------*/
52		INLINE int utf8_strlen(const char* src)
53		{
54		int total = 0;
55		while (*src) {
56		unicode_char uchar;
57		int len = uchar_from_utf8(&uchar, src, strlen(src));
58		if (len < 0)
59		break; // invalid UTF-8
60		total++;
61		src += len;
62		}
63		return total;
64		}
65
66		/*-------------------------------------------------
67	49	buffer_used - return the number of bytes
68	50	currently held in the buffer
69	51	-------------------------------------------------*/

branches/alto2/src/lib/util/unicode.c
r26170	r26171
1	1	// license:BSD-3-Clause
2		// copyright-holders:Aaron Giles
	2	// copyright-holders:Aaron Giles, Jürgen Buchmüller
3	3	/*********************************************************************
4	4
5	5	unicode.c
r26170	r26171
11	11	#include "unicode.h"
12	12
13	13
14		/*-------------------------------------------------
15		uchar_isvalid - return true if a given
16		character is a legitimate unicode character
17		-------------------------------------------------*/
18
	14	/**
	15	* @brief test for legitimate unicode values
	16	*
	17	* return true if a given character is a legitimate unicode character
	18	*
	19	* @param uchar value to inspect
	20	* @return non zero (true) if uchar is valid, 0 otherwise
	21	*/
19	22	int uchar_isvalid(unicode_char uchar)
20	23	{
21	24	return (uchar < 0x110000) && !((uchar >= 0xd800) && (uchar <= 0xdfff));
22	25	}
23	26
24	27
25		/*-------------------------------------------------
26		uchar_from_utf8 - convert a UTF-8 sequence
27		into a unicode character
28		-------------------------------------------------*/
29
	28	/**
	29	* @brief convert an UTF-8 sequence into an unicode character
	30	* @param uchar pointer to the resulting unicode_char
	31	* @param utf8char pointer to the source string (may be NULL)
	32	* @param count number of characters available in utf8char
	33	* @return the number of characters used
	34	*/
30	35	int uchar_from_utf8(unicode_char *uchar, const char *utf8char, size_t count)
31	36	{
32	37	unicode_char c, minchar;
r26170	r26171
117	122	}
118	123
119	124
120		/*-------------------------------------------------
121		uchar_from_utf16 - convert a UTF-16 sequence
122		into a unicode character
123		-------------------------------------------------*/
124
	125	/**
	126	* @brief convert a UTF-16 sequence into an unicode character
	127	* @param uchar pointer to the resulting unicode_char
	128	* @param utf16char pointer to the source string (may be NULL)
	129	* @param count number of characters available in utf16char
	130	* @return the number of characters used
	131	*/
125	132	int uchar_from_utf16(unicode_char *uchar, const utf16_char *utf16char, size_t count)
126	133	{
127	134	int rc = -1;
r26170	r26171
151	158	}
152	159
153	160
154		/*-------------------------------------------------
155		uchar_from_utf16f - convert a UTF-16 sequence
156		into a unicode character from a flipped
157		byte order
158		-------------------------------------------------*/
159
	161	/**
	162	* @brief convert a UTF-16 sequence into an unicode character from a flipped byte order
	163	*
	164	* This flips endianness of the first two utf16_char in a local
	165	* copy and then calls uchar_from_utf16.
	166	*
	167	* @param uchar pointer to the resulting unicode_char
	168	* @param utf16char pointer to the source string (may be NULL)
	169	* @param count number of characters available in utf16char
	170	* @return the number of characters used
	171	*/
160	172	int uchar_from_utf16f(unicode_char *uchar, const utf16_char *utf16char, size_t count)
161	173	{
162	174	utf16_char buf[2] = {0};
r26170	r26171
168	180	}
169	181
170	182
171		/*-------------------------------------------------
172		utf8_from_uchar - convert a unicode character
173		into a UTF-8 sequence
174		-------------------------------------------------*/
175
	183	/**
	184	* @brief convert an unicode character into a UTF-8 sequence
	185	* @param utf8string pointer to the result char array
	186	* @param count number of characters that can be written to utf8string
	187	* @param uchar unciode_char value to convert
	188	* @return -1 on error, or the number of chars written on success (1 to 6)
	189	*/
176	190	int utf8_from_uchar(char *utf8string, size_t count, unicode_char uchar)
177	191	{
178	192	int rc = 0;
r26170	r26171
245	259	return rc;
246	260	}
247	261
248
249		/*-------------------------------------------------
250		utf16_from_uchar - convert a unicode character
251		into a UTF-16 sequence
252		-------------------------------------------------*/
253
	262	/**
	263	* @brief convert an unicode character into a UTF-16 sequence
	264	* @param utf16string pointer to the result array of utf16_char
	265	* @param count number of characters that can be written to utf16string
	266	* @param uchar unciode_char value to convert
	267	* @return -1 on error, or the number of utf16_char written on success (1 or 2)
	268	*/
254	269	int utf16_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar)
255	270	{
256	271	int rc;
r26170	r26171
282	297	return rc;
283	298	}
284	299
285
286		/*-------------------------------------------------
287		utf16_from_uchar - convert a unicode character
288		into a UTF-16 sequence with flipped endianness
289		-------------------------------------------------*/
290
	300	/**
	301	* @brief convert an unicode character into a UTF-16 sequence with flipped endianness
	302	* @param utf16string pointer to the result array of utf16_char
	303	* @param count number of characters that can be written to utf16string
	304	* @param uchar unciode_char value to convert
	305	* @return -1 on error, or the number of utf16_char written on success (1 or 2)
	306	*/
291	307	int utf16f_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar)
292	308	{
293	309	int rc;
294	310	utf16_char buf[2] = { 0, 0 };
295	311
296		rc = utf16_from_uchar(buf, count, uchar);
	312	rc = utf16_from_uchar(buf, 2, uchar);
297	313
298		if (rc >= 1)
	314	if (rc >= 1 && count >= 1)
299	315	utf16string[0] = FLIPENDIAN_INT16(buf[0]);
300		if (rc >= 2)
	316	if (rc >= 2 && count >= 2)
301	317	utf16string[1] = FLIPENDIAN_INT16(buf[1]);
302		return rc;
	318	return rc < count ? rc : count;
303	319	}
304	320
305	321
306		/*-------------------------------------------------
307		utf8_previous_char - return a pointer to the
308		previous character in a string
309		-------------------------------------------------*/
310
	322	/**
	323	* @brief return a pointer to the previous character in a string
	324	* @param utf8string const pointer to the starting position in the string
	325	* @return pointer to the character which is not an UTF-8 auxiliary character
	326	*/
311	327	const char *utf8_previous_char(const char *utf8string)
312	328	{
313	329	while ((*--utf8string & 0xc0) == 0x80)
r26170	r26171
315	331	return utf8string;
316	332	}
317	333
318
319		/*-------------------------------------------------
320		utf8_is_valid_string - return true if the
321		given string is a properly formed sequence of
322		UTF-8 characters
323		-------------------------------------------------*/
324
	334	/**
	335	* @brief return true if the given string is a properly formed sequence of UTF-8 characters
	336	* @param utf8string const pointer to the source string
	337	* @return TRUE if the string is valid, FALSE otherwise
	338	*/
325	339	int utf8_is_valid_string(const char *utf8string)
326	340	{
327	341	int remaining_length = strlen(utf8string);
r26170	r26171
344	358	return TRUE;
345	359	}
346	360
347		/*-------------------------------------------------
348		unicode_load_table - load a lookup table
349		for e.g. ISO-8859-1 to Unicode from a file
350		The expected format is "Format A" defined
351		by unicode.org
352		-------------------------------------------------*/
	361	/**
	362	* @brief return the number of decoded Unicode values in UTF-8 encoded string
	363	* @param src pointer to the array of UTF-8 encoded characters
	364	* @return number of unicode_char values decoded from the UTF-8 string
	365	*/
	366	size_t utf8_strlen(const char* src)
	367	{
	368	int total = 0;
	369	while (*src) {
	370	unicode_char uchar;
	371	int len = uchar_from_utf8(&uchar, src, strlen(src));
	372	if (len < 0)
	373	break; // invalid UTF-8
	374	total++;
	375	src += len;
	376	}
	377	return total;
	378	}
	379
	380	/**
	381	* @brief load a lookup table 8 bit codes to Unicode values
	382	*
	383	* This opens and reads a file %name which has to be in the
	384	* unicode.org defined "Format A".
	385	* That is three columns
	386	* column 1: hex encoded 8 bit value of the code
	387	* column 2: hex encoded 32 bit (max) unicode value
	388	* column 3: a hash (#) and optional comment until the end-of-line
	389	*
	390	* @param name name of the (text) file to parse
	391	* @return pointer to a newly allocated array of 256 unicode_char values
	392	*/
353	393	unicode_char * uchar_table_load(const char* name)
354	394	{
355	395	FILE* file = fopen(name, "r");
r26170	r26171
384	424	return table;
385	425	}
386	426
	427	/**
	428	* @brief return the 8 bit code that is mapped to the specified unicode_char
	429	* @param table table of 256 unicode_char values to use for the reverse lookup
	430	* @param uchar unicode value to revers lookup
	431	* @return UINT8 with the 8 bit code, or 255 if uchar wasn't found
	432	*/
387	433	UINT8 uchar_table_index(unicode_char* table, unicode_char uchar)
388	434	{
389	435	UINT8 index;
r26170	r26171
393	439	return index;
394	440	}
395	441
	442	/**
	443	* @brief free an unicode lookup table
	444	* @param table
	445	*/
396	446	void uchar_table_free(unicode_char* table)
397	447	{
398	448	if (table)
399	449	free(table);
400	450	}
401	451
	452	/**
	453	* @brief return the unicode_char array length
	454	* @param src pointer to an array of unicode_char
	455	* @return length of the array until the first 0
	456	*/
	457	size_t uchar_strlen(const unicode_char* src)
	458	{
	459	int len = 0;
	460	while (*src++)
	461	len++;
	462	return len;
	463	}
	464
	465	/**
	466	* @brief compare two unicode_char arrays
	467	* @param dst pointer to the first array of unicode_char
	468	* @param src pointer to the second array of unicode_char
	469	* @return 0 if dst == src, -1 if dst < src or +1 otherwise
	470	*/
	471	int uchar_strcmp(const unicode_char* dst, const unicode_char* src)
	472	{
	473	while (*src && *dst && *src == *dst)
	474	{
	475	src++;
	476	dst++;
	477	}
	478	if (*src != *dst)
	479	return *src < *dst ? -1 : +1;
	480	return 0;
	481	}
	482
	483	/**
	484	* @brief compare two unicode_char arrays with length limiting
	485	* @param dst pointer to the first array of unicode_char
	486	* @param src pointer to the second array of unicode_char
	487	* @param len maximum number of unicode_char to compare
	488	* @return 0 if dst == src, -1 if dst < src or +1 otherwise
	489	*/
	490	int uchar_strncmp(const unicode_char* dst, const unicode_char* src, size_t len)
	491	{
	492	while (*src && *dst && *src == *dst && len > 0)
	493	{
	494	src++;
	495	dst++;
	496	len--;
	497	}
	498	if (*src != *dst)
	499	return *src < *dst ? -1 : +1;
	500	return 0;
	501	}
	502
	503	/**
	504	* @brief print a formatted string of ASCII characters to an unicode_char array
	505	* @param dst pointer to the array
	506	* @param format format string followed by optional parameters
	507	* @return number of unicode_char stored in dst
	508	*/
	509	int uchar_sprintf(unicode_char* dst, const char* format, ...)
	510	{
	511	va_list ap;
	512	char buff[256];
	513	va_start(ap, format);
	514	int len = vsnprintf(buff, sizeof(buff), format, ap);
	515	va_end(ap);
	516	for (int i = 0; i < len; i++)
	517	*dst++ = buff[i];
	518	*dst = 0;
	519	return len;
	520	}
	521
	522	/**
	523	* @brief copy an array of unicode_char from source to destination
	524	*
	525	* @param dst pointer to destination array
	526	* @param src const pointer to the source array
	527	* @return a pointer to the original destination
	528	*/
	529	unicode_char* uchar_strcpy(unicode_char* dst, const unicode_char* src)
	530	{
	531	unicode_char* str = dst;
	532	while (*src)
	533	*dst++ = *src++;
	534	return str;
	535	}
	536
	537	/**
	538	* @brief copy a length limited array of unicode_char from source to destination
	539	*
	540	* This function always terminates dst with a 0 unicode_char, unlike some
	541	* classic libc implementations of strncpy(). This means that actually at
	542	* most len-1 unicode_char are copied from src to leave room for the 0 code.
	543	*
	544	* @param dst pointer to destination array
	545	* @param src const pointer to the source array
	546	* @param len maximum number of unicode_char to copy
	547	* @return a pointer to the original destination
	548	*/
	549	unicode_char* uchar_strncpy(unicode_char* dst, const unicode_char* src, size_t len)
	550	{
	551	unicode_char* str = dst;
	552	while (*src && len > 1)
	553	{
	554	*dst++ = *src++;
	555	len--;
	556	}
	557	if (len > 0)
	558	*dst = 0;
	559	return str;
	560	}
	561
402	562	/***************************************************************************
403	563	*
404	564	* Parsing and access to the UnicodeData table published at unicode.org
405	565	*
406	566	***************************************************************************/
407	567
408		//! Information about a unicode_char
	568	//! Information about an unicode_char
409	569	typedef struct {
410	570	#if NEED_UNICODE_NAME
411	571	char *name; //!< name of the character
r26170	r26171
552	712	{0xfe70, 0xfeff, "Arabic Presentation Forms-B"},
553	713	{0xff00, 0xffef, "Halfwidth and Fullwidth Forms"},
554	714	{0xfff0, 0xffff, "Specials"}
	715	// FIXME: add ranges for the Unicode planes 1 to 16
555	716	};
556	717	#endif
557	718
558	719	#if NEED_UNICODE_CCOM
559	720	static const char *canonical_combining_str(UINT8 val)
560	721	{
561		switch (val) {
	722	switch (val)
	723	{
562	724	case 0: return "Spacing, split, enclosing, reordrant, and Tibetan subjoined";
563	725	case 1: return "Overlays and interior";
564	726	case 7: return "Nuktas";
r26170	r26171
620	782	{
621	783	if (!unicode_data || uchar >= UNICODE_PLANESIZE || !unicode_data[uchar])
622	784	return "";
623		switch (unicode_data[uchar]->gen_cat) {
	785	switch (unicode_data[uchar]->gen_cat)
	786	{
624	787	case gcat_Lu: return "Lu: Letter, Uppercase";
625	788	case gcat_Ll: return "Ll: Letter, Lowercase";
626	789	case gcat_Lt: return "Lt: Letter, Titlecase";
r26170	r26171
685	848	{
686	849	if (!unicode_data || uchar >= UNICODE_PLANESIZE || !unicode_data[uchar])
687	850	return "";
688		switch (unicode_data[uchar]->bidi) {
	851	switch (unicode_data[uchar]->bidi)
	852	{
689	853	case bidi_L: return "L: Left-to-Right";
690	854	case bidi_LRE: return "LRE: Left-to-Right Embedding";
691	855	case bidi_LRO: return "LRO: Left-to-Right Override";
r26170	r26171
723	887	{
724	888	if (!unicode_data || uchar >= UNICODE_PLANESIZE || !unicode_data[uchar])
725	889	return "";
726		switch (unicode_data[uchar]->decomp_map) {
	890	switch (unicode_data[uchar]->decomp_map)
	891	{
727	892	case deco_canonical: return "Canonical mapping";
728	893	case deco_font: return "A font variant (e.g. a blackletter form)";
729	894	case deco_noBreak: return "A no-break version of a space or hyphen";
r26170	r26171
915	1080	static UINT32 hit = 0;
916	1081	UINT32 i;
917	1082
918		for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++) {
919		if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) {
	1083	for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++)
	1084	{
	1085	if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last)
	1086	{
920	1087	hit = i;
921	1088	return unicode_ranges[i].name;
922	1089	}
923	1090	}
924		for (i = 0; i < hit; i++) {
925		if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) {
	1091	for (i = 0; i < hit; i++)
	1092	{
	1093	if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last)
	1094	{
926	1095	hit = i;
927	1096	return unicode_ranges[i].name;
928	1097	}
r26170	r26171
936	1105	static UINT32 hit = 0;
937	1106	UINT32 i;
938	1107
939		for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++) {
940		if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) {
	1108	for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++)
	1109	{
	1110	if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last)
	1111	{
941	1112	hit = i;
942	1113	return unicode_ranges[i].first;
943	1114	}
944	1115	}
945
946		for (i = 0; i < hit; i++) {
947		if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) {
	1116	for (i = 0; i < hit; i++)
	1117	{
	1118	if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last)
	1119	{
948	1120	hit = i;
949	1121	return unicode_ranges[i].first;
950	1122	}
r26170	r26171
958	1130	static UINT32 hit = 0;
959	1131	UINT32 i;
960	1132
961		for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++) {
962		if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) {
	1133	for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++)
	1134	{
	1135	if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last)
	1136	{
963	1137	hit = i;
964	1138	return unicode_ranges[i].last;
965	1139	}
966	1140	}
967
968		for (i = 0; i < hit; i++) {
969		if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) {
	1141	for (i = 0; i < hit; i++)
	1142	{
	1143	if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last)
	1144	{
970	1145	hit = i;
971	1146	return unicode_ranges[i].last;
972	1147	}
r26170	r26171
985	1160	if (src)
986	1161	token = src;
987	1162	start = token;
988		while (token && *token) {
	1163	while (token && *token)
	1164	{
989	1165	const char *d = delim;
990		while (*d) {
991		if (*token == *d) {
	1166	while (*d)
	1167	{
	1168	if (token == d)
	1169	{
992	1170	*token++ = '\0';
993	1171	return start;
994	1172	}
r26170	r26171
1016	1194	unicode_char first = 0;
1017	1195	unicode_char last = 0;
1018	1196
1019		while (fgets(line, 1024, file)) {
	1197	while (fgets(line, 1024, file))
	1198	{
1020	1199	unicode_data_t u;
1021	1200	unicode_char code;
1022	1201	int tokennum = 1;
r26170	r26171
1034	1213	tokennum++;
1035	1214	if (NULL == (token = parse_strtok(NULL, ";\r\n")))
1036	1215	fprintf(stderr, "%s: token #%d failed on line %d\n%s", __FUNCTION__, tokennum, linenum, line);
1037		if (NULL != token) {
	1216	if (NULL != token)
	1217	{
1038	1218	// check for a range description
1039		if (token[0] == '<') {
	1219	if (token[0] == '<')
	1220	{
1040	1221	// strip a trailing ", First>" string fragment
1041		if (0 == strcmp(token + strlen(token) - 8,", First>")) {
	1222	if (0 == strcmp(token + strlen(token) - 8,", First>"))
	1223	{
1042	1224	strcpy(token, token + 1);
1043	1225	token[strlen(token) - 8] = '\0';
1044	1226	first = code;
1045	1227	}
1046	1228	// strip a trailing ", Last>" string fragment
1047		if (0 == strcmp(token + strlen(token) - 7,", Last>")) {
	1229	if (0 == strcmp(token + strlen(token) - 7,", Last>"))
	1230	{
1048	1231	strcpy(token, token + 1);
1049	1232	token[strlen(token) - 7] = '\0';
1050	1233	last = code;
r26170	r26171
1062	1245	#if NEED_UNICODE_GCAT
1063	1246	// parse general category
1064	1247	u.gen_cat = gcat_0;
1065		if (NULL != token) {
	1248	if (NULL != token)
	1249	{
1066	1250	if (0 == strcmp(token, "Lu"))
1067	1251	u.gen_cat = gcat_Lu;
1068	1252	if (0 == strcmp(token, "Ll"))
r26170	r26171
1140	1324	#if NEED_UNICODE_BIDI
1141	1325	// parse bidirectional category
1142	1326	u.bidi = bidi_0;
1143		if (NULL != token) {
	1327	if (NULL != token)
	1328	{
1144	1329	if (0 == strcmp(token, "L"))
1145	1330	u.bidi = bidi_L; // Left-to-Right
1146	1331	if (0 == strcmp(token, "LRE"))
r26170	r26171
1187	1372	fprintf(stderr, "%s: token #%d failed on line %d\n%s", __FUNCTION__, tokennum, linenum, line);
1188	1373	#if NEED_UNICODE_DECO
1189	1374	// parse decomposition mapping
1190		if (NULL != token) {
	1375	if (NULL != token)
	1376	{
1191	1377	unicode_char decomposed[256];
1192	1378	UINT8 n = 0;
1193	1379	char *p = token;
r26170	r26171
1229	1415	while (isspace(*p))
1230	1416	p++;
1231	1417	// parse decomposition codes
1232		while (*p) {
	1418	while (*p)
	1419	{
1233	1420	// skip initial whitespace
1234	1421	while (isspace(*p))
1235	1422	p++;
r26170	r26171
1242	1429	if (n >= 255)
1243	1430	break;
1244	1431	}
1245		if (n > 0) {
	1432	if (n > 0)
	1433	{
1246	1434	u.n_decomp = n;
1247	1435	u.decomp_codes = (unicode_char *)malloc(sizeof(unicode_char) * n);
1248	1436	memcpy(u.decomp_codes, decomposed, sizeof(unicode_char) * n);
r26170	r26171
1296	1484	tokennum++;
1297	1485	if (NULL == (token = parse_strtok(NULL, ";\r\n")))
1298	1486	fprintf(stderr, "%s: token #%d failed on line %d\n%s", __FUNCTION__, tokennum, linenum, line);
1299		if (NULL != token && *token) {
	1487	if (NULL != token && *token)
	1488	{
1300	1489	/* FIXME: hmm ... don't know what this token means */
1301	1490	}
1302	1491
r26170	r26171
1324	1513	u.titlecase = strtoul(token, NULL, 16);
1325	1514	#endif
1326	1515
1327		if (first > 0 && last > 0) {
1328		if (first + 1 >= UNICODE_PLANESIZE) {
	1516	if (first > 0 && last > 0)
	1517	{
	1518	if (first + 1 >= UNICODE_PLANESIZE)
	1519	{
1329	1520	fprintf(stderr, "%s: range %#07x-%#07x outside planes\n", __FUNCTION__, first + 1, last);
1330		} else {
	1521	}
	1522	else
	1523	{
1331	1524	for (code = first + 1; code <= last && code < UNICODE_PLANESIZE; code++)
1332	1525	unicode_data[code] = unicode_data[first];
1333	1526	}
r26170	r26171
1335	1528	last = 0;
1336	1529	code = UNICODE_PLANESIZE;
1337	1530	}
1338		if (code < UNICODE_PLANESIZE) {
	1531	if (code < UNICODE_PLANESIZE)
	1532	{
1339	1533	unicode_data[code] = (unicode_data_t *)malloc(sizeof(unicode_data_t));
1340	1534	memcpy(unicode_data[code], &u, sizeof(u));
1341	1535	}

branches/alto2/src/lib/util/unicode.h
r26170	r26171
22	22
23	23	#include <stdlib.h>
24	24	#include <stdio.h>
	25	#include <stdarg.h>
25	26	#include <ctype.h>
26	27	#include "osdcore.h"
27	28
r26170	r26171
31	32	CONSTANTS
32	33	***************************************************************************/
33	34
34		/* these defines specify the maximum size of different types of Unicode
35		* character encodings */
36		#define UTF8_CHAR_MAX 6
37		#define UTF16_CHAR_MAX 2
38
39		/* these are UTF-8 encoded strings for common characters */
40		#define UTF8_NBSP "\xc2\xa0" /* non-breaking space */
41		#define UTF8_MULTIPLY "\xc3\x97" /* multiplication symbol */
42		#define UTF8_DEGREES "\xc2\xb0" /* degrees symbol */
43
44		#define a_RING "\xc3\xa5" /* small a with a ring */
45		#define a_UMLAUT "\xc3\xa4" /* small a with an umlaut */
46		#define o_UMLAUT "\xc3\xb6" /* small o with an umlaut */
47		#define u_UMLAUT "\xc3\xbc" /* small u with an umlaut */
48		#define e_ACUTE "\xc3\xa9" /* small e with an acute */
49
50		#define A_RING "\xc3\x85" /* capital A with a ring */
51		#define A_UMLAUT "\xc3\x84" /* capital A with an umlaut */
52		#define O_UMLAUT "\xc3\x96" /* capital O with an umlaut */
53		#define U_UMLAUT "\xc3\x9c" /* capital U with an umlaut */
54		#define E_ACUTE "\xc3\x89" /* capital E with an acute */
55
56		#define UTF8_LEFT "\xe2\x86\x90" /* cursor left */
57		#define UTF8_RIGHT "\xe2\x86\x92" /* cursor right */
58		#define UTF8_UP "\xe2\x86\x91" /* cursor up */
59		#define UTF8_DOWN "\xe2\x86\x93" /* cursor down */
60
61
62
63		/***************************************************************************
64		TYPE DEFINITIONS
65		***************************************************************************/
66
67		typedef UINT16 utf16_char;
68		typedef UINT32 unicode_char;
69
70
71
72		/***************************************************************************
73		FUNCTION PROTOTYPES
74		***************************************************************************/
75
76		/* tests to see if a unicode char is a valid code point */
77		int uchar_isvalid(unicode_char uchar);
78
79		/* converting strings to 32-bit Unicode chars */
80		int uchar_from_utf8(unicode_char *uchar, const char *utf8char, size_t count);
81		int uchar_from_utf16(unicode_char *uchar, const utf16_char *utf16char, size_t count);
82		int uchar_from_utf16f(unicode_char *uchar, const utf16_char *utf16char, size_t count);
83
84		/* converting 32-bit Unicode chars to strings */
85		int utf8_from_uchar(char *utf8string, size_t count, unicode_char uchar);
86		int utf16_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar);
87		int utf16f_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar);
88
89		/* misc UTF-8 helpers */
90		const char *utf8_previous_char(const char *utf8string);
91		int utf8_is_valid_string(const char *utf8string);
92
93		/* Unicode lookup table loader */
94		//! load a table translating UINT8 (unsigned char) to Unicode values
95		unicode_char * uchar_table_load(const char* name);
96
97		//! reverse lookup of uchar in a Unicode table - returns 255 if not found
98		UINT8 uchar_table_index(unicode_char* table, unicode_char uchar);
99
100		//! free a unicode table
101		void uchar_table_free(unicode_char* table);
102
103		/***************************************************************************
104		* unicode.org published UnicodeData.txt
105		* parser and accessors
106		***************************************************************************/
107
108		//! load the UnicodeData.txt file and parse it
109		int unicode_data_load(const char* name);
110
111		//! free the UnicodeData.txt table memory
112		void unicode_data_free();
113
114	35	//! size of the first 17 Unicode planes
115	36	#define UNICODE_PLANESIZE 0x110000
116	37
117	38	#ifndef NEED_UNICODE_RANGES
118	39	#define NEED_UNICODE_RANGES 1 //!< define to 1, if the name, first or last of the range of a code is needed
119	40	#endif
120
121	41	#ifndef NEED_UNICODE_NAME
122	42	#define NEED_UNICODE_NAME 1 //!< define to 1, if the name of a code is needed
123	43	#endif
124
125	44	#ifndef NEED_UNICODE_NAME10
126	45	#define NEED_UNICODE_NAME10 1 //!< define to 1, if the short name of a code is needed
127	46	#endif
128
129	47	#ifndef NEED_UNICODE_GCAT
130	48	#define NEED_UNICODE_GCAT 1 //!< define to 1, if the general category of a code is needed
131	49	#endif
132
133	50	#ifndef NEED_UNICODE_CCOM
134	51	#define NEED_UNICODE_CCOM 1 //!< define to 1, if the canonical combining (name) of a code is needed
135	52	#endif
136
137	53	#ifndef NEED_UNICODE_BIDI
138	54	#define NEED_UNICODE_BIDI 1 //!< define to 1, if the bidirectional category of a code is needed
139	55	#endif
140
141	56	#ifndef NEED_UNICODE_DECO
142	57	#define NEED_UNICODE_DECO 1 //!< define to 1, if the decomposition codes of a code are needed
143	58	#endif
144
145	59	#ifndef NEED_UNICODE_DECIMAL
146	60	#define NEED_UNICODE_DECIMAL 1 //!< define to 1, if the decimal value of a code is needed
147	61	#endif
148
149	62	#ifndef NEED_UNICODE_DIGIT
150	63	#define NEED_UNICODE_DIGIT 1 //!< define to 1, if the digit value of a code is needed
151	64	#endif
152
153	65	#ifndef NEED_UNICODE_NUMERIC
154	66	#define NEED_UNICODE_NUMERIC 1 //!< define to 1, if the numeric value of a code is needed
155	67	#endif
156
157	68	#ifndef NEED_UNICODE_MIRRORED
158	69	#define NEED_UNICODE_MIRRORED 1 //!< define to 1, if the mirrored flag of a code is needed
159	70	#endif
160
161	71	#ifndef NEED_UNICODE_DECN
162	72	#define NEED_UNICODE_DECN 1 //!< define to 1, if access to decomposed code [n] of a code is needed
163	73	#endif
164
165	74	#ifndef NEED_UNICODE_UCASE
166	75	#define NEED_UNICODE_UCASE 1 //!< define to 1, if the upper case value of a code is needed
167	76	#endif
168
169	77	#ifndef NEED_UNICODE_LCASE
170	78	#define NEED_UNICODE_LCASE 1 //!< define to 1, if the lower case value of a code is needed
171	79	#endif
172
173	80	#ifndef NEED_UNICODE_TCASE
174	81	#define NEED_UNICODE_TCASE 1 //!< define to 1, if the title case value of a code is needed
175	82	#endif
176
177	83	#ifndef NEED_UNICODE_WIDTH
178	84	#define NEED_UNICODE_WIDTH 1 //!< define to 1, if the glyph width of a code is needed
179	85	#endif
180	86
	87	#define UTF8_CHAR_MAX 6 //!< maximum size of Unicode UTF-8 encoding
	88	#define UTF16_CHAR_MAX 2 //!< maximum size of Unicode UTF-16 encoding
	89
	90	/* these are UTF-8 encoded strings for common characters */
	91	#define UTF8_NBSP "\xc2\xa0" //!< non-breaking space
	92	#define UTF8_MULTIPLY "\xc3\x97" //!< multiplication symbol
	93	#define UTF8_DEGREES "\xc2\xb0" //!< degrees symbol
	94
	95	#define a_RING "\xc3\xa5" //!< small a with a ring
	96	#define a_UMLAUT "\xc3\xa4" //!< small a with an umlaut
	97	#define o_UMLAUT "\xc3\xb6" //!< small o with an umlaut
	98	#define u_UMLAUT "\xc3\xbc" //!< small u with an umlaut
	99	#define e_ACUTE "\xc3\xa9" //!< small e with an acute
	100
	101	#define A_RING "\xc3\x85" //!< capital A with a ring
	102	#define A_UMLAUT "\xc3\x84" //!< capital A with an umlaut
	103	#define O_UMLAUT "\xc3\x96" //!< capital O with an umlaut
	104	#define U_UMLAUT "\xc3\x9c" //!< capital U with an umlaut
	105	#define E_ACUTE "\xc3\x89" //!< capital E with an acute
	106
	107	#define UTF8_LEFT "\xe2\x86\x90" //!< cursor left
	108	#define UTF8_RIGHT "\xe2\x86\x92" //!< cursor right
	109	#define UTF8_UP "\xe2\x86\x91" //!< cursor up
	110	#define UTF8_DOWN "\xe2\x86\x93" //!< cursor down
	111
	112
	113
	114	/***************************************************************************
	115	TYPE DEFINITIONS
	116	***************************************************************************/
	117
	118	typedef UINT16 utf16_char; //!< type used for UTF-16 encoded values
	119	typedef UINT32 unicode_char; //!< type used for full width Unicode values
	120
	121
	122
	123	/***************************************************************************
	124	FUNCTION PROTOTYPES
	125	***************************************************************************/
	126
	127	//! tests to see if a unicode char is a valid code point
	128	int uchar_isvalid(unicode_char uchar);
	129
	130	//! convert an UTF-8 sequence into an unicode character
	131	int uchar_from_utf8(unicode_char *uchar, const char *utf8char, size_t count);
	132
	133	//! convert a UTF-16 sequence into an unicode character
	134	int uchar_from_utf16(unicode_char *uchar, const utf16_char *utf16char, size_t count);
	135
	136	//! convert a UTF-16 sequence into an unicode character from a flipped byte order
	137	int uchar_from_utf16f(unicode_char *uchar, const utf16_char *utf16char, size_t count);
	138
	139	//! convert an unicode character into a UTF-8 sequence
	140	int utf8_from_uchar(char *utf8string, size_t count, unicode_char uchar);
	141
	142	//! convert an unicode character into a UTF-16 sequence
	143	int utf16_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar);
	144
	145	//! convert an unicode character into a UTF-16 sequence with flipped endianness
	146	int utf16f_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar);
	147
	148	/* misc UTF-8 helpers */
	149	//! return a pointer to the previous character in a string
	150	const char *utf8_previous_char(const char *utf8string);
	151
	152	//! return true if the given string is a properly formed sequence of UTF-8 characters
	153	int utf8_is_valid_string(const char *utf8string);
	154
	155	//! return the number of decoded Unicode values in UTF-8 encoded string
	156	size_t utf8_strlen(const char* src);
	157
	158	/* 8 bit code to Unicode value lookup table handling (e.g. ISO-8859-1 aka Latin1) */
	159	//! load a table translating UINT8 (unsigned char) to Unicode values
	160	unicode_char * uchar_table_load(const char* name);
	161
	162	//! reverse lookup of uchar in a Unicode table
	163	UINT8 uchar_table_index(unicode_char* table, unicode_char uchar);
	164
	165	//! free a unicode table
	166	void uchar_table_free(unicode_char* table);
	167
	168	/* unicode_char array functions - string.h like */
	169	//! return the unicode_char array length
	170	size_t uchar_strlen(const unicode_char* src);
	171
	172	//! compare two unicode_char arrays
	173	int uchar_strcmp(const unicode_char* dst, const unicode_char* src);
	174
	175	//! compare two unicode_char arrays with length limiting
	176	int uchar_strncmp(const unicode_char* dst, const unicode_char* src, size_t len);
	177
	178	//! print a formatted string of ASCII characters to an unicode_char array (max 256 characters)
	179	int uchar_sprintf(unicode_char* dst, const char* format, ...);
	180
	181	//! copy an array of unicode_char from source to destination
	182	unicode_char* uchar_strcpy(unicode_char* dst, const unicode_char* src);
	183
	184	//! copy a length limited array of unicode_char from source to destination
	185	unicode_char* uchar_strncpy(unicode_char* dst, const unicode_char* src, size_t len);
	186
	187	/***************************************************************************
	188	* unicode.org published UnicodeData.txt
	189	* Parser and property accessors
	190	***************************************************************************/
	191
	192	//! load the specified UnicodeData.txt file and parse it
	193	int unicode_data_load(const char* name);
	194
	195	//! free the UnicodeData.txt table memory
	196	void unicode_data_free();
	197
181	198	#if NEED_UNICODE_GCAT
	199	/**
	200	* @brief enumeration of the possible general categories
	201	*/
182	202	typedef enum {
183	203	gcat_0, //!< invalid value
184	204	gcat_Lu, //!< Letter, Uppercase
r26170	r26171
215	235	#endif
216	236
217	237	#if NEED_UNICODE_BIDI
	238	/**
	239	* @brief enumeration of the possible bidirectional categories
	240	*/
218	241	typedef enum {
219	242	bidi_0, //!< invalid value
220	243	bidi_L, //!< Left-to-Right
r26170	r26171
240	263	#endif
241	264
242	265	#if NEED_UNICODE_DECO
	266	/**
	267	* @brief enumeration of the possible decomposition mappings
	268	*/
243	269	typedef enum {
244	270	deco_0, //!< invalid value
245	271	deco_canonical, //!< canonical mapping

199869 Revisions