branches/alto2/src/emu/debug/debugvw.h
| r26170 | r26171 | |
| 19 | 19 | // CONSTANTS |
| 20 | 20 | //************************************************************************** |
| 21 | 21 | |
| 22 | | // types passed to debug_view_manager::alloc_view() |
| 22 | //! types passed to debug_view_manager::alloc_view() |
| 23 | 23 | enum debug_view_type |
| 24 | 24 | { |
| 25 | 25 | DVT_NONE, |
| r26170 | r26171 | |
| 35 | 35 | }; |
| 36 | 36 | |
| 37 | 37 | |
| 38 | | // notifications passed to view_notify() |
| 38 | //! notifications passed to view_notify() |
| 39 | 39 | enum debug_view_notification |
| 40 | 40 | { |
| 41 | 41 | VIEW_NOTIFY_NONE, |
| r26170 | r26171 | |
| 46 | 46 | |
| 47 | 47 | |
| 48 | 48 | // attribute bits for debug_view_char.attrib |
| 49 | | const UINT8 DCA_NORMAL = 0x00; // black on white |
| 50 | | const UINT8 DCA_CHANGED = 0x01; // red foreground |
| 51 | | const UINT8 DCA_SELECTED = 0x02; // light red background |
| 52 | | const UINT8 DCA_INVALID = 0x04; // dark blue foreground |
| 53 | | const UINT8 DCA_DISABLED = 0x08; // darker foreground |
| 54 | | const UINT8 DCA_ANCILLARY = 0x10; // grey background |
| 55 | | const UINT8 DCA_CURRENT = 0x20; // yellow background |
| 56 | | const UINT8 DCA_COMMENT = 0x40; // green foreground |
| 57 | | const UINT8 DCA_VISITED = 0x80; // light blue background |
| 49 | const UINT8 DCA_NORMAL = 0x00; //!< black on white |
| 50 | const UINT8 DCA_CHANGED = 0x01; //!< red foreground |
| 51 | const UINT8 DCA_SELECTED = 0x02; //!< light red background |
| 52 | const UINT8 DCA_INVALID = 0x04; //!< dark blue foreground |
| 53 | const UINT8 DCA_DISABLED = 0x08; //!< darker foreground |
| 54 | const UINT8 DCA_ANCILLARY = 0x10; //!< grey background |
| 55 | const UINT8 DCA_CURRENT = 0x20; //!< yellow background |
| 56 | const UINT8 DCA_COMMENT = 0x40; //!< green foreground |
| 57 | const UINT8 DCA_VISITED = 0x80; //!< light blue background |
| 58 | 58 | |
| 59 | 59 | |
| 60 | 60 | // special characters that can be passed to process_char() |
| 61 | | const int DCH_UP = 1; // up arrow |
| 62 | | const int DCH_DOWN = 2; // down arrow |
| 63 | | const int DCH_LEFT = 3; // left arrow |
| 64 | | const int DCH_RIGHT = 4; // right arrow |
| 65 | | const int DCH_PUP = 5; // page up |
| 66 | | const int DCH_PDOWN = 6; // page down |
| 67 | | const int DCH_HOME = 7; // home |
| 68 | | const int DCH_CTRLHOME = 8; // ctrl+home |
| 69 | | const int DCH_END = 9; // end |
| 70 | | const int DCH_CTRLEND = 10; // ctrl+end |
| 71 | | const int DCH_CTRLRIGHT = 11; // ctrl+right |
| 72 | | const int DCH_CTRLLEFT = 12; // ctrl+left |
| 61 | const int DCH_UP = 1; //!< up arrow |
| 62 | const int DCH_DOWN = 2; //!< down arrow |
| 63 | const int DCH_LEFT = 3; //!< left arrow |
| 64 | const int DCH_RIGHT = 4; //!< right arrow |
| 65 | const int DCH_PUP = 5; //!< page up |
| 66 | const int DCH_PDOWN = 6; //!< page down |
| 67 | const int DCH_HOME = 7; //!< home |
| 68 | const int DCH_CTRLHOME = 8; //!< ctrl+home |
| 69 | const int DCH_END = 9; //!< end |
| 70 | const int DCH_CTRLEND = 10; //!< ctrl+end |
| 71 | const int DCH_CTRLRIGHT = 11; //!< ctrl+right |
| 72 | const int DCH_CTRLLEFT = 12; //!< ctrl+left |
| 73 | 73 | |
| 74 | 74 | |
| 75 | 75 | // special characters that can be passed to process_click() |
| 76 | | const int DCK_LEFT_CLICK = 1; // left instantaneous click |
| 77 | | const int DCK_RIGHT_CLICK = 2; // right instantaneous click |
| 78 | | const int DCK_MIDDLE_CLICK = 3; // middle instantaneous click |
| 76 | const int DCK_LEFT_CLICK = 1; //!< left instantaneous click |
| 77 | const int DCK_RIGHT_CLICK = 2; //!< right instantaneous click |
| 78 | const int DCK_MIDDLE_CLICK = 3; //!< middle instantaneous click |
| 79 | 79 | |
| 80 | 80 | |
| 81 | 81 | //************************************************************************** |
| r26170 | r26171 | |
| 86 | 86 | class debug_view; |
| 87 | 87 | |
| 88 | 88 | |
| 89 | | // OSD callback function for a view |
| 89 | //! OSD callback function for a view |
| 90 | 90 | typedef void (*debug_view_osd_update_func)(debug_view &view, void *osdprivate); |
| 91 | 91 | |
| 92 | 92 | |
| 93 | | // a single "character" in the debug view has an Unicode value and an attribute byte |
| 93 | //! a single "character" in the debug view has an Unicode value and an attribute byte |
| 94 | 94 | struct debug_view_char |
| 95 | 95 | { |
| 96 | 96 | unicode_char uchar; |
| r26170 | r26171 | |
| 98 | 98 | }; |
| 99 | 99 | |
| 100 | 100 | |
| 101 | | // pair of X,Y coordinates for sizing |
| 101 | //! pair of X,Y coordinates for sizing |
| 102 | 102 | class debug_view_xy |
| 103 | 103 | { |
| 104 | 104 | public: |
| r26170 | r26171 | |
| 109 | 109 | }; |
| 110 | 110 | |
| 111 | 111 | |
| 112 | | // debug_view_sources select from multiple sources available within a view |
| 112 | //! debug_view_sources select from multiple sources available within a view |
| 113 | 113 | class debug_view_source |
| 114 | 114 | { |
| 115 | 115 | DISABLE_COPYING(debug_view_source); |
| r26170 | r26171 | |
| 129 | 129 | |
| 130 | 130 | private: |
| 131 | 131 | // internal state |
| 132 | | debug_view_source * m_next; // link to next item |
| 133 | | astring m_name; // name of the source item |
| 134 | | device_t * m_device; // associated device (if applicable) |
| 135 | | bool m_is_octal; // is view in octal or hex |
| 132 | debug_view_source * m_next; //!< link to next item |
| 133 | astring m_name; //!< name of the source item |
| 134 | device_t * m_device; //!< associated device (if applicable) |
| 135 | bool m_is_octal; //!< is view in octal or hex |
| 136 | 136 | }; |
| 137 | 137 | |
| 138 | 138 | |
| r26170 | r26171 | |
| 161 | 161 | |
| 162 | 162 | private: |
| 163 | 163 | // internal state |
| 164 | | running_machine & m_machine; // reference to our machine |
| 165 | | debug_view_source * m_head; // head of the list |
| 166 | | debug_view_source * m_tail; // end of the tail |
| 167 | | UINT32 m_count; // number of items in the list |
| 164 | running_machine & m_machine; //!< reference to our machine |
| 165 | debug_view_source * m_head; //!< head of the list |
| 166 | debug_view_source * m_tail; //!< end of the tail |
| 167 | UINT32 m_count; //!< number of items in the list |
| 168 | 168 | }; |
| 169 | 169 | |
| 170 | 170 | |
| r26170 | r26171 | |
| 223 | 223 | |
| 224 | 224 | protected: |
| 225 | 225 | // core view data |
| 226 | | debug_view * m_next; // link to the next view |
| 227 | | debug_view_type m_type; // type of view |
| 228 | | const debug_view_source *m_source; // currently selected data source |
| 229 | | debug_view_source_list m_source_list; // list of available data sources |
| 226 | debug_view * m_next; //!< link to the next view |
| 227 | debug_view_type m_type; //!< type of view |
| 228 | const debug_view_source *m_source; //!< currently selected data source |
| 229 | debug_view_source_list m_source_list; //!< list of available data sources |
| 230 | 230 | |
| 231 | 231 | // OSD data |
| 232 | | debug_view_osd_update_func m_osdupdate; // callback for the update |
| 233 | | void * m_osdprivate; // OSD-managed private data |
| 232 | debug_view_osd_update_func m_osdupdate; //!< callback for the update |
| 233 | void * m_osdprivate; //!< OSD-managed private data |
| 234 | 234 | |
| 235 | 235 | // visibility info |
| 236 | | debug_view_xy m_visible; // visible size (in rows and columns) |
| 237 | | debug_view_xy m_total; // total size (in rows and columns) |
| 238 | | debug_view_xy m_topleft; // top-left visible position (in rows and columns) |
| 239 | | debug_view_xy m_cursor; // cursor position |
| 240 | | bool m_supports_cursor; // does this view support a cursor? |
| 241 | | bool m_cursor_visible; // is the cursor visible? |
| 236 | debug_view_xy m_visible; //!< visible size (in rows and columns) |
| 237 | debug_view_xy m_total; //!< total size (in rows and columns) |
| 238 | debug_view_xy m_topleft; //!< top-left visible position (in rows and columns) |
| 239 | debug_view_xy m_cursor; //!< cursor position |
| 240 | bool m_supports_cursor; //!< does this view support a cursor? |
| 241 | bool m_cursor_visible; //!< is the cursor visible? |
| 242 | 242 | |
| 243 | 243 | // update info |
| 244 | | bool m_recompute; // does this view require a recomputation? |
| 245 | | UINT8 m_update_level; // update level; updates when this hits 0 |
| 246 | | bool m_update_pending; // true if there is a pending update |
| 247 | | bool m_osd_update_pending; // true if there is a pending update |
| 248 | | debug_view_char * m_viewdata; // current array of view data |
| 249 | | int m_viewdata_size; // number of elements of the viewdata array |
| 244 | bool m_recompute; //!< does this view require a recomputation? |
| 245 | UINT8 m_update_level; //!< update level; updates when this hits 0 |
| 246 | bool m_update_pending; //!< true if there is a pending update |
| 247 | bool m_osd_update_pending; //!< true if there is a pending update |
| 248 | debug_view_char * m_viewdata; //!< current array of view data |
| 249 | int m_viewdata_size; //!< number of elements of the viewdata array |
| 250 | 250 | |
| 251 | 251 | private: |
| 252 | | running_machine & m_machine; // machine associated with this view |
| 252 | running_machine & m_machine; //!< machine associated with this view |
| 253 | 253 | }; |
| 254 | 254 | |
| 255 | 255 | |
| r26170 | r26171 | |
| 277 | 277 | debug_view *append(debug_view *view); |
| 278 | 278 | |
| 279 | 279 | // internal state |
| 280 | | running_machine & m_machine; // reference to our machine |
| 281 | | debug_view * m_viewlist; // list of views |
| 280 | running_machine & m_machine; //!< reference to our machine |
| 281 | debug_view * m_viewlist; //!< list of views |
| 282 | 282 | }; |
| 283 | 283 | |
| 284 | 284 | |
| r26170 | r26171 | |
| 308 | 308 | bool recompute(); |
| 309 | 309 | |
| 310 | 310 | // internal state |
| 311 | | running_machine & m_machine; // reference to the machine |
| 312 | | bool m_dirty; // true if the expression needs to be re-evaluated |
| 313 | | UINT64 m_result; // last result from the expression |
| 314 | | parsed_expression m_parsed; // parsed expression data |
| 315 | | astring m_string; // copy of the expression string |
| 311 | running_machine & m_machine; //!< reference to the machine |
| 312 | bool m_dirty; //!< true if the expression needs to be re-evaluated |
| 313 | UINT64 m_result; //!< last result from the expression |
| 314 | parsed_expression m_parsed; //!< parsed expression data |
| 315 | astring m_string; //!< copy of the expression string |
| 316 | 316 | }; |
| 317 | 317 | |
| 318 | 318 | |
branches/alto2/src/lib/util/unicode.c
| r26170 | r26171 | |
| 1 | 1 | // license:BSD-3-Clause |
| 2 | | // copyright-holders:Aaron Giles |
| 2 | // copyright-holders:Aaron Giles, Jürgen Buchmüller |
| 3 | 3 | /********************************************************************* |
| 4 | 4 | |
| 5 | 5 | unicode.c |
| r26170 | r26171 | |
| 11 | 11 | #include "unicode.h" |
| 12 | 12 | |
| 13 | 13 | |
| 14 | | /*------------------------------------------------- |
| 15 | | uchar_isvalid - return true if a given |
| 16 | | character is a legitimate unicode character |
| 17 | | -------------------------------------------------*/ |
| 18 | | |
| 14 | /** |
| 15 | * @brief test for legitimate unicode values |
| 16 | * |
| 17 | * return true if a given character is a legitimate unicode character |
| 18 | * |
| 19 | * @param uchar value to inspect |
| 20 | * @return non zero (true) if uchar is valid, 0 otherwise |
| 21 | */ |
| 19 | 22 | int uchar_isvalid(unicode_char uchar) |
| 20 | 23 | { |
| 21 | 24 | return (uchar < 0x110000) && !((uchar >= 0xd800) && (uchar <= 0xdfff)); |
| 22 | 25 | } |
| 23 | 26 | |
| 24 | 27 | |
| 25 | | /*------------------------------------------------- |
| 26 | | uchar_from_utf8 - convert a UTF-8 sequence |
| 27 | | into a unicode character |
| 28 | | -------------------------------------------------*/ |
| 29 | | |
| 28 | /** |
| 29 | * @brief convert an UTF-8 sequence into an unicode character |
| 30 | * @param uchar pointer to the resulting unicode_char |
| 31 | * @param utf8char pointer to the source string (may be NULL) |
| 32 | * @param count number of characters available in utf8char |
| 33 | * @return the number of characters used |
| 34 | */ |
| 30 | 35 | int uchar_from_utf8(unicode_char *uchar, const char *utf8char, size_t count) |
| 31 | 36 | { |
| 32 | 37 | unicode_char c, minchar; |
| r26170 | r26171 | |
| 117 | 122 | } |
| 118 | 123 | |
| 119 | 124 | |
| 120 | | /*------------------------------------------------- |
| 121 | | uchar_from_utf16 - convert a UTF-16 sequence |
| 122 | | into a unicode character |
| 123 | | -------------------------------------------------*/ |
| 124 | | |
| 125 | /** |
| 126 | * @brief convert a UTF-16 sequence into an unicode character |
| 127 | * @param uchar pointer to the resulting unicode_char |
| 128 | * @param utf16char pointer to the source string (may be NULL) |
| 129 | * @param count number of characters available in utf16char |
| 130 | * @return the number of characters used |
| 131 | */ |
| 125 | 132 | int uchar_from_utf16(unicode_char *uchar, const utf16_char *utf16char, size_t count) |
| 126 | 133 | { |
| 127 | 134 | int rc = -1; |
| r26170 | r26171 | |
| 151 | 158 | } |
| 152 | 159 | |
| 153 | 160 | |
| 154 | | /*------------------------------------------------- |
| 155 | | uchar_from_utf16f - convert a UTF-16 sequence |
| 156 | | into a unicode character from a flipped |
| 157 | | byte order |
| 158 | | -------------------------------------------------*/ |
| 159 | | |
| 161 | /** |
| 162 | * @brief convert a UTF-16 sequence into an unicode character from a flipped byte order |
| 163 | * |
| 164 | * This flips endianness of the first two utf16_char in a local |
| 165 | * copy and then calls uchar_from_utf16. |
| 166 | * |
| 167 | * @param uchar pointer to the resulting unicode_char |
| 168 | * @param utf16char pointer to the source string (may be NULL) |
| 169 | * @param count number of characters available in utf16char |
| 170 | * @return the number of characters used |
| 171 | */ |
| 160 | 172 | int uchar_from_utf16f(unicode_char *uchar, const utf16_char *utf16char, size_t count) |
| 161 | 173 | { |
| 162 | 174 | utf16_char buf[2] = {0}; |
| r26170 | r26171 | |
| 168 | 180 | } |
| 169 | 181 | |
| 170 | 182 | |
| 171 | | /*------------------------------------------------- |
| 172 | | utf8_from_uchar - convert a unicode character |
| 173 | | into a UTF-8 sequence |
| 174 | | -------------------------------------------------*/ |
| 175 | | |
| 183 | /** |
| 184 | * @brief convert an unicode character into a UTF-8 sequence |
| 185 | * @param utf8string pointer to the result char array |
| 186 | * @param count number of characters that can be written to utf8string |
| 187 | * @param uchar unicode_char value to convert |
| 188 | * @return -1 on error, or the number of chars written on success (1 to 6) |
| 189 | */ |
| 176 | 190 | int utf8_from_uchar(char *utf8string, size_t count, unicode_char uchar) |
| 177 | 191 | { |
| 178 | 192 | int rc = 0; |
| r26170 | r26171 | |
| 245 | 259 | return rc; |
| 246 | 260 | } |
| 247 | 261 | |
| 248 | | |
| 249 | | /*------------------------------------------------- |
| 250 | | utf16_from_uchar - convert a unicode character |
| 251 | | into a UTF-16 sequence |
| 252 | | -------------------------------------------------*/ |
| 253 | | |
| 262 | /** |
| 263 | * @brief convert an unicode character into a UTF-16 sequence |
| 264 | * @param utf16string pointer to the result array of utf16_char |
| 265 | * @param count number of characters that can be written to utf16string |
| 266 | * @param uchar unicode_char value to convert |
| 267 | * @return -1 on error, or the number of utf16_char written on success (1 or 2) |
| 268 | */ |
| 254 | 269 | int utf16_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar) |
| 255 | 270 | { |
| 256 | 271 | int rc; |
| r26170 | r26171 | |
| 282 | 297 | return rc; |
| 283 | 298 | } |
| 284 | 299 | |
| 285 | | |
| 286 | | /*------------------------------------------------- |
| 287 | | utf16_from_uchar - convert a unicode character |
| 288 | | into a UTF-16 sequence with flipped endianness |
| 289 | | -------------------------------------------------*/ |
| 290 | | |
| 300 | /** |
| 301 | * @brief convert an unicode character into a UTF-16 sequence with flipped endianness |
| 302 | * @param utf16string pointer to the result array of utf16_char |
| 303 | * @param count number of characters that can be written to utf16string |
| 304 | * @param uchar unciode_char value to convert |
| 305 | * @return -1 on error, or the number of utf16_char written on success (1 or 2) |
| 306 | */ |
| 291 | 307 | int utf16f_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar) |
| 292 | 308 | { |
| 293 | 309 | int rc; |
| 294 | 310 | utf16_char buf[2] = { 0, 0 }; |
| 295 | 311 | |
| 296 | | rc = utf16_from_uchar(buf, count, uchar); |
| 312 | rc = utf16_from_uchar(buf, 2, uchar); |
| 297 | 313 | |
| 298 | | if (rc >= 1) |
| 314 | if (rc >= 1 && count >= 1) |
| 299 | 315 | utf16string[0] = FLIPENDIAN_INT16(buf[0]); |
| 300 | | if (rc >= 2) |
| 316 | if (rc >= 2 && count >= 2) |
| 301 | 317 | utf16string[1] = FLIPENDIAN_INT16(buf[1]); |
| 302 | | return rc; |
| 318 | return rc < count ? rc : count; |
| 303 | 319 | } |
| 304 | 320 | |
| 305 | 321 | |
| 306 | | /*------------------------------------------------- |
| 307 | | utf8_previous_char - return a pointer to the |
| 308 | | previous character in a string |
| 309 | | -------------------------------------------------*/ |
| 310 | | |
| 322 | /** |
| 323 | * @brief return a pointer to the previous character in a string |
| 324 | * @param utf8string const pointer to the starting position in the string |
| 325 | * @return pointer to the character which is not an UTF-8 auxiliary character |
| 326 | */ |
| 311 | 327 | const char *utf8_previous_char(const char *utf8string) |
| 312 | 328 | { |
| 313 | 329 | while ((*--utf8string & 0xc0) == 0x80) |
| r26170 | r26171 | |
| 315 | 331 | return utf8string; |
| 316 | 332 | } |
| 317 | 333 | |
| 318 | | |
| 319 | | /*------------------------------------------------- |
| 320 | | utf8_is_valid_string - return true if the |
| 321 | | given string is a properly formed sequence of |
| 322 | | UTF-8 characters |
| 323 | | -------------------------------------------------*/ |
| 324 | | |
| 334 | /** |
| 335 | * @brief return true if the given string is a properly formed sequence of UTF-8 characters |
| 336 | * @param utf8string const pointer to the source string |
| 337 | * @return TRUE if the string is valid, FALSE otherwise |
| 338 | */ |
| 325 | 339 | int utf8_is_valid_string(const char *utf8string) |
| 326 | 340 | { |
| 327 | 341 | int remaining_length = strlen(utf8string); |
| r26170 | r26171 | |
| 344 | 358 | return TRUE; |
| 345 | 359 | } |
| 346 | 360 | |
| 347 | | /*------------------------------------------------- |
| 348 | | unicode_load_table - load a lookup table |
| 349 | | for e.g. ISO-8859-1 to Unicode from a file |
| 350 | | The expected format is "Format A" defined |
| 351 | | by unicode.org |
| 352 | | -------------------------------------------------*/ |
| 361 | /** |
| 362 | * @brief return the number of decoded Unicode values in UTF-8 encoded string |
| 363 | * @param src pointer to the array of UTF-8 encoded characters |
| 364 | * @return number of unicode_char values decoded from the UTF-8 string |
| 365 | */ |
| 366 | size_t utf8_strlen(const char* src) |
| 367 | { |
| 368 | int total = 0; |
| 369 | while (*src) { |
| 370 | unicode_char uchar; |
| 371 | int len = uchar_from_utf8(&uchar, src, strlen(src)); |
| 372 | if (len < 0) |
| 373 | break; // invalid UTF-8 |
| 374 | total++; |
| 375 | src += len; |
| 376 | } |
| 377 | return total; |
| 378 | } |
| 379 | |
| 380 | /** |
| 381 | * @brief load a lookup table 8 bit codes to Unicode values |
| 382 | * |
| 383 | * This opens and reads a file %name which has to be in the |
| 384 | * unicode.org defined "Format A". |
| 385 | * That is three columns |
| 386 | * column 1: hex encoded 8 bit value of the code |
| 387 | * column 2: hex encoded 32 bit (max) unicode value |
| 388 | * column 3: a hash (#) and optional comment until the end-of-line |
| 389 | * |
| 390 | * @param name name of the (text) file to parse |
| 391 | * @return pointer to a newly allocated array of 256 unicode_char values |
| 392 | */ |
| 353 | 393 | unicode_char * uchar_table_load(const char* name) |
| 354 | 394 | { |
| 355 | 395 | FILE* file = fopen(name, "r"); |
| r26170 | r26171 | |
| 384 | 424 | return table; |
| 385 | 425 | } |
| 386 | 426 | |
| 427 | /** |
| 428 | * @brief return the 8 bit code that is mapped to the specified unicode_char |
| 429 | * @param table table of 256 unicode_char values to use for the reverse lookup |
| 430 | * @param uchar unicode value to reverse lookup |
| 431 | * @return UINT8 with the 8 bit code, or 255 if uchar wasn't found |
| 432 | */ |
| 387 | 433 | UINT8 uchar_table_index(unicode_char* table, unicode_char uchar) |
| 388 | 434 | { |
| 389 | 435 | UINT8 index; |
| r26170 | r26171 | |
| 393 | 439 | return index; |
| 394 | 440 | } |
| 395 | 441 | |
| 442 | /** |
| 443 | * @brief free an unicode lookup table |
| 444 | * @param table |
| 445 | */ |
| 396 | 446 | void uchar_table_free(unicode_char* table) |
| 397 | 447 | { |
| 398 | 448 | if (table) |
| 399 | 449 | free(table); |
| 400 | 450 | } |
| 401 | 451 | |
| 452 | /** |
| 453 | * @brief return the unicode_char array length |
| 454 | * @param src pointer to an array of unicode_char |
| 455 | * @return length of the array until the first 0 |
| 456 | */ |
| 457 | size_t uchar_strlen(const unicode_char* src) |
| 458 | { |
| 459 | int len = 0; |
| 460 | while (*src++) |
| 461 | len++; |
| 462 | return len; |
| 463 | } |
| 464 | |
| 465 | /** |
| 466 | * @brief compare two unicode_char arrays |
| 467 | * @param dst pointer to the first array of unicode_char |
| 468 | * @param src pointer to the second array of unicode_char |
| 469 | * @return 0 if dst == src, -1 if dst < src or +1 otherwise |
| 470 | */ |
| 471 | int uchar_strcmp(const unicode_char* dst, const unicode_char* src) |
| 472 | { |
| 473 | while (*src && *dst && *src == *dst) |
| 474 | { |
| 475 | src++; |
| 476 | dst++; |
| 477 | } |
| 478 | if (*src != *dst) |
| 479 | return *src < *dst ? -1 : +1; |
| 480 | return 0; |
| 481 | } |
| 482 | |
| 483 | /** |
| 484 | * @brief compare two unicode_char arrays with length limiting |
| 485 | * @param dst pointer to the first array of unicode_char |
| 486 | * @param src pointer to the second array of unicode_char |
| 487 | * @param len maximum number of unicode_char to compare |
| 488 | * @return 0 if dst == src, -1 if dst < src or +1 otherwise |
| 489 | */ |
| 490 | int uchar_strncmp(const unicode_char* dst, const unicode_char* src, size_t len) |
| 491 | { |
| 492 | while (*src && *dst && *src == *dst && len > 0) |
| 493 | { |
| 494 | src++; |
| 495 | dst++; |
| 496 | len--; |
| 497 | } |
| 498 | if (*src != *dst) |
| 499 | return *src < *dst ? -1 : +1; |
| 500 | return 0; |
| 501 | } |
| 502 | |
| 503 | /** |
| 504 | * @brief print a formatted string of ASCII characters to an unicode_char array |
| 505 | * @param dst pointer to the array |
| 506 | * @param format format string followed by optional parameters |
| 507 | * @return number of unicode_char stored in dst |
| 508 | */ |
| 509 | int uchar_sprintf(unicode_char* dst, const char* format, ...) |
| 510 | { |
| 511 | va_list ap; |
| 512 | char buff[256]; |
| 513 | va_start(ap, format); |
| 514 | int len = vsnprintf(buff, sizeof(buff), format, ap); |
| 515 | va_end(ap); |
| 516 | for (int i = 0; i < len; i++) |
| 517 | *dst++ = buff[i]; |
| 518 | *dst = 0; |
| 519 | return len; |
| 520 | } |
| 521 | |
| 522 | /** |
| 523 | * @brief copy an array of unicode_char from source to destination |
| 524 | * |
| 525 | * @param dst pointer to destination array |
| 526 | * @param src const pointer to the source array |
| 527 | * @return a pointer to the original destination |
| 528 | */ |
| 529 | unicode_char* uchar_strcpy(unicode_char* dst, const unicode_char* src) |
| 530 | { |
| 531 | unicode_char* str = dst; |
| 532 | while (*src) |
| 533 | *dst++ = *src++; |
| 534 | return str; |
| 535 | } |
| 536 | |
| 537 | /** |
| 538 | * @brief copy a length limited array of unicode_char from source to destination |
| 539 | * |
| 540 | * This function always terminates dst with a 0 unicode_char, unlike some |
| 541 | * classic libc implementations of strncpy(). This means that actually at |
| 542 | * most len-1 unicode_char are copied from src to leave room for the 0 code. |
| 543 | * |
| 544 | * @param dst pointer to destination array |
| 545 | * @param src const pointer to the source array |
| 546 | * @param len maximum number of unicode_char to copy |
| 547 | * @return a pointer to the original destination |
| 548 | */ |
| 549 | unicode_char* uchar_strncpy(unicode_char* dst, const unicode_char* src, size_t len) |
| 550 | { |
| 551 | unicode_char* str = dst; |
| 552 | while (*src && len > 1) |
| 553 | { |
| 554 | *dst++ = *src++; |
| 555 | len--; |
| 556 | } |
| 557 | if (len > 0) |
| 558 | *dst = 0; |
| 559 | return str; |
| 560 | } |
| 561 | |
| 402 | 562 | /*************************************************************************** |
| 403 | 563 | * |
| 404 | 564 | * Parsing and access to the UnicodeData table published at unicode.org |
| 405 | 565 | * |
| 406 | 566 | ***************************************************************************/ |
| 407 | 567 | |
| 408 | | //! Information about a unicode_char |
| 568 | //! Information about an unicode_char |
| 409 | 569 | typedef struct { |
| 410 | 570 | #if NEED_UNICODE_NAME |
| 411 | 571 | char *name; //!< name of the character |
| r26170 | r26171 | |
| 552 | 712 | {0xfe70, 0xfeff, "Arabic Presentation Forms-B"}, |
| 553 | 713 | {0xff00, 0xffef, "Halfwidth and Fullwidth Forms"}, |
| 554 | 714 | {0xfff0, 0xffff, "Specials"} |
| 715 | // FIXME: add ranges for the Unicode planes 1 to 16 |
| 555 | 716 | }; |
| 556 | 717 | #endif |
| 557 | 718 | |
| 558 | 719 | #if NEED_UNICODE_CCOM |
| 559 | 720 | static const char *canonical_combining_str(UINT8 val) |
| 560 | 721 | { |
| 561 | | switch (val) { |
| 722 | switch (val) |
| 723 | { |
| 562 | 724 | case 0: return "Spacing, split, enclosing, reordrant, and Tibetan subjoined"; |
| 563 | 725 | case 1: return "Overlays and interior"; |
| 564 | 726 | case 7: return "Nuktas"; |
| r26170 | r26171 | |
| 620 | 782 | { |
| 621 | 783 | if (!unicode_data || uchar >= UNICODE_PLANESIZE || !unicode_data[uchar]) |
| 622 | 784 | return ""; |
| 623 | | switch (unicode_data[uchar]->gen_cat) { |
| 785 | switch (unicode_data[uchar]->gen_cat) |
| 786 | { |
| 624 | 787 | case gcat_Lu: return "Lu: Letter, Uppercase"; |
| 625 | 788 | case gcat_Ll: return "Ll: Letter, Lowercase"; |
| 626 | 789 | case gcat_Lt: return "Lt: Letter, Titlecase"; |
| r26170 | r26171 | |
| 685 | 848 | { |
| 686 | 849 | if (!unicode_data || uchar >= UNICODE_PLANESIZE || !unicode_data[uchar]) |
| 687 | 850 | return ""; |
| 688 | | switch (unicode_data[uchar]->bidi) { |
| 851 | switch (unicode_data[uchar]->bidi) |
| 852 | { |
| 689 | 853 | case bidi_L: return "L: Left-to-Right"; |
| 690 | 854 | case bidi_LRE: return "LRE: Left-to-Right Embedding"; |
| 691 | 855 | case bidi_LRO: return "LRO: Left-to-Right Override"; |
| r26170 | r26171 | |
| 723 | 887 | { |
| 724 | 888 | if (!unicode_data || uchar >= UNICODE_PLANESIZE || !unicode_data[uchar]) |
| 725 | 889 | return ""; |
| 726 | | switch (unicode_data[uchar]->decomp_map) { |
| 890 | switch (unicode_data[uchar]->decomp_map) |
| 891 | { |
| 727 | 892 | case deco_canonical: return "Canonical mapping"; |
| 728 | 893 | case deco_font: return "A font variant (e.g. a blackletter form)"; |
| 729 | 894 | case deco_noBreak: return "A no-break version of a space or hyphen"; |
| r26170 | r26171 | |
| 915 | 1080 | static UINT32 hit = 0; |
| 916 | 1081 | UINT32 i; |
| 917 | 1082 | |
| 918 | | for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++) { |
| 919 | | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) { |
| 1083 | for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++) |
| 1084 | { |
| 1085 | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) |
| 1086 | { |
| 920 | 1087 | hit = i; |
| 921 | 1088 | return unicode_ranges[i].name; |
| 922 | 1089 | } |
| 923 | 1090 | } |
| 924 | | for (i = 0; i < hit; i++) { |
| 925 | | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) { |
| 1091 | for (i = 0; i < hit; i++) |
| 1092 | { |
| 1093 | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) |
| 1094 | { |
| 926 | 1095 | hit = i; |
| 927 | 1096 | return unicode_ranges[i].name; |
| 928 | 1097 | } |
| r26170 | r26171 | |
| 936 | 1105 | static UINT32 hit = 0; |
| 937 | 1106 | UINT32 i; |
| 938 | 1107 | |
| 939 | | for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++) { |
| 940 | | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) { |
| 1108 | for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++) |
| 1109 | { |
| 1110 | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) |
| 1111 | { |
| 941 | 1112 | hit = i; |
| 942 | 1113 | return unicode_ranges[i].first; |
| 943 | 1114 | } |
| 944 | 1115 | } |
| 945 | | |
| 946 | | for (i = 0; i < hit; i++) { |
| 947 | | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) { |
| 1116 | for (i = 0; i < hit; i++) |
| 1117 | { |
| 1118 | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) |
| 1119 | { |
| 948 | 1120 | hit = i; |
| 949 | 1121 | return unicode_ranges[i].first; |
| 950 | 1122 | } |
| r26170 | r26171 | |
| 958 | 1130 | static UINT32 hit = 0; |
| 959 | 1131 | UINT32 i; |
| 960 | 1132 | |
| 961 | | for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++) { |
| 962 | | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) { |
| 1133 | for (i = hit; i < sizeof(unicode_ranges)/sizeof(unicode_ranges[0]); i++) |
| 1134 | { |
| 1135 | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) |
| 1136 | { |
| 963 | 1137 | hit = i; |
| 964 | 1138 | return unicode_ranges[i].last; |
| 965 | 1139 | } |
| 966 | 1140 | } |
| 967 | | |
| 968 | | for (i = 0; i < hit; i++) { |
| 969 | | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) { |
| 1141 | for (i = 0; i < hit; i++) |
| 1142 | { |
| 1143 | if (unicode_ranges[i].first <= uchar && uchar <= unicode_ranges[i].last) |
| 1144 | { |
| 970 | 1145 | hit = i; |
| 971 | 1146 | return unicode_ranges[i].last; |
| 972 | 1147 | } |
| r26170 | r26171 | |
| 985 | 1160 | if (src) |
| 986 | 1161 | token = src; |
| 987 | 1162 | start = token; |
| 988 | | while (token && *token) { |
| 1163 | while (token && *token) |
| 1164 | { |
| 989 | 1165 | const char *d = delim; |
| 990 | | while (*d) { |
| 991 | | if (*token == *d) { |
| 1166 | while (*d) |
| 1167 | { |
| 1168 | if (*token == *d) |
| 1169 | { |
| 992 | 1170 | *token++ = '\0'; |
| 993 | 1171 | return start; |
| 994 | 1172 | } |
| r26170 | r26171 | |
| 1016 | 1194 | unicode_char first = 0; |
| 1017 | 1195 | unicode_char last = 0; |
| 1018 | 1196 | |
| 1019 | | while (fgets(line, 1024, file)) { |
| 1197 | while (fgets(line, 1024, file)) |
| 1198 | { |
| 1020 | 1199 | unicode_data_t u; |
| 1021 | 1200 | unicode_char code; |
| 1022 | 1201 | int tokennum = 1; |
| r26170 | r26171 | |
| 1034 | 1213 | tokennum++; |
| 1035 | 1214 | if (NULL == (token = parse_strtok(NULL, ";\r\n"))) |
| 1036 | 1215 | fprintf(stderr, "%s: token #%d failed on line %d\n%s", __FUNCTION__, tokennum, linenum, line); |
| 1037 | | if (NULL != token) { |
| 1216 | if (NULL != token) |
| 1217 | { |
| 1038 | 1218 | // check for a range description |
| 1039 | | if (token[0] == '<') { |
| 1219 | if (token[0] == '<') |
| 1220 | { |
| 1040 | 1221 | // strip a trailing ", First>" string fragment |
| 1041 | | if (0 == strcmp(token + strlen(token) - 8,", First>")) { |
| 1222 | if (0 == strcmp(token + strlen(token) - 8,", First>")) |
| 1223 | { |
| 1042 | 1224 | strcpy(token, token + 1); |
| 1043 | 1225 | token[strlen(token) - 8] = '\0'; |
| 1044 | 1226 | first = code; |
| 1045 | 1227 | } |
| 1046 | 1228 | // strip a trailing ", Last>" string fragment |
| 1047 | | if (0 == strcmp(token + strlen(token) - 7,", Last>")) { |
| 1229 | if (0 == strcmp(token + strlen(token) - 7,", Last>")) |
| 1230 | { |
| 1048 | 1231 | strcpy(token, token + 1); |
| 1049 | 1232 | token[strlen(token) - 7] = '\0'; |
| 1050 | 1233 | last = code; |
| r26170 | r26171 | |
| 1062 | 1245 | #if NEED_UNICODE_GCAT |
| 1063 | 1246 | // parse general category |
| 1064 | 1247 | u.gen_cat = gcat_0; |
| 1065 | | if (NULL != token) { |
| 1248 | if (NULL != token) |
| 1249 | { |
| 1066 | 1250 | if (0 == strcmp(token, "Lu")) |
| 1067 | 1251 | u.gen_cat = gcat_Lu; |
| 1068 | 1252 | if (0 == strcmp(token, "Ll")) |
| r26170 | r26171 | |
| 1140 | 1324 | #if NEED_UNICODE_BIDI |
| 1141 | 1325 | // parse bidirectional category |
| 1142 | 1326 | u.bidi = bidi_0; |
| 1143 | | if (NULL != token) { |
| 1327 | if (NULL != token) |
| 1328 | { |
| 1144 | 1329 | if (0 == strcmp(token, "L")) |
| 1145 | 1330 | u.bidi = bidi_L; // Left-to-Right |
| 1146 | 1331 | if (0 == strcmp(token, "LRE")) |
| r26170 | r26171 | |
| 1187 | 1372 | fprintf(stderr, "%s: token #%d failed on line %d\n%s", __FUNCTION__, tokennum, linenum, line); |
| 1188 | 1373 | #if NEED_UNICODE_DECO |
| 1189 | 1374 | // parse decomposition mapping |
| 1190 | | if (NULL != token) { |
| 1375 | if (NULL != token) |
| 1376 | { |
| 1191 | 1377 | unicode_char decomposed[256]; |
| 1192 | 1378 | UINT8 n = 0; |
| 1193 | 1379 | char *p = token; |
| r26170 | r26171 | |
| 1229 | 1415 | while (isspace(*p)) |
| 1230 | 1416 | p++; |
| 1231 | 1417 | // parse decomposition codes |
| 1232 | | while (*p) { |
| 1418 | while (*p) |
| 1419 | { |
| 1233 | 1420 | // skip initial whitespace |
| 1234 | 1421 | while (isspace(*p)) |
| 1235 | 1422 | p++; |
| r26170 | r26171 | |
| 1242 | 1429 | if (n >= 255) |
| 1243 | 1430 | break; |
| 1244 | 1431 | } |
| 1245 | | if (n > 0) { |
| 1432 | if (n > 0) |
| 1433 | { |
| 1246 | 1434 | u.n_decomp = n; |
| 1247 | 1435 | u.decomp_codes = (unicode_char*)malloc(sizeof(unicode_char) * n); |
| 1248 | 1436 | memcpy(u.decomp_codes, decomposed, sizeof(unicode_char) * n); |
| r26170 | r26171 | |
| 1296 | 1484 | tokennum++; |
| 1297 | 1485 | if (NULL == (token = parse_strtok(NULL, ";\r\n"))) |
| 1298 | 1486 | fprintf(stderr, "%s: token #%d failed on line %d\n%s", __FUNCTION__, tokennum, linenum, line); |
| 1299 | | if (NULL != token && *token) { |
| 1487 | if (NULL != token && *token) |
| 1488 | { |
| 1300 | 1489 | /* FIXME: hmm ... don't know what this token means */ |
| 1301 | 1490 | } |
| 1302 | 1491 | |
| r26170 | r26171 | |
| 1324 | 1513 | u.titlecase = strtoul(token, NULL, 16); |
| 1325 | 1514 | #endif |
| 1326 | 1515 | |
| 1327 | | if (first > 0 && last > 0) { |
| 1328 | | if (first + 1 >= UNICODE_PLANESIZE) { |
| 1516 | if (first > 0 && last > 0) |
| 1517 | { |
| 1518 | if (first + 1 >= UNICODE_PLANESIZE) |
| 1519 | { |
| 1329 | 1520 | fprintf(stderr, "%s: range %#07x-%#07x outside planes\n", __FUNCTION__, first + 1, last); |
| 1330 | | } else { |
| 1521 | } |
| 1522 | else |
| 1523 | { |
| 1331 | 1524 | for (code = first + 1; code <= last && code < UNICODE_PLANESIZE; code++) |
| 1332 | 1525 | unicode_data[code] = unicode_data[first]; |
| 1333 | 1526 | } |
| r26170 | r26171 | |
| 1335 | 1528 | last = 0; |
| 1336 | 1529 | code = UNICODE_PLANESIZE; |
| 1337 | 1530 | } |
| 1338 | | if (code < UNICODE_PLANESIZE) { |
| 1531 | if (code < UNICODE_PLANESIZE) |
| 1532 | { |
| 1339 | 1533 | unicode_data[code] = (unicode_data_t *)malloc(sizeof(unicode_data_t)); |
| 1340 | 1534 | memcpy(unicode_data[code], &u, sizeof(u)); |
| 1341 | 1535 | } |
branches/alto2/src/lib/util/unicode.h
| r26170 | r26171 | |
| 22 | 22 | |
| 23 | 23 | #include <stdlib.h> |
| 24 | 24 | #include <stdio.h> |
| 25 | #include <stdarg.h> |
| 25 | 26 | #include <ctype.h> |
| 26 | 27 | #include "osdcore.h" |
| 27 | 28 | |
| r26170 | r26171 | |
| 31 | 32 | CONSTANTS |
| 32 | 33 | ***************************************************************************/ |
| 33 | 34 | |
| 34 | | /* these defines specify the maximum size of different types of Unicode |
| 35 | | * character encodings */ |
| 36 | | #define UTF8_CHAR_MAX 6 |
| 37 | | #define UTF16_CHAR_MAX 2 |
| 38 | | |
| 39 | | /* these are UTF-8 encoded strings for common characters */ |
| 40 | | #define UTF8_NBSP "\xc2\xa0" /* non-breaking space */ |
| 41 | | #define UTF8_MULTIPLY "\xc3\x97" /* multiplication symbol */ |
| 42 | | #define UTF8_DEGREES "\xc2\xb0" /* degrees symbol */ |
| 43 | | |
| 44 | | #define a_RING "\xc3\xa5" /* small a with a ring */ |
| 45 | | #define a_UMLAUT "\xc3\xa4" /* small a with an umlaut */ |
| 46 | | #define o_UMLAUT "\xc3\xb6" /* small o with an umlaut */ |
| 47 | | #define u_UMLAUT "\xc3\xbc" /* small u with an umlaut */ |
| 48 | | #define e_ACUTE "\xc3\xa9" /* small e with an acute */ |
| 49 | | |
| 50 | | #define A_RING "\xc3\x85" /* capital A with a ring */ |
| 51 | | #define A_UMLAUT "\xc3\x84" /* capital A with an umlaut */ |
| 52 | | #define O_UMLAUT "\xc3\x96" /* capital O with an umlaut */ |
| 53 | | #define U_UMLAUT "\xc3\x9c" /* capital U with an umlaut */ |
| 54 | | #define E_ACUTE "\xc3\x89" /* capital E with an acute */ |
| 55 | | |
| 56 | | #define UTF8_LEFT "\xe2\x86\x90" /* cursor left */ |
| 57 | | #define UTF8_RIGHT "\xe2\x86\x92" /* cursor right */ |
| 58 | | #define UTF8_UP "\xe2\x86\x91" /* cursor up */ |
| 59 | | #define UTF8_DOWN "\xe2\x86\x93" /* cursor down */ |
| 60 | | |
| 61 | | |
| 62 | | |
| 63 | | /*************************************************************************** |
| 64 | | TYPE DEFINITIONS |
| 65 | | ***************************************************************************/ |
| 66 | | |
| 67 | | typedef UINT16 utf16_char; |
| 68 | | typedef UINT32 unicode_char; |
| 69 | | |
| 70 | | |
| 71 | | |
| 72 | | /*************************************************************************** |
| 73 | | FUNCTION PROTOTYPES |
| 74 | | ***************************************************************************/ |
| 75 | | |
| 76 | | /* tests to see if a unicode char is a valid code point */ |
| 77 | | int uchar_isvalid(unicode_char uchar); |
| 78 | | |
| 79 | | /* converting strings to 32-bit Unicode chars */ |
| 80 | | int uchar_from_utf8(unicode_char *uchar, const char *utf8char, size_t count); |
| 81 | | int uchar_from_utf16(unicode_char *uchar, const utf16_char *utf16char, size_t count); |
| 82 | | int uchar_from_utf16f(unicode_char *uchar, const utf16_char *utf16char, size_t count); |
| 83 | | |
| 84 | | /* converting 32-bit Unicode chars to strings */ |
| 85 | | int utf8_from_uchar(char *utf8string, size_t count, unicode_char uchar); |
| 86 | | int utf16_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar); |
| 87 | | int utf16f_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar); |
| 88 | | |
| 89 | | /* misc UTF-8 helpers */ |
| 90 | | const char *utf8_previous_char(const char *utf8string); |
| 91 | | int utf8_is_valid_string(const char *utf8string); |
| 92 | | |
| 93 | | /* Unicode lookup table loader */ |
| 94 | | //! load a table translating UINT8 (unsigned char) to Unicode values |
| 95 | | unicode_char * uchar_table_load(const char* name); |
| 96 | | |
| 97 | | //! reverse lookup of uchar in a Unicode table - returns 255 if not found |
| 98 | | UINT8 uchar_table_index(unicode_char* table, unicode_char uchar); |
| 99 | | |
| 100 | | //! free a unicode table |
| 101 | | void uchar_table_free(unicode_char* table); |
| 102 | | |
| 103 | | /*************************************************************************** |
| 104 | | * unicode.org published UnicodeData.txt |
| 105 | | * parser and accessors |
| 106 | | ***************************************************************************/ |
| 107 | | |
| 108 | | //! load the UnicodeData.txt file an parse it |
| 109 | | int unicode_data_load(const char* name); |
| 110 | | |
| 111 | | //! free the UnicodeData.txt table memory |
| 112 | | void unicode_data_free(); |
| 113 | | |
| 114 | 35 | //! size of the first 17 Unicode planes |
| 115 | 36 | #define UNICODE_PLANESIZE 0x110000 |
| 116 | 37 | |
| 117 | 38 | #ifndef NEED_UNICODE_RANGES |
| 118 | 39 | #define NEED_UNICODE_RANGES 1 //!< define to 1, if the name, first or last of the range of a code is needed |
| 119 | 40 | #endif |
| 120 | | |
| 121 | 41 | #ifndef NEED_UNICODE_NAME |
| 122 | 42 | #define NEED_UNICODE_NAME 1 //!< define to 1, if the name of a code is needed |
| 123 | 43 | #endif |
| 124 | | |
| 125 | 44 | #ifndef NEED_UNICODE_NAME10 |
| 126 | 45 | #define NEED_UNICODE_NAME10 1 //!< define to 1, if the short name of a code is needed |
| 127 | 46 | #endif |
| 128 | | |
| 129 | 47 | #ifndef NEED_UNICODE_GCAT |
| 130 | 48 | #define NEED_UNICODE_GCAT 1 //!< define to 1, if the general category of a code is needed |
| 131 | 49 | #endif |
| 132 | | |
| 133 | 50 | #ifndef NEED_UNICODE_CCOM |
| 134 | 51 | #define NEED_UNICODE_CCOM 1 //!< define to 1, if the canonical combining (name) of a code is needed |
| 135 | 52 | #endif |
| 136 | | |
| 137 | 53 | #ifndef NEED_UNICODE_BIDI |
| 138 | 54 | #define NEED_UNICODE_BIDI 1 //!< define to 1, if the bidirectional category of a code is needed |
| 139 | 55 | #endif |
| 140 | | |
| 141 | 56 | #ifndef NEED_UNICODE_DECO |
| 142 | 57 | #define NEED_UNICODE_DECO 1 //!< define to 1, if the decomposition codes of a code are needed |
| 143 | 58 | #endif |
| 144 | | |
| 145 | 59 | #ifndef NEED_UNICODE_DECIMAL |
| 146 | 60 | #define NEED_UNICODE_DECIMAL 1 //!< define to 1, if the decimal value of a code is needed |
| 147 | 61 | #endif |
| 148 | | |
| 149 | 62 | #ifndef NEED_UNICODE_DIGIT |
| 150 | 63 | #define NEED_UNICODE_DIGIT 1 //!< define to 1, if the digit value of a code is needed |
| 151 | 64 | #endif |
| 152 | | |
| 153 | 65 | #ifndef NEED_UNICODE_NUMERIC |
| 154 | 66 | #define NEED_UNICODE_NUMERIC 1 //!< define to 1, if the numeric value of a code is needed |
| 155 | 67 | #endif |
| 156 | | |
| 157 | 68 | #ifndef NEED_UNICODE_MIRRORED |
| 158 | 69 | #define NEED_UNICODE_MIRRORED 1 //!< define to 1, if the mirrored flag of a code is needed |
| 159 | 70 | #endif |
| 160 | | |
| 161 | 71 | #ifndef NEED_UNICODE_DECN |
| 162 | 72 | #define NEED_UNICODE_DECN 1 //!< define to 1, if access to decomposed code [n] of a code is needed |
| 163 | 73 | #endif |
| 164 | | |
| 165 | 74 | #ifndef NEED_UNICODE_UCASE |
| 166 | 75 | #define NEED_UNICODE_UCASE 1 //!< define to 1, if the upper case value of a code is needed |
| 167 | 76 | #endif |
| 168 | | |
| 169 | 77 | #ifndef NEED_UNICODE_LCASE |
| 170 | 78 | #define NEED_UNICODE_LCASE 1 //!< define to 1, if the lower case value of a code is needed |
| 171 | 79 | #endif |
| 172 | | |
| 173 | 80 | #ifndef NEED_UNICODE_TCASE |
| 174 | 81 | #define NEED_UNICODE_TCASE 1 //!< define to 1, if the title case value of a code is needed |
| 175 | 82 | #endif |
| 176 | | |
| 177 | 83 | #ifndef NEED_UNICODE_WIDTH |
| 178 | 84 | #define NEED_UNICODE_WIDTH 1 //!< define to 1, if the glyph width of a code is needed |
| 179 | 85 | #endif |
| 180 | 86 | |
| 87 | #define UTF8_CHAR_MAX 6 //!< maximum size of Unicode UTF-8 encoding |
| 88 | #define UTF16_CHAR_MAX 2 //!< maximum size of Unicode UTF-16 encoding |
| 89 | |
| 90 | /* these are UTF-8 encoded strings for common characters */ |
| 91 | #define UTF8_NBSP "\xc2\xa0" //!< non-breaking space |
| 92 | #define UTF8_MULTIPLY "\xc3\x97" //!< multiplication symbol |
| 93 | #define UTF8_DEGREES "\xc2\xb0" //!< degrees symbol |
| 94 | |
| 95 | #define a_RING "\xc3\xa5" //!< small a with a ring |
| 96 | #define a_UMLAUT "\xc3\xa4" //!< small a with an umlaut |
| 97 | #define o_UMLAUT "\xc3\xb6" //!< small o with an umlaut |
| 98 | #define u_UMLAUT "\xc3\xbc" //!< small u with an umlaut |
| 99 | #define e_ACUTE "\xc3\xa9" //!< small e with an acute |
| 100 | |
| 101 | #define A_RING "\xc3\x85" //!< capital A with a ring |
| 102 | #define A_UMLAUT "\xc3\x84" //!< capital A with an umlaut |
| 103 | #define O_UMLAUT "\xc3\x96" //!< capital O with an umlaut |
| 104 | #define U_UMLAUT "\xc3\x9c" //!< capital U with an umlaut |
| 105 | #define E_ACUTE "\xc3\x89" //!< capital E with an acute |
| 106 | |
| 107 | #define UTF8_LEFT "\xe2\x86\x90" //!< cursor left |
| 108 | #define UTF8_RIGHT "\xe2\x86\x92" //!< cursor right |
| 109 | #define UTF8_UP "\xe2\x86\x91" //!< cursor up |
| 110 | #define UTF8_DOWN "\xe2\x86\x93" //!< cursor down |
| 111 | |
| 112 | |
| 113 | |
| 114 | /*************************************************************************** |
| 115 | TYPE DEFINITIONS |
| 116 | ***************************************************************************/ |
| 117 | |
| 118 | typedef UINT16 utf16_char; //!< type used for UTF-16 encoded values |
| 119 | typedef UINT32 unicode_char; //!< type used for full width Unicode values |
| 120 | |
| 121 | |
| 122 | |
| 123 | /*************************************************************************** |
| 124 | FUNCTION PROTOTYPES |
| 125 | ***************************************************************************/ |
| 126 | |
| 127 | //! tests to see if a unicode char is a valid code point |
| 128 | int uchar_isvalid(unicode_char uchar); |
| 129 | |
| 130 | //! convert a UTF-8 sequence into a Unicode character |
| 131 | int uchar_from_utf8(unicode_char *uchar, const char *utf8char, size_t count); |
| 132 | |
| 133 | //! convert a UTF-16 sequence into a Unicode character |
| 134 | int uchar_from_utf16(unicode_char *uchar, const utf16_char *utf16char, size_t count); |
| 135 | |
| 136 | //! convert a UTF-16 sequence with flipped byte order into a Unicode character |
| 137 | int uchar_from_utf16f(unicode_char *uchar, const utf16_char *utf16char, size_t count); |
| 138 | |
| 139 | //! convert a Unicode character into a UTF-8 sequence |
| 140 | int utf8_from_uchar(char *utf8string, size_t count, unicode_char uchar); |
| 141 | |
| 142 | //! convert a Unicode character into a UTF-16 sequence |
| 143 | int utf16_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar); |
| 144 | |
| 145 | //! convert a Unicode character into a UTF-16 sequence with flipped endianness |
| 146 | int utf16f_from_uchar(utf16_char *utf16string, size_t count, unicode_char uchar); |
| 147 | |
| 148 | /* misc UTF-8 helpers */ |
| 149 | //! return a pointer to the previous character in a string |
| 150 | const char *utf8_previous_char(const char *utf8string); |
| 151 | |
| 152 | //! return true if the given string is a properly formed sequence of UTF-8 characters |
| 153 | int utf8_is_valid_string(const char *utf8string); |
| 154 | |
| 155 | //! return the number of decoded Unicode values in a UTF-8 encoded string |
| 156 | size_t utf8_strlen(const char* src); |
| 157 | |
| 158 | /* 8 bit code to Unicode value lookup table handling (e.g. ISO-8859-1 aka Latin1) */ |
| 159 | //! load a table translating UINT8 (unsigned char) to Unicode values |
| 160 | unicode_char * uchar_table_load(const char* name); |
| 161 | |
| 162 | //! reverse lookup of uchar in a Unicode table |
| 163 | UINT8 uchar_table_index(unicode_char* table, unicode_char uchar); |
| 164 | |
| 165 | //! free a unicode table |
| 166 | void uchar_table_free(unicode_char* table); |
| 167 | |
| 168 | /* unicode_char array functions - string.h like */ |
| 169 | //! return the unicode_char array length |
| 170 | size_t uchar_strlen(const unicode_char* src); |
| 171 | |
| 172 | //! compare two unicode_char arrays |
| 173 | int uchar_strcmp(const unicode_char* dst, const unicode_char* src); |
| 174 | |
| 175 | //! compare two unicode_char arrays with length limiting |
| 176 | int uchar_strncmp(const unicode_char* dst, const unicode_char* src, size_t len); |
| 177 | |
| 178 | //! print a formatted string of ASCII characters to a unicode_char array (max 256 characters) |
| 179 | int uchar_sprintf(unicode_char* dst, const char* format, ...); |
| 180 | |
| 181 | //! copy an array of unicode_char from source to destination |
| 182 | unicode_char* uchar_strcpy(unicode_char* dst, const unicode_char* src); |
| 183 | |
| 184 | //! copy a length limited array of unicode_char from source to destination |
| 185 | unicode_char* uchar_strncpy(unicode_char* dst, const unicode_char* src, size_t len); |
| 186 | |
| 187 | /*************************************************************************** |
| 188 | * unicode.org published UnicodeData.txt |
| 189 | * Parser and property accessors |
| 190 | ***************************************************************************/ |
| 191 | |
| 192 | //! load the specified UnicodeData.txt file and parse it |
| 193 | int unicode_data_load(const char* name); |
| 194 | |
| 195 | //! free the UnicodeData.txt table memory |
| 196 | void unicode_data_free(); |
| 197 | |
| 181 | 198 | #if NEED_UNICODE_GCAT |
| 199 | /** |
| 200 | * @brief enumeration of the possible general categories |
| 201 | */ |
| 182 | 202 | typedef enum { |
| 183 | 203 | gcat_0, //!< invalid value |
| 184 | 204 | gcat_Lu, //!< Letter, Uppercase |
| r26170 | r26171 | |
| 215 | 235 | #endif |
| 216 | 236 | |
| 217 | 237 | #if NEED_UNICODE_BIDI |
| 238 | /** |
| 239 | * @brief enumeration of the possible bidirectional categories |
| 240 | */ |
| 218 | 241 | typedef enum { |
| 219 | 242 | bidi_0, //!< invalid value |
| 220 | 243 | bidi_L, //!< Left-to-Right |
| r26170 | r26171 | |
| 240 | 263 | #endif |
| 241 | 264 | |
| 242 | 265 | #if NEED_UNICODE_DECO |
| 266 | /** |
| 267 | * @brief enumeration of the possible decomposition mappings |
| 268 | */ |
| 243 | 269 | typedef enum { |
| 244 | 270 | deco_0, //!< invalid value |
| 245 | 271 | deco_canonical, //!< canonical mapping |