Previous 199869 Revisions Next

r26223 Sunday 17th November, 2013 at 09:23:58 UTC by Jürgen Buchmüller
Update the Unicode ranges to those of the Unicode 6.3.0 Blocks.txt
[/branches/alto2/src/lib/util]unicode.c

branches/alto2/src/lib/util/unicode.c
r26222r26223
1// license:BSD-3-Clause
1// license:BSD-3-Clause
22// copyright-holders:Aaron Giles, Jürgen Buchmüller
33/*********************************************************************
44
r26222r26223
2121 */
2222int uchar_isvalid(unicode_char uchar)
2323{
24   return (uchar < 0x110000) && !((uchar >= 0xd800) && (uchar <= 0xdfff));
24    return (uchar < 0x110000) && !((uchar >= 0xd800) && (uchar <= 0xdfff));
2525}
2626
2727
r26222r26223
11061106
11071107static const unicode_range_t unicode_ranges[] =
11081108{
1109   {0x0000, 0x007f, "Basic Latin"},
1110   {0x0080, 0x00ff, "Latin-1 Supplement"},
1111   {0x0100, 0x017f, "Latin Extended-A"},
1112   {0x0180, 0x024f, "Latin Extended-B"},
1113   {0x0250, 0x02af, "IPA Extensions"},
1114   {0x02b0, 0x02ff, "Spacing Modifier Letters"},
1115   {0x0300, 0x036f, "Combining Diacritical Marks"},
1116   {0x0370, 0x03ff, "Greek"},
1117   {0x0400, 0x04ff, "Cyrillic"},
1118   {0x0530, 0x058f, "Armenian"},
1119   {0x0590, 0x05ff, "Hebrew"},
1120   {0x0600, 0x06ff, "Arabic"},
1121   {0x0700, 0x074f, "Syriac"},
1122   {0x0780, 0x07bf, "Thaana"},
1123   {0x0900, 0x097f, "Devanagari"},
1124   {0x0980, 0x09ff, "Bengali"},
1125   {0x0a00, 0x0a7f, "Gurmukhi"},
1126   {0x0a80, 0x0aff, "Gujarati"},
1127   {0x0b00, 0x0b7f, "Oriya"},
1128   {0x0b80, 0x0bff, "Tamil"},
1129   {0x0c00, 0x0c7f, "Telugu"},
1130   {0x0c80, 0x0cff, "Kannada"},
1131   {0x0d00, 0x0d7f, "Malayalam"},
1132   {0x0d80, 0x0dff, "Sinhala"},
1133   {0x0e00, 0x0e7f, "Thai"},
1134   {0x0e80, 0x0eff, "Lao"},
1135   {0x0f00, 0x0fff, "Tibetan"},
1136   {0x1000, 0x109f, "Myanmar"},
1137   {0x10a0, 0x10ff, "Georgian"},
1138   {0x1100, 0x11ff, "Hangul Jamo"},
1139   {0x1200, 0x137f, "Ethiopic"},
1140   {0x13a0, 0x13ff, "Cherokee"},
1141   {0x1400, 0x167f, "Unified Canadian Aboriginal Syllabic"},
1142   {0x1680, 0x169f, "Ogham"},
1143   {0x16a0, 0x16ff, "Runic"},
1144   {0x1780, 0x17ff, "Khmer"},
1145   {0x1800, 0x18af, "Mongolian"},
1146   {0x1e00, 0x1eff, "Latin Extended Additional"},
1147   {0x1f00, 0x1fff, "Greek Extended"},
1148   {0x2000, 0x206f, "General Punctuation"},
1149   {0x2070, 0x208f, "Superscripts and Subscripts"},
1150   {0x20a0, 0x20cf, "Currency Symbols"},
1151   {0x20d0, 0x20ff, "Combining Marks for Symbols"},
1152   {0x2100, 0x214f, "Letterlike Symbols"},
1153   {0x2150, 0x218f, "Number Forms"},
1154   {0x2190, 0x21ff, "Arrows"},
1155   {0x2200, 0x22ff, "Mathematical Operators"},
1156   {0x2300, 0x23ff, "Miscellaneous Technical"},
1157   {0x2400, 0x243f, "Control Pictures"},
1158   {0x2440, 0x245f, "Optical Character Recognition"},
1159   {0x2460, 0x24ff, "Enclosed Alphanumerics"},
1160   {0x2500, 0x257f, "Box Drawing"},
1161   {0x2580, 0x259f, "Block Elements"},
1162   {0x25a0, 0x25ff, "Geometric Shapes"},
1163   {0x2600, 0x26ff, "Miscellaneous Symbols"},
1164   {0x2700, 0x27bf, "Dingbats"},
1165   {0x2800, 0x28ff, "Braille Patterns"},
1166   {0x2e80, 0x2eff, "CJK Radicals Supplement"},
1167   {0x2f00, 0x2fdf, "Kangxi Radicals"},
1168   {0x2ff0, 0x2fff, "Ideographic Description Characters"},
1169   {0x3000, 0x303f, "CJK Symbols and Punctuation"},
1170   {0x3040, 0x309f, "Hiragana"},
1171   {0x30a0, 0x30ff, "Katakana"},
1172   {0x3100, 0x312f, "Bopomofo"},
1173   {0x3130, 0x318f, "Hangul Compatibility Jamo"},
1174   {0x3190, 0x319f, "Kanbun"},
1175   {0x31a0, 0x31bf, "Bopomofo Extended"},
1176   {0x3200, 0x32ff, "Enclosed CJK Letters and Months"},
1177   {0x3300, 0x33ff, "CJK Compatibility"},
1178   {0x3400, 0x4dbf, "CJK Unified Ideographs Extension A"},
1179   {0x4e00, 0x9faf, "CJK Unified Ideographs"},
1180   {0xa000, 0xa48f, "Yi Syllables"},
1181   {0xa490, 0xa4cf, "Yi Radicals"},
1182   {0xac00, 0xd7af, "Hangul Syllables"},
1183   {0xd800, 0xdb7f, "High Surrogates"},
1184   {0xdb80, 0xdbff, "High Private Use Surrogates"},
1185   {0xdc00, 0xdfff, "Low Surrogates"},
1186   {0xe000, 0xf8ff, "Private Use"},
1187   {0xf900, 0xfaff, "CJK Compatibility Ideographs"},
1188   {0xfb00, 0xfb4f, "Alphabetic Presentation Forms"},
1189   {0xfb50, 0xfdff, "Arabic Presentation Forms-A"},
1190   {0xfe20, 0xfe2f, "Combining Half Marks"},
1191   {0xfe30, 0xfe4f, "CJK Compatibility Forms"},
1192   {0xfe50, 0xfe6f, "Small Form Variants"},
1193   {0xfe70, 0xfeff, "Arabic Presentation Forms-B"},
1194   {0xff00, 0xffef, "Halfwidth and Fullwidth Forms"},
1195   {0xfff0, 0xffff, "Specials"}
1196   // FIXME: add ranges for the Unicode planes 1 to 16
1109   {  0x0000,  0x007F, "Basic Latin"},
1110   {  0x0080,  0x00FF, "Latin-1 Supplement"},
1111   {  0x0100,  0x017F, "Latin Extended-A"},
1112   {  0x0180,  0x024F, "Latin Extended-B"},
1113   {  0x0250,  0x02AF, "IPA Extensions"},
1114   {  0x02B0,  0x02FF, "Spacing Modifier Letters"},
1115   {  0x0300,  0x036F, "Combining Diacritical Marks"},
1116   {  0x0370,  0x03FF, "Greek and Coptic"},
1117   {  0x0400,  0x04FF, "Cyrillic"},
1118   {  0x0500,  0x052F, "Cyrillic Supplement"},
1119   {  0x0530,  0x058F, "Armenian"},
1120   {  0x0590,  0x05FF, "Hebrew"},
1121   {  0x0600,  0x06FF, "Arabic"},
1122   {  0x0700,  0x074F, "Syriac"},
1123   {  0x0750,  0x077F, "Arabic Supplement"},
1124   {  0x0780,  0x07BF, "Thaana"},
1125   {  0x07C0,  0x07FF, "NKo"},
1126   {  0x0800,  0x083F, "Samaritan"},
1127   {  0x0840,  0x085F, "Mandaic"},
1128   {  0x08A0,  0x08FF, "Arabic Extended-A"},
1129   {  0x0900,  0x097F, "Devanagari"},
1130   {  0x0980,  0x09FF, "Bengali"},
1131   {  0x0A00,  0x0A7F, "Gurmukhi"},
1132   {  0x0A80,  0x0AFF, "Gujarati"},
1133   {  0x0B00,  0x0B7F, "Oriya"},
1134   {  0x0B80,  0x0BFF, "Tamil"},
1135   {  0x0C00,  0x0C7F, "Telugu"},
1136   {  0x0C80,  0x0CFF, "Kannada"},
1137   {  0x0D00,  0x0D7F, "Malayalam"},
1138   {  0x0D80,  0x0DFF, "Sinhala"},
1139   {  0x0E00,  0x0E7F, "Thai"},
1140   {  0x0E80,  0x0EFF, "Lao"},
1141   {  0x0F00,  0x0FFF, "Tibetan"},
1142   {  0x1000,  0x109F, "Myanmar"},
1143   {  0x10A0,  0x10FF, "Georgian"},
1144   {  0x1100,  0x11FF, "Hangul Jamo"},
1145   {  0x1200,  0x137F, "Ethiopic"},
1146   {  0x1380,  0x139F, "Ethiopic Supplement"},
1147   {  0x13A0,  0x13FF, "Cherokee"},
1148   {  0x1400,  0x167F, "Unified Canadian Aboriginal Syllabics"},
1149   {  0x1680,  0x169F, "Ogham"},
1150   {  0x16A0,  0x16FF, "Runic"},
1151   {  0x1700,  0x171F, "Tagalog"},
1152   {  0x1720,  0x173F, "Hanunoo"},
1153   {  0x1740,  0x175F, "Buhid"},
1154   {  0x1760,  0x177F, "Tagbanwa"},
1155   {  0x1780,  0x17FF, "Khmer"},
1156   {  0x1800,  0x18AF, "Mongolian"},
1157   {  0x18B0,  0x18FF, "Unified Canadian Aboriginal Syllabics Extended"},
1158   {  0x1900,  0x194F, "Limbu"},
1159   {  0x1950,  0x197F, "Tai Le"},
1160   {  0x1980,  0x19DF, "New Tai Lue"},
1161   {  0x19E0,  0x19FF, "Khmer Symbols"},
1162   {  0x1A00,  0x1A1F, "Buginese"},
1163   {  0x1A20,  0x1AAF, "Tai Tham"},
1164   {  0x1B00,  0x1B7F, "Balinese"},
1165   {  0x1B80,  0x1BBF, "Sundanese"},
1166   {  0x1BC0,  0x1BFF, "Batak"},
1167   {  0x1C00,  0x1C4F, "Lepcha"},
1168   {  0x1C50,  0x1C7F, "Ol Chiki"},
1169   {  0x1CC0,  0x1CCF, "Sundanese Supplement"},
1170   {  0x1CD0,  0x1CFF, "Vedic Extensions"},
1171   {  0x1D00,  0x1D7F, "Phonetic Extensions"},
1172   {  0x1D80,  0x1DBF, "Phonetic Extensions Supplement"},
1173   {  0x1DC0,  0x1DFF, "Combining Diacritical Marks Supplement"},
1174   {  0x1E00,  0x1EFF, "Latin Extended Additional"},
1175   {  0x1F00,  0x1FFF, "Greek Extended"},
1176   {  0x2000,  0x206F, "General Punctuation"},
1177   {  0x2070,  0x209F, "Superscripts and Subscripts"},
1178   {  0x20A0,  0x20CF, "Currency Symbols"},
1179   {  0x20D0,  0x20FF, "Combining Diacritical Marks for Symbols"},
1180   {  0x2100,  0x214F, "Letterlike Symbols"},
1181   {  0x2150,  0x218F, "Number Forms"},
1182   {  0x2190,  0x21FF, "Arrows"},
1183   {  0x2200,  0x22FF, "Mathematical Operators"},
1184   {  0x2300,  0x23FF, "Miscellaneous Technical"},
1185   {  0x2400,  0x243F, "Control Pictures"},
1186   {  0x2440,  0x245F, "Optical Character Recognition"},
1187   {  0x2460,  0x24FF, "Enclosed Alphanumerics"},
1188   {  0x2500,  0x257F, "Box Drawing"},
1189   {  0x2580,  0x259F, "Block Elements"},
1190   {  0x25A0,  0x25FF, "Geometric Shapes"},
1191   {  0x2600,  0x26FF, "Miscellaneous Symbols"},
1192   {  0x2700,  0x27BF, "Dingbats"},
1193   {  0x27C0,  0x27EF, "Miscellaneous Mathematical Symbols-A"},
1194   {  0x27F0,  0x27FF, "Supplemental Arrows-A"},
1195   {  0x2800,  0x28FF, "Braille Patterns"},
1196   {  0x2900,  0x297F, "Supplemental Arrows-B"},
1197   {  0x2980,  0x29FF, "Miscellaneous Mathematical Symbols-B"},
1198   {  0x2A00,  0x2AFF, "Supplemental Mathematical Operators"},
1199   {  0x2B00,  0x2BFF, "Miscellaneous Symbols and Arrows"},
1200   {  0x2C00,  0x2C5F, "Glagolitic"},
1201   {  0x2C60,  0x2C7F, "Latin Extended-C"},
1202   {  0x2C80,  0x2CFF, "Coptic"},
1203   {  0x2D00,  0x2D2F, "Georgian Supplement"},
1204   {  0x2D30,  0x2D7F, "Tifinagh"},
1205   {  0x2D80,  0x2DDF, "Ethiopic Extended"},
1206   {  0x2DE0,  0x2DFF, "Cyrillic Extended-A"},
1207   {  0x2E00,  0x2E7F, "Supplemental Punctuation"},
1208   {  0x2E80,  0x2EFF, "CJK Radicals Supplement"},
1209   {  0x2F00,  0x2FDF, "Kangxi Radicals"},
1210   {  0x2FF0,  0x2FFF, "Ideographic Description Characters"},
1211   {  0x3000,  0x303F, "CJK Symbols and Punctuation"},
1212   {  0x3040,  0x309F, "Hiragana"},
1213   {  0x30A0,  0x30FF, "Katakana"},
1214   {  0x3100,  0x312F, "Bopomofo"},
1215   {  0x3130,  0x318F, "Hangul Compatibility Jamo"},
1216   {  0x3190,  0x319F, "Kanbun"},
1217   {  0x31A0,  0x31BF, "Bopomofo Extended"},
1218   {  0x31C0,  0x31EF, "CJK Strokes"},
1219   {  0x31F0,  0x31FF, "Katakana Phonetic Extensions"},
1220   {  0x3200,  0x32FF, "Enclosed CJK Letters and Months"},
1221   {  0x3300,  0x33FF, "CJK Compatibility"},
1222   {  0x3400,  0x4DBF, "CJK Unified Ideographs Extension A"},
1223   {  0x4DC0,  0x4DFF, "Yijing Hexagram Symbols"},
1224   {  0x4E00,  0x9FFF, "CJK Unified Ideographs"},
1225   {  0xA000,  0xA48F, "Yi Syllables"},
1226   {  0xA490,  0xA4CF, "Yi Radicals"},
1227   {  0xA4D0,  0xA4FF, "Lisu"},
1228   {  0xA500,  0xA63F, "Vai"},
1229   {  0xA640,  0xA69F, "Cyrillic Extended-B"},
1230   {  0xA6A0,  0xA6FF, "Bamum"},
1231   {  0xA700,  0xA71F, "Modifier Tone Letters"},
1232   {  0xA720,  0xA7FF, "Latin Extended-D"},
1233   {  0xA800,  0xA82F, "Syloti Nagri"},
1234   {  0xA830,  0xA83F, "Common Indic Number Forms"},
1235   {  0xA840,  0xA87F, "Phags-pa"},
1236   {  0xA880,  0xA8DF, "Saurashtra"},
1237   {  0xA8E0,  0xA8FF, "Devanagari Extended"},
1238   {  0xA900,  0xA92F, "Kayah Li"},
1239   {  0xA930,  0xA95F, "Rejang"},
1240   {  0xA960,  0xA97F, "Hangul Jamo Extended-A"},
1241   {  0xA980,  0xA9DF, "Javanese"},
1242   {  0xAA00,  0xAA5F, "Cham"},
1243   {  0xAA60,  0xAA7F, "Myanmar Extended-A"},
1244   {  0xAA80,  0xAADF, "Tai Viet"},
1245   {  0xAAE0,  0xAAFF, "Meetei Mayek Extensions"},
1246   {  0xAB00,  0xAB2F, "Ethiopic Extended-A"},
1247   {  0xABC0,  0xABFF, "Meetei Mayek"},
1248   {  0xAC00,  0xD7AF, "Hangul Syllables"},
1249   {  0xD7B0,  0xD7FF, "Hangul Jamo Extended-B"},
1250   {  0xD800,  0xDB7F, "High Surrogates"},
1251   {  0xDB80,  0xDBFF, "High Private Use Surrogates"},
1252   {  0xDC00,  0xDFFF, "Low Surrogates"},
1253   {  0xE000,  0xF8FF, "Private Use Area"},
1254   {  0xF900,  0xFAFF, "CJK Compatibility Ideographs"},
1255   {  0xFB00,  0xFB4F, "Alphabetic Presentation Forms"},
1256   {  0xFB50,  0xFDFF, "Arabic Presentation Forms-A"},
1257   {  0xFE00,  0xFE0F, "Variation Selectors"},
1258   {  0xFE10,  0xFE1F, "Vertical Forms"},
1259   {  0xFE20,  0xFE2F, "Combining Half Marks"},
1260   {  0xFE30,  0xFE4F, "CJK Compatibility Forms"},
1261   {  0xFE50,  0xFE6F, "Small Form Variants"},
1262   {  0xFE70,  0xFEFF, "Arabic Presentation Forms-B"},
1263   {  0xFF00,  0xFFEF, "Halfwidth and Fullwidth Forms"},
1264   {  0xFFF0,  0xFFFF, "Specials"},
1265   { 0x10000, 0x1007F, "Linear B Syllabary"},
1266   { 0x10080, 0x100FF, "Linear B Ideograms"},
1267   { 0x10100, 0x1013F, "Aegean Numbers"},
1268   { 0x10140, 0x1018F, "Ancient Greek Numbers"},
1269   { 0x10190, 0x101CF, "Ancient Symbols"},
1270   { 0x101D0, 0x101FF, "Phaistos Disc"},
1271   { 0x10280, 0x1029F, "Lycian"},
1272   { 0x102A0, 0x102DF, "Carian"},
1273   { 0x10300, 0x1032F, "Old Italic"},
1274   { 0x10330, 0x1034F, "Gothic"},
1275   { 0x10380, 0x1039F, "Ugaritic"},
1276   { 0x103A0, 0x103DF, "Old Persian"},
1277   { 0x10400, 0x1044F, "Deseret"},
1278   { 0x10450, 0x1047F, "Shavian"},
1279   { 0x10480, 0x104AF, "Osmanya"},
1280   { 0x10800, 0x1083F, "Cypriot Syllabary"},
1281   { 0x10840, 0x1085F, "Imperial Aramaic"},
1282   { 0x10900, 0x1091F, "Phoenician"},
1283   { 0x10920, 0x1093F, "Lydian"},
1284   { 0x10980, 0x1099F, "Meroitic Hieroglyphs"},
1285   { 0x109A0, 0x109FF, "Meroitic Cursive"},
1286   { 0x10A00, 0x10A5F, "Kharoshthi"},
1287   { 0x10A60, 0x10A7F, "Old South Arabian"},
1288   { 0x10B00, 0x10B3F, "Avestan"},
1289   { 0x10B40, 0x10B5F, "Inscriptional Parthian"},
1290   { 0x10B60, 0x10B7F, "Inscriptional Pahlavi"},
1291   { 0x10C00, 0x10C4F, "Old Turkic"},
1292   { 0x10E60, 0x10E7F, "Rumi Numeral Symbols"},
1293   { 0x11000, 0x1107F, "Brahmi"},
1294   { 0x11080, 0x110CF, "Kaithi"},
1295   { 0x110D0, 0x110FF, "Sora Sompeng"},
1296   { 0x11100, 0x1114F, "Chakma"},
1297   { 0x11180, 0x111DF, "Sharada"},
1298   { 0x11680, 0x116CF, "Takri"},
1299   { 0x12000, 0x123FF, "Cuneiform"},
1300   { 0x12400, 0x1247F, "Cuneiform Numbers and Punctuation"},
1301   { 0x13000, 0x1342F, "Egyptian Hieroglyphs"},
1302   { 0x16800, 0x16A3F, "Bamum Supplement"},
1303   { 0x16F00, 0x16F9F, "Miao"},
1304   { 0x1B000, 0x1B0FF, "Kana Supplement"},
1305   { 0x1D000, 0x1D0FF, "Byzantine Musical Symbols"},
1306   { 0x1D100, 0x1D1FF, "Musical Symbols"},
1307   { 0x1D200, 0x1D24F, "Ancient Greek Musical Notation"},
1308   { 0x1D300, 0x1D35F, "Tai Xuan Jing Symbols"},
1309   { 0x1D360, 0x1D37F, "Counting Rod Numerals"},
1310   { 0x1D400, 0x1D7FF, "Mathematical Alphanumeric Symbols"},
1311   { 0x1EE00, 0x1EEFF, "Arabic Mathematical Alphabetic Symbols"},
1312   { 0x1F000, 0x1F02F, "Mahjong Tiles"},
1313   { 0x1F030, 0x1F09F, "Domino Tiles"},
1314   { 0x1F0A0, 0x1F0FF, "Playing Cards"},
1315   { 0x1F100, 0x1F1FF, "Enclosed Alphanumeric Supplement"},
1316   { 0x1F200, 0x1F2FF, "Enclosed Ideographic Supplement"},
1317   { 0x1F300, 0x1F5FF, "Miscellaneous Symbols And Pictographs"},
1318   { 0x1F600, 0x1F64F, "Emoticons"},
1319   { 0x1F680, 0x1F6FF, "Transport And Map Symbols"},
1320   { 0x1F700, 0x1F77F, "Alchemical Symbols"},
1321   { 0x20000, 0x2A6DF, "CJK Unified Ideographs Extension B"},
1322   { 0x2A700, 0x2B73F, "CJK Unified Ideographs Extension C"},
1323   { 0x2B740, 0x2B81F, "CJK Unified Ideographs Extension D"},
1324   { 0x2F800, 0x2FA1F, "CJK Compatibility Ideographs Supplement"},
1325   { 0xE0000, 0xE007F, "Tags"},
1326   { 0xE0100, 0xE01EF, "Variation Selectors Supplement"},
1327   { 0xF0000, 0xFFFFF, "Supplementary Private Use Area-A"},
1328   {0x100000,0x10FFFF, "Supplementary Private Use Area-B"}
11971329};
11981330
1331/**
1332 * @brief return the name of the Unicode range which %uchar lies in
1333 * @param uchar Unicode value to search the range for
1334 * @return descriptive text for the range, NULL if not found
1335 */
11991336const char * unicode_range_name(unicode_char uchar)
12001337{
12011338   int _min = 0;
r26222r26223
12161353   return NULL;
12171354}
12181355
1356/**
1357 * @brief return the first value of the Unicode range which %uchar lies in
1358 * @param uchar Unicode value to search the range for
1359 * @return first code point of the range that contains uchar
1360 */
12191361unicode_char unicode_range_first(unicode_char uchar)
12201362{
12211363   int _min = 0;
r26222r26223
12361378   return uchar;
12371379}
12381380
1381/**
1382 * @brief return the last value of the Unicode range which %uchar lies in
1383 * @param uchar Unicode value to search the range for
1384 * @return last code point of the range that contains uchar
1385 */
12391386unicode_char unicode_range_last(unicode_char uchar)
12401387{
12411388   int _min = 0;

Previous 199869 Revisions Next


© 1997-2024 The MAME Team