I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,802 @@
# -*- coding: utf-8 -*-
#
# NOTE: This file was auto-generated with MetaTools/buildUCD.py.
# Source: https://unicode.org/Public/UNIDATA/Blocks.txt
# License: http://unicode.org/copyright.html#License
#
# Blocks-16.0.0.txt
# Date: 2024-02-02
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/
#
# Format:
# Start Code..End Code; Block Name
# RANGES lists the first code point of every block range in ascending order.
# The trailing comment on each entry records the inclusive end code point of
# that range and the block name. Stretches that belong to no named block are
# listed explicitly as "No_Block", so consecutive entries are contiguous from
# 0x0000 up to 0x10FFFF. Entry i corresponds to VALUES[i].
# NOTE(review): presumably consumed by a bisect-style lookup on the start
# code points -- confirm against the caller.
RANGES = [
0x0000, # .. 0x007F ; Basic Latin
0x0080, # .. 0x00FF ; Latin-1 Supplement
0x0100, # .. 0x017F ; Latin Extended-A
0x0180, # .. 0x024F ; Latin Extended-B
0x0250, # .. 0x02AF ; IPA Extensions
0x02B0, # .. 0x02FF ; Spacing Modifier Letters
0x0300, # .. 0x036F ; Combining Diacritical Marks
0x0370, # .. 0x03FF ; Greek and Coptic
0x0400, # .. 0x04FF ; Cyrillic
0x0500, # .. 0x052F ; Cyrillic Supplement
0x0530, # .. 0x058F ; Armenian
0x0590, # .. 0x05FF ; Hebrew
0x0600, # .. 0x06FF ; Arabic
0x0700, # .. 0x074F ; Syriac
0x0750, # .. 0x077F ; Arabic Supplement
0x0780, # .. 0x07BF ; Thaana
0x07C0, # .. 0x07FF ; NKo
0x0800, # .. 0x083F ; Samaritan
0x0840, # .. 0x085F ; Mandaic
0x0860, # .. 0x086F ; Syriac Supplement
0x0870, # .. 0x089F ; Arabic Extended-B
0x08A0, # .. 0x08FF ; Arabic Extended-A
0x0900, # .. 0x097F ; Devanagari
0x0980, # .. 0x09FF ; Bengali
0x0A00, # .. 0x0A7F ; Gurmukhi
0x0A80, # .. 0x0AFF ; Gujarati
0x0B00, # .. 0x0B7F ; Oriya
0x0B80, # .. 0x0BFF ; Tamil
0x0C00, # .. 0x0C7F ; Telugu
0x0C80, # .. 0x0CFF ; Kannada
0x0D00, # .. 0x0D7F ; Malayalam
0x0D80, # .. 0x0DFF ; Sinhala
0x0E00, # .. 0x0E7F ; Thai
0x0E80, # .. 0x0EFF ; Lao
0x0F00, # .. 0x0FFF ; Tibetan
0x1000, # .. 0x109F ; Myanmar
0x10A0, # .. 0x10FF ; Georgian
0x1100, # .. 0x11FF ; Hangul Jamo
0x1200, # .. 0x137F ; Ethiopic
0x1380, # .. 0x139F ; Ethiopic Supplement
0x13A0, # .. 0x13FF ; Cherokee
0x1400, # .. 0x167F ; Unified Canadian Aboriginal Syllabics
0x1680, # .. 0x169F ; Ogham
0x16A0, # .. 0x16FF ; Runic
0x1700, # .. 0x171F ; Tagalog
0x1720, # .. 0x173F ; Hanunoo
0x1740, # .. 0x175F ; Buhid
0x1760, # .. 0x177F ; Tagbanwa
0x1780, # .. 0x17FF ; Khmer
0x1800, # .. 0x18AF ; Mongolian
0x18B0, # .. 0x18FF ; Unified Canadian Aboriginal Syllabics Extended
0x1900, # .. 0x194F ; Limbu
0x1950, # .. 0x197F ; Tai Le
0x1980, # .. 0x19DF ; New Tai Lue
0x19E0, # .. 0x19FF ; Khmer Symbols
0x1A00, # .. 0x1A1F ; Buginese
0x1A20, # .. 0x1AAF ; Tai Tham
0x1AB0, # .. 0x1AFF ; Combining Diacritical Marks Extended
0x1B00, # .. 0x1B7F ; Balinese
0x1B80, # .. 0x1BBF ; Sundanese
0x1BC0, # .. 0x1BFF ; Batak
0x1C00, # .. 0x1C4F ; Lepcha
0x1C50, # .. 0x1C7F ; Ol Chiki
0x1C80, # .. 0x1C8F ; Cyrillic Extended-C
0x1C90, # .. 0x1CBF ; Georgian Extended
0x1CC0, # .. 0x1CCF ; Sundanese Supplement
0x1CD0, # .. 0x1CFF ; Vedic Extensions
0x1D00, # .. 0x1D7F ; Phonetic Extensions
0x1D80, # .. 0x1DBF ; Phonetic Extensions Supplement
0x1DC0, # .. 0x1DFF ; Combining Diacritical Marks Supplement
0x1E00, # .. 0x1EFF ; Latin Extended Additional
0x1F00, # .. 0x1FFF ; Greek Extended
0x2000, # .. 0x206F ; General Punctuation
0x2070, # .. 0x209F ; Superscripts and Subscripts
0x20A0, # .. 0x20CF ; Currency Symbols
0x20D0, # .. 0x20FF ; Combining Diacritical Marks for Symbols
0x2100, # .. 0x214F ; Letterlike Symbols
0x2150, # .. 0x218F ; Number Forms
0x2190, # .. 0x21FF ; Arrows
0x2200, # .. 0x22FF ; Mathematical Operators
0x2300, # .. 0x23FF ; Miscellaneous Technical
0x2400, # .. 0x243F ; Control Pictures
0x2440, # .. 0x245F ; Optical Character Recognition
0x2460, # .. 0x24FF ; Enclosed Alphanumerics
0x2500, # .. 0x257F ; Box Drawing
0x2580, # .. 0x259F ; Block Elements
0x25A0, # .. 0x25FF ; Geometric Shapes
0x2600, # .. 0x26FF ; Miscellaneous Symbols
0x2700, # .. 0x27BF ; Dingbats
0x27C0, # .. 0x27EF ; Miscellaneous Mathematical Symbols-A
0x27F0, # .. 0x27FF ; Supplemental Arrows-A
0x2800, # .. 0x28FF ; Braille Patterns
0x2900, # .. 0x297F ; Supplemental Arrows-B
0x2980, # .. 0x29FF ; Miscellaneous Mathematical Symbols-B
0x2A00, # .. 0x2AFF ; Supplemental Mathematical Operators
0x2B00, # .. 0x2BFF ; Miscellaneous Symbols and Arrows
0x2C00, # .. 0x2C5F ; Glagolitic
0x2C60, # .. 0x2C7F ; Latin Extended-C
0x2C80, # .. 0x2CFF ; Coptic
0x2D00, # .. 0x2D2F ; Georgian Supplement
0x2D30, # .. 0x2D7F ; Tifinagh
0x2D80, # .. 0x2DDF ; Ethiopic Extended
0x2DE0, # .. 0x2DFF ; Cyrillic Extended-A
0x2E00, # .. 0x2E7F ; Supplemental Punctuation
0x2E80, # .. 0x2EFF ; CJK Radicals Supplement
0x2F00, # .. 0x2FDF ; Kangxi Radicals
0x2FE0, # .. 0x2FEF ; No_Block
0x2FF0, # .. 0x2FFF ; Ideographic Description Characters
0x3000, # .. 0x303F ; CJK Symbols and Punctuation
0x3040, # .. 0x309F ; Hiragana
0x30A0, # .. 0x30FF ; Katakana
0x3100, # .. 0x312F ; Bopomofo
0x3130, # .. 0x318F ; Hangul Compatibility Jamo
0x3190, # .. 0x319F ; Kanbun
0x31A0, # .. 0x31BF ; Bopomofo Extended
0x31C0, # .. 0x31EF ; CJK Strokes
0x31F0, # .. 0x31FF ; Katakana Phonetic Extensions
0x3200, # .. 0x32FF ; Enclosed CJK Letters and Months
0x3300, # .. 0x33FF ; CJK Compatibility
0x3400, # .. 0x4DBF ; CJK Unified Ideographs Extension A
0x4DC0, # .. 0x4DFF ; Yijing Hexagram Symbols
0x4E00, # .. 0x9FFF ; CJK Unified Ideographs
0xA000, # .. 0xA48F ; Yi Syllables
0xA490, # .. 0xA4CF ; Yi Radicals
0xA4D0, # .. 0xA4FF ; Lisu
0xA500, # .. 0xA63F ; Vai
0xA640, # .. 0xA69F ; Cyrillic Extended-B
0xA6A0, # .. 0xA6FF ; Bamum
0xA700, # .. 0xA71F ; Modifier Tone Letters
0xA720, # .. 0xA7FF ; Latin Extended-D
0xA800, # .. 0xA82F ; Syloti Nagri
0xA830, # .. 0xA83F ; Common Indic Number Forms
0xA840, # .. 0xA87F ; Phags-pa
0xA880, # .. 0xA8DF ; Saurashtra
0xA8E0, # .. 0xA8FF ; Devanagari Extended
0xA900, # .. 0xA92F ; Kayah Li
0xA930, # .. 0xA95F ; Rejang
0xA960, # .. 0xA97F ; Hangul Jamo Extended-A
0xA980, # .. 0xA9DF ; Javanese
0xA9E0, # .. 0xA9FF ; Myanmar Extended-B
0xAA00, # .. 0xAA5F ; Cham
0xAA60, # .. 0xAA7F ; Myanmar Extended-A
0xAA80, # .. 0xAADF ; Tai Viet
0xAAE0, # .. 0xAAFF ; Meetei Mayek Extensions
0xAB00, # .. 0xAB2F ; Ethiopic Extended-A
0xAB30, # .. 0xAB6F ; Latin Extended-E
0xAB70, # .. 0xABBF ; Cherokee Supplement
0xABC0, # .. 0xABFF ; Meetei Mayek
0xAC00, # .. 0xD7AF ; Hangul Syllables
0xD7B0, # .. 0xD7FF ; Hangul Jamo Extended-B
0xD800, # .. 0xDB7F ; High Surrogates
0xDB80, # .. 0xDBFF ; High Private Use Surrogates
0xDC00, # .. 0xDFFF ; Low Surrogates
0xE000, # .. 0xF8FF ; Private Use Area
0xF900, # .. 0xFAFF ; CJK Compatibility Ideographs
0xFB00, # .. 0xFB4F ; Alphabetic Presentation Forms
0xFB50, # .. 0xFDFF ; Arabic Presentation Forms-A
0xFE00, # .. 0xFE0F ; Variation Selectors
0xFE10, # .. 0xFE1F ; Vertical Forms
0xFE20, # .. 0xFE2F ; Combining Half Marks
0xFE30, # .. 0xFE4F ; CJK Compatibility Forms
0xFE50, # .. 0xFE6F ; Small Form Variants
0xFE70, # .. 0xFEFF ; Arabic Presentation Forms-B
0xFF00, # .. 0xFFEF ; Halfwidth and Fullwidth Forms
0xFFF0, # .. 0xFFFF ; Specials
0x10000, # .. 0x1007F ; Linear B Syllabary
0x10080, # .. 0x100FF ; Linear B Ideograms
0x10100, # .. 0x1013F ; Aegean Numbers
0x10140, # .. 0x1018F ; Ancient Greek Numbers
0x10190, # .. 0x101CF ; Ancient Symbols
0x101D0, # .. 0x101FF ; Phaistos Disc
0x10200, # .. 0x1027F ; No_Block
0x10280, # .. 0x1029F ; Lycian
0x102A0, # .. 0x102DF ; Carian
0x102E0, # .. 0x102FF ; Coptic Epact Numbers
0x10300, # .. 0x1032F ; Old Italic
0x10330, # .. 0x1034F ; Gothic
0x10350, # .. 0x1037F ; Old Permic
0x10380, # .. 0x1039F ; Ugaritic
0x103A0, # .. 0x103DF ; Old Persian
0x103E0, # .. 0x103FF ; No_Block
0x10400, # .. 0x1044F ; Deseret
0x10450, # .. 0x1047F ; Shavian
0x10480, # .. 0x104AF ; Osmanya
0x104B0, # .. 0x104FF ; Osage
0x10500, # .. 0x1052F ; Elbasan
0x10530, # .. 0x1056F ; Caucasian Albanian
0x10570, # .. 0x105BF ; Vithkuqi
0x105C0, # .. 0x105FF ; Todhri
0x10600, # .. 0x1077F ; Linear A
0x10780, # .. 0x107BF ; Latin Extended-F
0x107C0, # .. 0x107FF ; No_Block
0x10800, # .. 0x1083F ; Cypriot Syllabary
0x10840, # .. 0x1085F ; Imperial Aramaic
0x10860, # .. 0x1087F ; Palmyrene
0x10880, # .. 0x108AF ; Nabataean
0x108B0, # .. 0x108DF ; No_Block
0x108E0, # .. 0x108FF ; Hatran
0x10900, # .. 0x1091F ; Phoenician
0x10920, # .. 0x1093F ; Lydian
0x10940, # .. 0x1097F ; No_Block
0x10980, # .. 0x1099F ; Meroitic Hieroglyphs
0x109A0, # .. 0x109FF ; Meroitic Cursive
0x10A00, # .. 0x10A5F ; Kharoshthi
0x10A60, # .. 0x10A7F ; Old South Arabian
0x10A80, # .. 0x10A9F ; Old North Arabian
0x10AA0, # .. 0x10ABF ; No_Block
0x10AC0, # .. 0x10AFF ; Manichaean
0x10B00, # .. 0x10B3F ; Avestan
0x10B40, # .. 0x10B5F ; Inscriptional Parthian
0x10B60, # .. 0x10B7F ; Inscriptional Pahlavi
0x10B80, # .. 0x10BAF ; Psalter Pahlavi
0x10BB0, # .. 0x10BFF ; No_Block
0x10C00, # .. 0x10C4F ; Old Turkic
0x10C50, # .. 0x10C7F ; No_Block
0x10C80, # .. 0x10CFF ; Old Hungarian
0x10D00, # .. 0x10D3F ; Hanifi Rohingya
0x10D40, # .. 0x10D8F ; Garay
0x10D90, # .. 0x10E5F ; No_Block
0x10E60, # .. 0x10E7F ; Rumi Numeral Symbols
0x10E80, # .. 0x10EBF ; Yezidi
0x10EC0, # .. 0x10EFF ; Arabic Extended-C
0x10F00, # .. 0x10F2F ; Old Sogdian
0x10F30, # .. 0x10F6F ; Sogdian
0x10F70, # .. 0x10FAF ; Old Uyghur
0x10FB0, # .. 0x10FDF ; Chorasmian
0x10FE0, # .. 0x10FFF ; Elymaic
0x11000, # .. 0x1107F ; Brahmi
0x11080, # .. 0x110CF ; Kaithi
0x110D0, # .. 0x110FF ; Sora Sompeng
0x11100, # .. 0x1114F ; Chakma
0x11150, # .. 0x1117F ; Mahajani
0x11180, # .. 0x111DF ; Sharada
0x111E0, # .. 0x111FF ; Sinhala Archaic Numbers
0x11200, # .. 0x1124F ; Khojki
0x11250, # .. 0x1127F ; No_Block
0x11280, # .. 0x112AF ; Multani
0x112B0, # .. 0x112FF ; Khudawadi
0x11300, # .. 0x1137F ; Grantha
0x11380, # .. 0x113FF ; Tulu-Tigalari
0x11400, # .. 0x1147F ; Newa
0x11480, # .. 0x114DF ; Tirhuta
0x114E0, # .. 0x1157F ; No_Block
0x11580, # .. 0x115FF ; Siddham
0x11600, # .. 0x1165F ; Modi
0x11660, # .. 0x1167F ; Mongolian Supplement
0x11680, # .. 0x116CF ; Takri
0x116D0, # .. 0x116FF ; Myanmar Extended-C
0x11700, # .. 0x1174F ; Ahom
0x11750, # .. 0x117FF ; No_Block
0x11800, # .. 0x1184F ; Dogra
0x11850, # .. 0x1189F ; No_Block
0x118A0, # .. 0x118FF ; Warang Citi
0x11900, # .. 0x1195F ; Dives Akuru
0x11960, # .. 0x1199F ; No_Block
0x119A0, # .. 0x119FF ; Nandinagari
0x11A00, # .. 0x11A4F ; Zanabazar Square
0x11A50, # .. 0x11AAF ; Soyombo
0x11AB0, # .. 0x11ABF ; Unified Canadian Aboriginal Syllabics Extended-A
0x11AC0, # .. 0x11AFF ; Pau Cin Hau
0x11B00, # .. 0x11B5F ; Devanagari Extended-A
0x11B60, # .. 0x11BBF ; No_Block
0x11BC0, # .. 0x11BFF ; Sunuwar
0x11C00, # .. 0x11C6F ; Bhaiksuki
0x11C70, # .. 0x11CBF ; Marchen
0x11CC0, # .. 0x11CFF ; No_Block
0x11D00, # .. 0x11D5F ; Masaram Gondi
0x11D60, # .. 0x11DAF ; Gunjala Gondi
0x11DB0, # .. 0x11EDF ; No_Block
0x11EE0, # .. 0x11EFF ; Makasar
0x11F00, # .. 0x11F5F ; Kawi
0x11F60, # .. 0x11FAF ; No_Block
0x11FB0, # .. 0x11FBF ; Lisu Supplement
0x11FC0, # .. 0x11FFF ; Tamil Supplement
0x12000, # .. 0x123FF ; Cuneiform
0x12400, # .. 0x1247F ; Cuneiform Numbers and Punctuation
0x12480, # .. 0x1254F ; Early Dynastic Cuneiform
0x12550, # .. 0x12F8F ; No_Block
0x12F90, # .. 0x12FFF ; Cypro-Minoan
0x13000, # .. 0x1342F ; Egyptian Hieroglyphs
0x13430, # .. 0x1345F ; Egyptian Hieroglyph Format Controls
0x13460, # .. 0x143FF ; Egyptian Hieroglyphs Extended-A
0x14400, # .. 0x1467F ; Anatolian Hieroglyphs
0x14680, # .. 0x160FF ; No_Block
0x16100, # .. 0x1613F ; Gurung Khema
0x16140, # .. 0x167FF ; No_Block
0x16800, # .. 0x16A3F ; Bamum Supplement
0x16A40, # .. 0x16A6F ; Mro
0x16A70, # .. 0x16ACF ; Tangsa
0x16AD0, # .. 0x16AFF ; Bassa Vah
0x16B00, # .. 0x16B8F ; Pahawh Hmong
0x16B90, # .. 0x16D3F ; No_Block
0x16D40, # .. 0x16D7F ; Kirat Rai
0x16D80, # .. 0x16E3F ; No_Block
0x16E40, # .. 0x16E9F ; Medefaidrin
0x16EA0, # .. 0x16EFF ; No_Block
0x16F00, # .. 0x16F9F ; Miao
0x16FA0, # .. 0x16FDF ; No_Block
0x16FE0, # .. 0x16FFF ; Ideographic Symbols and Punctuation
0x17000, # .. 0x187FF ; Tangut
0x18800, # .. 0x18AFF ; Tangut Components
0x18B00, # .. 0x18CFF ; Khitan Small Script
0x18D00, # .. 0x18D7F ; Tangut Supplement
0x18D80, # .. 0x1AFEF ; No_Block
0x1AFF0, # .. 0x1AFFF ; Kana Extended-B
0x1B000, # .. 0x1B0FF ; Kana Supplement
0x1B100, # .. 0x1B12F ; Kana Extended-A
0x1B130, # .. 0x1B16F ; Small Kana Extension
0x1B170, # .. 0x1B2FF ; Nushu
0x1B300, # .. 0x1BBFF ; No_Block
0x1BC00, # .. 0x1BC9F ; Duployan
0x1BCA0, # .. 0x1BCAF ; Shorthand Format Controls
0x1BCB0, # .. 0x1CBFF ; No_Block
0x1CC00, # .. 0x1CEBF ; Symbols for Legacy Computing Supplement
0x1CEC0, # .. 0x1CEFF ; No_Block
0x1CF00, # .. 0x1CFCF ; Znamenny Musical Notation
0x1CFD0, # .. 0x1CFFF ; No_Block
0x1D000, # .. 0x1D0FF ; Byzantine Musical Symbols
0x1D100, # .. 0x1D1FF ; Musical Symbols
0x1D200, # .. 0x1D24F ; Ancient Greek Musical Notation
0x1D250, # .. 0x1D2BF ; No_Block
0x1D2C0, # .. 0x1D2DF ; Kaktovik Numerals
0x1D2E0, # .. 0x1D2FF ; Mayan Numerals
0x1D300, # .. 0x1D35F ; Tai Xuan Jing Symbols
0x1D360, # .. 0x1D37F ; Counting Rod Numerals
0x1D380, # .. 0x1D3FF ; No_Block
0x1D400, # .. 0x1D7FF ; Mathematical Alphanumeric Symbols
0x1D800, # .. 0x1DAAF ; Sutton SignWriting
0x1DAB0, # .. 0x1DEFF ; No_Block
0x1DF00, # .. 0x1DFFF ; Latin Extended-G
0x1E000, # .. 0x1E02F ; Glagolitic Supplement
0x1E030, # .. 0x1E08F ; Cyrillic Extended-D
0x1E090, # .. 0x1E0FF ; No_Block
0x1E100, # .. 0x1E14F ; Nyiakeng Puachue Hmong
0x1E150, # .. 0x1E28F ; No_Block
0x1E290, # .. 0x1E2BF ; Toto
0x1E2C0, # .. 0x1E2FF ; Wancho
0x1E300, # .. 0x1E4CF ; No_Block
0x1E4D0, # .. 0x1E4FF ; Nag Mundari
0x1E500, # .. 0x1E5CF ; No_Block
0x1E5D0, # .. 0x1E5FF ; Ol Onal
0x1E600, # .. 0x1E7DF ; No_Block
0x1E7E0, # .. 0x1E7FF ; Ethiopic Extended-B
0x1E800, # .. 0x1E8DF ; Mende Kikakui
0x1E8E0, # .. 0x1E8FF ; No_Block
0x1E900, # .. 0x1E95F ; Adlam
0x1E960, # .. 0x1EC6F ; No_Block
0x1EC70, # .. 0x1ECBF ; Indic Siyaq Numbers
0x1ECC0, # .. 0x1ECFF ; No_Block
0x1ED00, # .. 0x1ED4F ; Ottoman Siyaq Numbers
0x1ED50, # .. 0x1EDFF ; No_Block
0x1EE00, # .. 0x1EEFF ; Arabic Mathematical Alphabetic Symbols
0x1EF00, # .. 0x1EFFF ; No_Block
0x1F000, # .. 0x1F02F ; Mahjong Tiles
0x1F030, # .. 0x1F09F ; Domino Tiles
0x1F0A0, # .. 0x1F0FF ; Playing Cards
0x1F100, # .. 0x1F1FF ; Enclosed Alphanumeric Supplement
0x1F200, # .. 0x1F2FF ; Enclosed Ideographic Supplement
0x1F300, # .. 0x1F5FF ; Miscellaneous Symbols and Pictographs
0x1F600, # .. 0x1F64F ; Emoticons
0x1F650, # .. 0x1F67F ; Ornamental Dingbats
0x1F680, # .. 0x1F6FF ; Transport and Map Symbols
0x1F700, # .. 0x1F77F ; Alchemical Symbols
0x1F780, # .. 0x1F7FF ; Geometric Shapes Extended
0x1F800, # .. 0x1F8FF ; Supplemental Arrows-C
0x1F900, # .. 0x1F9FF ; Supplemental Symbols and Pictographs
0x1FA00, # .. 0x1FA6F ; Chess Symbols
0x1FA70, # .. 0x1FAFF ; Symbols and Pictographs Extended-A
0x1FB00, # .. 0x1FBFF ; Symbols for Legacy Computing
0x1FC00, # .. 0x1FFFF ; No_Block
0x20000, # .. 0x2A6DF ; CJK Unified Ideographs Extension B
0x2A6E0, # .. 0x2A6FF ; No_Block
0x2A700, # .. 0x2B73F ; CJK Unified Ideographs Extension C
0x2B740, # .. 0x2B81F ; CJK Unified Ideographs Extension D
0x2B820, # .. 0x2CEAF ; CJK Unified Ideographs Extension E
0x2CEB0, # .. 0x2EBEF ; CJK Unified Ideographs Extension F
0x2EBF0, # .. 0x2EE5F ; CJK Unified Ideographs Extension I
0x2EE60, # .. 0x2F7FF ; No_Block
0x2F800, # .. 0x2FA1F ; CJK Compatibility Ideographs Supplement
0x2FA20, # .. 0x2FFFF ; No_Block
0x30000, # .. 0x3134F ; CJK Unified Ideographs Extension G
0x31350, # .. 0x323AF ; CJK Unified Ideographs Extension H
0x323B0, # .. 0xDFFFF ; No_Block
0xE0000, # .. 0xE007F ; Tags
0xE0080, # .. 0xE00FF ; No_Block
0xE0100, # .. 0xE01EF ; Variation Selectors Supplement
0xE01F0, # .. 0xEFFFF ; No_Block
0xF0000, # .. 0xFFFFF ; Supplementary Private Use Area-A
0x100000, # .. 0x10FFFF ; Supplementary Private Use Area-B
]
# VALUES holds one block name per entry of RANGES, index for index: VALUES[i]
# names the range that starts at RANGES[i]. The trailing comment on each entry
# repeats that range's inclusive start..end code points in hex. "No_Block"
# marks the ranges that lie between named blocks.
VALUES = [
"Basic Latin", # 0000..007F
"Latin-1 Supplement", # 0080..00FF
"Latin Extended-A", # 0100..017F
"Latin Extended-B", # 0180..024F
"IPA Extensions", # 0250..02AF
"Spacing Modifier Letters", # 02B0..02FF
"Combining Diacritical Marks", # 0300..036F
"Greek and Coptic", # 0370..03FF
"Cyrillic", # 0400..04FF
"Cyrillic Supplement", # 0500..052F
"Armenian", # 0530..058F
"Hebrew", # 0590..05FF
"Arabic", # 0600..06FF
"Syriac", # 0700..074F
"Arabic Supplement", # 0750..077F
"Thaana", # 0780..07BF
"NKo", # 07C0..07FF
"Samaritan", # 0800..083F
"Mandaic", # 0840..085F
"Syriac Supplement", # 0860..086F
"Arabic Extended-B", # 0870..089F
"Arabic Extended-A", # 08A0..08FF
"Devanagari", # 0900..097F
"Bengali", # 0980..09FF
"Gurmukhi", # 0A00..0A7F
"Gujarati", # 0A80..0AFF
"Oriya", # 0B00..0B7F
"Tamil", # 0B80..0BFF
"Telugu", # 0C00..0C7F
"Kannada", # 0C80..0CFF
"Malayalam", # 0D00..0D7F
"Sinhala", # 0D80..0DFF
"Thai", # 0E00..0E7F
"Lao", # 0E80..0EFF
"Tibetan", # 0F00..0FFF
"Myanmar", # 1000..109F
"Georgian", # 10A0..10FF
"Hangul Jamo", # 1100..11FF
"Ethiopic", # 1200..137F
"Ethiopic Supplement", # 1380..139F
"Cherokee", # 13A0..13FF
"Unified Canadian Aboriginal Syllabics", # 1400..167F
"Ogham", # 1680..169F
"Runic", # 16A0..16FF
"Tagalog", # 1700..171F
"Hanunoo", # 1720..173F
"Buhid", # 1740..175F
"Tagbanwa", # 1760..177F
"Khmer", # 1780..17FF
"Mongolian", # 1800..18AF
"Unified Canadian Aboriginal Syllabics Extended", # 18B0..18FF
"Limbu", # 1900..194F
"Tai Le", # 1950..197F
"New Tai Lue", # 1980..19DF
"Khmer Symbols", # 19E0..19FF
"Buginese", # 1A00..1A1F
"Tai Tham", # 1A20..1AAF
"Combining Diacritical Marks Extended", # 1AB0..1AFF
"Balinese", # 1B00..1B7F
"Sundanese", # 1B80..1BBF
"Batak", # 1BC0..1BFF
"Lepcha", # 1C00..1C4F
"Ol Chiki", # 1C50..1C7F
"Cyrillic Extended-C", # 1C80..1C8F
"Georgian Extended", # 1C90..1CBF
"Sundanese Supplement", # 1CC0..1CCF
"Vedic Extensions", # 1CD0..1CFF
"Phonetic Extensions", # 1D00..1D7F
"Phonetic Extensions Supplement", # 1D80..1DBF
"Combining Diacritical Marks Supplement", # 1DC0..1DFF
"Latin Extended Additional", # 1E00..1EFF
"Greek Extended", # 1F00..1FFF
"General Punctuation", # 2000..206F
"Superscripts and Subscripts", # 2070..209F
"Currency Symbols", # 20A0..20CF
"Combining Diacritical Marks for Symbols", # 20D0..20FF
"Letterlike Symbols", # 2100..214F
"Number Forms", # 2150..218F
"Arrows", # 2190..21FF
"Mathematical Operators", # 2200..22FF
"Miscellaneous Technical", # 2300..23FF
"Control Pictures", # 2400..243F
"Optical Character Recognition", # 2440..245F
"Enclosed Alphanumerics", # 2460..24FF
"Box Drawing", # 2500..257F
"Block Elements", # 2580..259F
"Geometric Shapes", # 25A0..25FF
"Miscellaneous Symbols", # 2600..26FF
"Dingbats", # 2700..27BF
"Miscellaneous Mathematical Symbols-A", # 27C0..27EF
"Supplemental Arrows-A", # 27F0..27FF
"Braille Patterns", # 2800..28FF
"Supplemental Arrows-B", # 2900..297F
"Miscellaneous Mathematical Symbols-B", # 2980..29FF
"Supplemental Mathematical Operators", # 2A00..2AFF
"Miscellaneous Symbols and Arrows", # 2B00..2BFF
"Glagolitic", # 2C00..2C5F
"Latin Extended-C", # 2C60..2C7F
"Coptic", # 2C80..2CFF
"Georgian Supplement", # 2D00..2D2F
"Tifinagh", # 2D30..2D7F
"Ethiopic Extended", # 2D80..2DDF
"Cyrillic Extended-A", # 2DE0..2DFF
"Supplemental Punctuation", # 2E00..2E7F
"CJK Radicals Supplement", # 2E80..2EFF
"Kangxi Radicals", # 2F00..2FDF
"No_Block", # 2FE0..2FEF
"Ideographic Description Characters", # 2FF0..2FFF
"CJK Symbols and Punctuation", # 3000..303F
"Hiragana", # 3040..309F
"Katakana", # 30A0..30FF
"Bopomofo", # 3100..312F
"Hangul Compatibility Jamo", # 3130..318F
"Kanbun", # 3190..319F
"Bopomofo Extended", # 31A0..31BF
"CJK Strokes", # 31C0..31EF
"Katakana Phonetic Extensions", # 31F0..31FF
"Enclosed CJK Letters and Months", # 3200..32FF
"CJK Compatibility", # 3300..33FF
"CJK Unified Ideographs Extension A", # 3400..4DBF
"Yijing Hexagram Symbols", # 4DC0..4DFF
"CJK Unified Ideographs", # 4E00..9FFF
"Yi Syllables", # A000..A48F
"Yi Radicals", # A490..A4CF
"Lisu", # A4D0..A4FF
"Vai", # A500..A63F
"Cyrillic Extended-B", # A640..A69F
"Bamum", # A6A0..A6FF
"Modifier Tone Letters", # A700..A71F
"Latin Extended-D", # A720..A7FF
"Syloti Nagri", # A800..A82F
"Common Indic Number Forms", # A830..A83F
"Phags-pa", # A840..A87F
"Saurashtra", # A880..A8DF
"Devanagari Extended", # A8E0..A8FF
"Kayah Li", # A900..A92F
"Rejang", # A930..A95F
"Hangul Jamo Extended-A", # A960..A97F
"Javanese", # A980..A9DF
"Myanmar Extended-B", # A9E0..A9FF
"Cham", # AA00..AA5F
"Myanmar Extended-A", # AA60..AA7F
"Tai Viet", # AA80..AADF
"Meetei Mayek Extensions", # AAE0..AAFF
"Ethiopic Extended-A", # AB00..AB2F
"Latin Extended-E", # AB30..AB6F
"Cherokee Supplement", # AB70..ABBF
"Meetei Mayek", # ABC0..ABFF
"Hangul Syllables", # AC00..D7AF
"Hangul Jamo Extended-B", # D7B0..D7FF
"High Surrogates", # D800..DB7F
"High Private Use Surrogates", # DB80..DBFF
"Low Surrogates", # DC00..DFFF
"Private Use Area", # E000..F8FF
"CJK Compatibility Ideographs", # F900..FAFF
"Alphabetic Presentation Forms", # FB00..FB4F
"Arabic Presentation Forms-A", # FB50..FDFF
"Variation Selectors", # FE00..FE0F
"Vertical Forms", # FE10..FE1F
"Combining Half Marks", # FE20..FE2F
"CJK Compatibility Forms", # FE30..FE4F
"Small Form Variants", # FE50..FE6F
"Arabic Presentation Forms-B", # FE70..FEFF
"Halfwidth and Fullwidth Forms", # FF00..FFEF
"Specials", # FFF0..FFFF
"Linear B Syllabary", # 10000..1007F
"Linear B Ideograms", # 10080..100FF
"Aegean Numbers", # 10100..1013F
"Ancient Greek Numbers", # 10140..1018F
"Ancient Symbols", # 10190..101CF
"Phaistos Disc", # 101D0..101FF
"No_Block", # 10200..1027F
"Lycian", # 10280..1029F
"Carian", # 102A0..102DF
"Coptic Epact Numbers", # 102E0..102FF
"Old Italic", # 10300..1032F
"Gothic", # 10330..1034F
"Old Permic", # 10350..1037F
"Ugaritic", # 10380..1039F
"Old Persian", # 103A0..103DF
"No_Block", # 103E0..103FF
"Deseret", # 10400..1044F
"Shavian", # 10450..1047F
"Osmanya", # 10480..104AF
"Osage", # 104B0..104FF
"Elbasan", # 10500..1052F
"Caucasian Albanian", # 10530..1056F
"Vithkuqi", # 10570..105BF
"Todhri", # 105C0..105FF
"Linear A", # 10600..1077F
"Latin Extended-F", # 10780..107BF
"No_Block", # 107C0..107FF
"Cypriot Syllabary", # 10800..1083F
"Imperial Aramaic", # 10840..1085F
"Palmyrene", # 10860..1087F
"Nabataean", # 10880..108AF
"No_Block", # 108B0..108DF
"Hatran", # 108E0..108FF
"Phoenician", # 10900..1091F
"Lydian", # 10920..1093F
"No_Block", # 10940..1097F
"Meroitic Hieroglyphs", # 10980..1099F
"Meroitic Cursive", # 109A0..109FF
"Kharoshthi", # 10A00..10A5F
"Old South Arabian", # 10A60..10A7F
"Old North Arabian", # 10A80..10A9F
"No_Block", # 10AA0..10ABF
"Manichaean", # 10AC0..10AFF
"Avestan", # 10B00..10B3F
"Inscriptional Parthian", # 10B40..10B5F
"Inscriptional Pahlavi", # 10B60..10B7F
"Psalter Pahlavi", # 10B80..10BAF
"No_Block", # 10BB0..10BFF
"Old Turkic", # 10C00..10C4F
"No_Block", # 10C50..10C7F
"Old Hungarian", # 10C80..10CFF
"Hanifi Rohingya", # 10D00..10D3F
"Garay", # 10D40..10D8F
"No_Block", # 10D90..10E5F
"Rumi Numeral Symbols", # 10E60..10E7F
"Yezidi", # 10E80..10EBF
"Arabic Extended-C", # 10EC0..10EFF
"Old Sogdian", # 10F00..10F2F
"Sogdian", # 10F30..10F6F
"Old Uyghur", # 10F70..10FAF
"Chorasmian", # 10FB0..10FDF
"Elymaic", # 10FE0..10FFF
"Brahmi", # 11000..1107F
"Kaithi", # 11080..110CF
"Sora Sompeng", # 110D0..110FF
"Chakma", # 11100..1114F
"Mahajani", # 11150..1117F
"Sharada", # 11180..111DF
"Sinhala Archaic Numbers", # 111E0..111FF
"Khojki", # 11200..1124F
"No_Block", # 11250..1127F
"Multani", # 11280..112AF
"Khudawadi", # 112B0..112FF
"Grantha", # 11300..1137F
"Tulu-Tigalari", # 11380..113FF
"Newa", # 11400..1147F
"Tirhuta", # 11480..114DF
"No_Block", # 114E0..1157F
"Siddham", # 11580..115FF
"Modi", # 11600..1165F
"Mongolian Supplement", # 11660..1167F
"Takri", # 11680..116CF
"Myanmar Extended-C", # 116D0..116FF
"Ahom", # 11700..1174F
"No_Block", # 11750..117FF
"Dogra", # 11800..1184F
"No_Block", # 11850..1189F
"Warang Citi", # 118A0..118FF
"Dives Akuru", # 11900..1195F
"No_Block", # 11960..1199F
"Nandinagari", # 119A0..119FF
"Zanabazar Square", # 11A00..11A4F
"Soyombo", # 11A50..11AAF
"Unified Canadian Aboriginal Syllabics Extended-A", # 11AB0..11ABF
"Pau Cin Hau", # 11AC0..11AFF
"Devanagari Extended-A", # 11B00..11B5F
"No_Block", # 11B60..11BBF
"Sunuwar", # 11BC0..11BFF
"Bhaiksuki", # 11C00..11C6F
"Marchen", # 11C70..11CBF
"No_Block", # 11CC0..11CFF
"Masaram Gondi", # 11D00..11D5F
"Gunjala Gondi", # 11D60..11DAF
"No_Block", # 11DB0..11EDF
"Makasar", # 11EE0..11EFF
"Kawi", # 11F00..11F5F
"No_Block", # 11F60..11FAF
"Lisu Supplement", # 11FB0..11FBF
"Tamil Supplement", # 11FC0..11FFF
"Cuneiform", # 12000..123FF
"Cuneiform Numbers and Punctuation", # 12400..1247F
"Early Dynastic Cuneiform", # 12480..1254F
"No_Block", # 12550..12F8F
"Cypro-Minoan", # 12F90..12FFF
"Egyptian Hieroglyphs", # 13000..1342F
"Egyptian Hieroglyph Format Controls", # 13430..1345F
"Egyptian Hieroglyphs Extended-A", # 13460..143FF
"Anatolian Hieroglyphs", # 14400..1467F
"No_Block", # 14680..160FF
"Gurung Khema", # 16100..1613F
"No_Block", # 16140..167FF
"Bamum Supplement", # 16800..16A3F
"Mro", # 16A40..16A6F
"Tangsa", # 16A70..16ACF
"Bassa Vah", # 16AD0..16AFF
"Pahawh Hmong", # 16B00..16B8F
"No_Block", # 16B90..16D3F
"Kirat Rai", # 16D40..16D7F
"No_Block", # 16D80..16E3F
"Medefaidrin", # 16E40..16E9F
"No_Block", # 16EA0..16EFF
"Miao", # 16F00..16F9F
"No_Block", # 16FA0..16FDF
"Ideographic Symbols and Punctuation", # 16FE0..16FFF
"Tangut", # 17000..187FF
"Tangut Components", # 18800..18AFF
"Khitan Small Script", # 18B00..18CFF
"Tangut Supplement", # 18D00..18D7F
"No_Block", # 18D80..1AFEF
"Kana Extended-B", # 1AFF0..1AFFF
"Kana Supplement", # 1B000..1B0FF
"Kana Extended-A", # 1B100..1B12F
"Small Kana Extension", # 1B130..1B16F
"Nushu", # 1B170..1B2FF
"No_Block", # 1B300..1BBFF
"Duployan", # 1BC00..1BC9F
"Shorthand Format Controls", # 1BCA0..1BCAF
"No_Block", # 1BCB0..1CBFF
"Symbols for Legacy Computing Supplement", # 1CC00..1CEBF
"No_Block", # 1CEC0..1CEFF
"Znamenny Musical Notation", # 1CF00..1CFCF
"No_Block", # 1CFD0..1CFFF
"Byzantine Musical Symbols", # 1D000..1D0FF
"Musical Symbols", # 1D100..1D1FF
"Ancient Greek Musical Notation", # 1D200..1D24F
"No_Block", # 1D250..1D2BF
"Kaktovik Numerals", # 1D2C0..1D2DF
"Mayan Numerals", # 1D2E0..1D2FF
"Tai Xuan Jing Symbols", # 1D300..1D35F
"Counting Rod Numerals", # 1D360..1D37F
"No_Block", # 1D380..1D3FF
"Mathematical Alphanumeric Symbols", # 1D400..1D7FF
"Sutton SignWriting", # 1D800..1DAAF
"No_Block", # 1DAB0..1DEFF
"Latin Extended-G", # 1DF00..1DFFF
"Glagolitic Supplement", # 1E000..1E02F
"Cyrillic Extended-D", # 1E030..1E08F
"No_Block", # 1E090..1E0FF
"Nyiakeng Puachue Hmong", # 1E100..1E14F
"No_Block", # 1E150..1E28F
"Toto", # 1E290..1E2BF
"Wancho", # 1E2C0..1E2FF
"No_Block", # 1E300..1E4CF
"Nag Mundari", # 1E4D0..1E4FF
"No_Block", # 1E500..1E5CF
"Ol Onal", # 1E5D0..1E5FF
"No_Block", # 1E600..1E7DF
"Ethiopic Extended-B", # 1E7E0..1E7FF
"Mende Kikakui", # 1E800..1E8DF
"No_Block", # 1E8E0..1E8FF
"Adlam", # 1E900..1E95F
"No_Block", # 1E960..1EC6F
"Indic Siyaq Numbers", # 1EC70..1ECBF
"No_Block", # 1ECC0..1ECFF
"Ottoman Siyaq Numbers", # 1ED00..1ED4F
"No_Block", # 1ED50..1EDFF
"Arabic Mathematical Alphabetic Symbols", # 1EE00..1EEFF
"No_Block", # 1EF00..1EFFF
"Mahjong Tiles", # 1F000..1F02F
"Domino Tiles", # 1F030..1F09F
"Playing Cards", # 1F0A0..1F0FF
"Enclosed Alphanumeric Supplement", # 1F100..1F1FF
"Enclosed Ideographic Supplement", # 1F200..1F2FF
"Miscellaneous Symbols and Pictographs", # 1F300..1F5FF
"Emoticons", # 1F600..1F64F
"Ornamental Dingbats", # 1F650..1F67F
"Transport and Map Symbols", # 1F680..1F6FF
"Alchemical Symbols", # 1F700..1F77F
"Geometric Shapes Extended", # 1F780..1F7FF
"Supplemental Arrows-C", # 1F800..1F8FF
"Supplemental Symbols and Pictographs", # 1F900..1F9FF
"Chess Symbols", # 1FA00..1FA6F
"Symbols and Pictographs Extended-A", # 1FA70..1FAFF
"Symbols for Legacy Computing", # 1FB00..1FBFF
"No_Block", # 1FC00..1FFFF
"CJK Unified Ideographs Extension B", # 20000..2A6DF
"No_Block", # 2A6E0..2A6FF
"CJK Unified Ideographs Extension C", # 2A700..2B73F
"CJK Unified Ideographs Extension D", # 2B740..2B81F
"CJK Unified Ideographs Extension E", # 2B820..2CEAF
"CJK Unified Ideographs Extension F", # 2CEB0..2EBEF
"CJK Unified Ideographs Extension I", # 2EBF0..2EE5F
"No_Block", # 2EE60..2F7FF
"CJK Compatibility Ideographs Supplement", # 2F800..2FA1F
"No_Block", # 2FA20..2FFFF
"CJK Unified Ideographs Extension G", # 30000..3134F
"CJK Unified Ideographs Extension H", # 31350..323AF
"No_Block", # 323B0..DFFFF
"Tags", # E0000..E007F
"No_Block", # E0080..E00FF
"Variation Selectors Supplement", # E0100..E01EF
"No_Block", # E01F0..EFFFF
"Supplementary Private Use Area-A", # F0000..FFFFF
"Supplementary Private Use Area-B", # 100000..10FFFF
]

View File

@ -0,0 +1,50 @@
# Data updated to OpenType 1.8.2 as of January 2018.
# Complete list of OpenType script tags at:
# https://www.microsoft.com/typography/otspec/scripttags.htm
# Most of the script tags are the same as the ISO 15924 tag but lowercased,
# so we only have to handle the exceptional cases:
# - KATAKANA and HIRAGANA both map to 'kana';
# - spaces at the end are preserved, unlike ISO 15924;
# - we map special script codes for Inherited, Common and Unknown to DFLT.
# OpenType tag that the special script codes (Inherited, Common, Unknown)
# are mapped to in SCRIPT_EXCEPTIONS below.
DEFAULT_SCRIPT = "DFLT"

# OpenType script tag -> the tag it is treated as an alias of.
# NOTE(review): presumably keyed by the alias and valued by the canonical
# tag -- confirm against the lookup code.
SCRIPT_ALIASES = {
    "jamo": "hang",
}

# ISO 15924 script code -> OpenType script tag, for the codes whose tag is
# NOT simply the lowercased code. An OpenType tag is always exactly four
# characters long, so tags shorter than that are padded with trailing
# spaces; "yi" therefore needs TWO trailing spaces.
SCRIPT_EXCEPTIONS = {
    "Hira": "kana",
    "Hrkt": "kana",
    "Laoo": "lao ",
    "Yiii": "yi  ",
    "Nkoo": "nko ",
    "Vaii": "vai ",
    "Zmth": "math",
    "Zinh": DEFAULT_SCRIPT,
    "Zyyy": DEFAULT_SCRIPT,
    "Zzzz": DEFAULT_SCRIPT,
}

# OpenType script tag -> ISO 15924 code, for tags that need an explicit
# reverse entry (only "math" is listed here).
SCRIPT_EXCEPTIONS_REVERSED = {
    "math": "Zmth",
}

# ISO 15924 script code -> tuple of additional OpenType tags defined for that
# script, alongside the tag derived from the code itself.
NEW_SCRIPT_TAGS = {
    "Beng": ("bng2",),
    "Deva": ("dev2",),
    "Gujr": ("gjr2",),
    "Guru": ("gur2",),
    "Knda": ("knd2",),
    "Mlym": ("mlm2",),
    "Orya": ("ory2",),
    "Taml": ("tml2",),
    "Telu": ("tel2",),
    "Mymr": ("mym2",),
}

# Inverse of NEW_SCRIPT_TAGS: each additional OpenType tag -> its ISO 15924
# script code. Built by flattening the tuples so every tag gets its own key.
NEW_SCRIPT_TAGS_REVERSED = {
    new_tag: iso_code
    for iso_code, new_tags in NEW_SCRIPT_TAGS.items()
    for new_tag in new_tags
}

View File

@ -0,0 +1,806 @@
# -*- coding: utf-8 -*-
#
# NOTE: This file was auto-generated with MetaTools/buildUCD.py.
# Source: https://unicode.org/Public/UNIDATA/ScriptExtensions.txt
# License: http://unicode.org/copyright.html#License
#
# ScriptExtensions-16.0.0.txt
# Date: 2024-07-30, 19:38:00 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/
#
# The Script_Extensions property indicates which characters are commonly used
# with more than one script, but with a limited number of scripts.
# For each code point, there is one or more property values. Each such value is a Script property value.
# For more information, see:
# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
# Especially the sections:
# https://www.unicode.org/reports/tr24/#Assignment_Script_Values
# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
#
# Each Script_Extensions value in this file consists of a set
# of one or more abbreviated Script property values. The ordering of the
# values in that set is not material, but for stability in presentation
# it is given here as alphabetical.
#
# All code points not explicitly listed for Script_Extensions
# have as their value the corresponding Script property value.
#
# @missing: 0000..10FFFF; <script>
RANGES = [
0x0000, # .. 0x02BB ; None
0x02BC, # .. 0x02BC ; {'Beng', 'Cyrl', 'Deva', 'Latn', 'Lisu', 'Thai', 'Toto'}
0x02BD, # .. 0x02C6 ; None
0x02C7, # .. 0x02C7 ; {'Bopo', 'Latn'}
0x02C8, # .. 0x02C8 ; None
0x02C9, # .. 0x02CB ; {'Bopo', 'Latn'}
0x02CC, # .. 0x02CC ; None
0x02CD, # .. 0x02CD ; {'Latn', 'Lisu'}
0x02CE, # .. 0x02D6 ; None
0x02D7, # .. 0x02D7 ; {'Latn', 'Thai'}
0x02D8, # .. 0x02D8 ; None
0x02D9, # .. 0x02D9 ; {'Bopo', 'Latn'}
0x02DA, # .. 0x02FF ; None
0x0300, # .. 0x0300 ; {'Cher', 'Copt', 'Cyrl', 'Grek', 'Latn', 'Perm', 'Sunu', 'Tale'}
0x0301, # .. 0x0301 ; {'Cher', 'Cyrl', 'Grek', 'Latn', 'Osge', 'Sunu', 'Tale', 'Todr'}
0x0302, # .. 0x0302 ; {'Cher', 'Cyrl', 'Latn', 'Tfng'}
0x0303, # .. 0x0303 ; {'Glag', 'Latn', 'Sunu', 'Syrc', 'Thai'}
0x0304, # .. 0x0304 ; {'Aghb', 'Cher', 'Copt', 'Cyrl', 'Goth', 'Grek', 'Latn', 'Osge', 'Syrc', 'Tfng', 'Todr'}
0x0305, # .. 0x0305 ; {'Copt', 'Elba', 'Glag', 'Goth', 'Kana', 'Latn'}
0x0306, # .. 0x0306 ; {'Cyrl', 'Grek', 'Latn', 'Perm'}
0x0307, # .. 0x0307 ; {'Copt', 'Dupl', 'Hebr', 'Latn', 'Perm', 'Syrc', 'Tale', 'Tfng', 'Todr'}
0x0308, # .. 0x0308 ; {'Armn', 'Cyrl', 'Dupl', 'Goth', 'Grek', 'Hebr', 'Latn', 'Perm', 'Syrc', 'Tale'}
0x0309, # .. 0x0309 ; {'Latn', 'Tfng'}
0x030A, # .. 0x030A ; {'Dupl', 'Latn', 'Syrc'}
0x030B, # .. 0x030B ; {'Cher', 'Cyrl', 'Latn', 'Osge'}
0x030C, # .. 0x030C ; {'Cher', 'Latn', 'Tale'}
0x030D, # .. 0x030D ; {'Latn', 'Sunu'}
0x030E, # .. 0x030E ; {'Ethi', 'Latn'}
0x030F, # .. 0x030F ; None
0x0310, # .. 0x0310 ; {'Latn', 'Sunu'}
0x0311, # .. 0x0311 ; {'Cyrl', 'Latn', 'Todr'}
0x0312, # .. 0x0312 ; None
0x0313, # .. 0x0313 ; {'Grek', 'Latn', 'Perm', 'Todr'}
0x0314, # .. 0x031F ; None
0x0320, # .. 0x0320 ; {'Latn', 'Syrc'}
0x0321, # .. 0x0322 ; None
0x0323, # .. 0x0323 ; {'Cher', 'Dupl', 'Kana', 'Latn', 'Syrc'}
0x0324, # .. 0x0324 ; {'Cher', 'Dupl', 'Latn', 'Syrc'}
0x0325, # .. 0x0325 ; {'Latn', 'Syrc'}
0x0326, # .. 0x032C ; None
0x032D, # .. 0x032D ; {'Latn', 'Sunu', 'Syrc'}
0x032E, # .. 0x032E ; {'Latn', 'Syrc'}
0x032F, # .. 0x032F ; None
0x0330, # .. 0x0330 ; {'Cher', 'Latn', 'Syrc'}
0x0331, # .. 0x0331 ; {'Aghb', 'Cher', 'Goth', 'Latn', 'Sunu', 'Thai'}
0x0332, # .. 0x0341 ; None
0x0342, # .. 0x0342 ; {'Grek'}
0x0343, # .. 0x0344 ; None
0x0345, # .. 0x0345 ; {'Grek'}
0x0346, # .. 0x0357 ; None
0x0358, # .. 0x0358 ; {'Latn', 'Osge'}
0x0359, # .. 0x035D ; None
0x035E, # .. 0x035E ; {'Aghb', 'Latn', 'Todr'}
0x035F, # .. 0x0362 ; None
0x0363, # .. 0x036F ; {'Latn'}
0x0370, # .. 0x0373 ; None
0x0374, # .. 0x0375 ; {'Copt', 'Grek'}
0x0376, # .. 0x0482 ; None
0x0483, # .. 0x0483 ; {'Cyrl', 'Perm'}
0x0484, # .. 0x0484 ; {'Cyrl', 'Glag'}
0x0485, # .. 0x0486 ; {'Cyrl', 'Latn'}
0x0487, # .. 0x0487 ; {'Cyrl', 'Glag'}
0x0488, # .. 0x0588 ; None
0x0589, # .. 0x0589 ; {'Armn', 'Geor', 'Glag'}
0x058A, # .. 0x060B ; None
0x060C, # .. 0x060C ; {'Arab', 'Gara', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}
0x060D, # .. 0x061A ; None
0x061B, # .. 0x061B ; {'Arab', 'Gara', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}
0x061C, # .. 0x061C ; {'Arab', 'Syrc', 'Thaa'}
0x061D, # .. 0x061E ; None
0x061F, # .. 0x061F ; {'Adlm', 'Arab', 'Gara', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}
0x0620, # .. 0x063F ; None
0x0640, # .. 0x0640 ; {'Adlm', 'Arab', 'Mand', 'Mani', 'Ougr', 'Phlp', 'Rohg', 'Sogd', 'Syrc'}
0x0641, # .. 0x064A ; None
0x064B, # .. 0x0655 ; {'Arab', 'Syrc'}
0x0656, # .. 0x065F ; None
0x0660, # .. 0x0669 ; {'Arab', 'Thaa', 'Yezi'}
0x066A, # .. 0x066F ; None
0x0670, # .. 0x0670 ; {'Arab', 'Syrc'}
0x0671, # .. 0x06D3 ; None
0x06D4, # .. 0x06D4 ; {'Arab', 'Rohg'}
0x06D5, # .. 0x0950 ; None
0x0951, # .. 0x0951 ; {'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Latn', 'Mlym', 'Orya', 'Shrd', 'Taml', 'Telu', 'Tirh'}
0x0952, # .. 0x0952 ; {'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Latn', 'Mlym', 'Orya', 'Taml', 'Telu', 'Tirh'}
0x0953, # .. 0x0963 ; None
0x0964, # .. 0x0964 ; {'Beng', 'Deva', 'Dogr', 'Gong', 'Gonm', 'Gran', 'Gujr', 'Guru', 'Knda', 'Mahj', 'Mlym', 'Nand', 'Onao', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml', 'Telu', 'Tirh'}
0x0965, # .. 0x0965 ; {'Beng', 'Deva', 'Dogr', 'Gong', 'Gonm', 'Gran', 'Gujr', 'Gukh', 'Guru', 'Knda', 'Limb', 'Mahj', 'Mlym', 'Nand', 'Onao', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml', 'Telu', 'Tirh'}
0x0966, # .. 0x096F ; {'Deva', 'Dogr', 'Kthi', 'Mahj'}
0x0970, # .. 0x09E5 ; None
0x09E6, # .. 0x09EF ; {'Beng', 'Cakm', 'Sylo'}
0x09F0, # .. 0x0A65 ; None
0x0A66, # .. 0x0A6F ; {'Guru', 'Mult'}
0x0A70, # .. 0x0AE5 ; None
0x0AE6, # .. 0x0AEF ; {'Gujr', 'Khoj'}
0x0AF0, # .. 0x0BE5 ; None
0x0BE6, # .. 0x0BF3 ; {'Gran', 'Taml'}
0x0BF4, # .. 0x0CE5 ; None
0x0CE6, # .. 0x0CEF ; {'Knda', 'Nand', 'Tutg'}
0x0CF0, # .. 0x103F ; None
0x1040, # .. 0x1049 ; {'Cakm', 'Mymr', 'Tale'}
0x104A, # .. 0x10FA ; None
0x10FB, # .. 0x10FB ; {'Geor', 'Glag', 'Latn'}
0x10FC, # .. 0x16EA ; None
0x16EB, # .. 0x16ED ; {'Runr'}
0x16EE, # .. 0x1734 ; None
0x1735, # .. 0x1736 ; {'Buhd', 'Hano', 'Tagb', 'Tglg'}
0x1737, # .. 0x1801 ; None
0x1802, # .. 0x1803 ; {'Mong', 'Phag'}
0x1804, # .. 0x1804 ; None
0x1805, # .. 0x1805 ; {'Mong', 'Phag'}
0x1806, # .. 0x1CCF ; None
0x1CD0, # .. 0x1CD0 ; {'Beng', 'Deva', 'Gran', 'Knda'}
0x1CD1, # .. 0x1CD1 ; {'Deva'}
0x1CD2, # .. 0x1CD2 ; {'Beng', 'Deva', 'Gran', 'Knda'}
0x1CD3, # .. 0x1CD3 ; {'Deva', 'Gran', 'Knda'}
0x1CD4, # .. 0x1CD4 ; {'Deva'}
0x1CD5, # .. 0x1CD6 ; {'Beng', 'Deva'}
0x1CD7, # .. 0x1CD7 ; {'Deva', 'Shrd'}
0x1CD8, # .. 0x1CD8 ; {'Beng', 'Deva'}
0x1CD9, # .. 0x1CD9 ; {'Deva', 'Shrd'}
0x1CDA, # .. 0x1CDA ; {'Deva', 'Knda', 'Mlym', 'Orya', 'Taml', 'Telu'}
0x1CDB, # .. 0x1CDB ; {'Deva'}
0x1CDC, # .. 0x1CDD ; {'Deva', 'Shrd'}
0x1CDE, # .. 0x1CDF ; {'Deva'}
0x1CE0, # .. 0x1CE0 ; {'Deva', 'Shrd'}
0x1CE1, # .. 0x1CE1 ; {'Beng', 'Deva'}
0x1CE2, # .. 0x1CE8 ; {'Deva'}
0x1CE9, # .. 0x1CE9 ; {'Deva', 'Nand'}
0x1CEA, # .. 0x1CEA ; {'Beng', 'Deva'}
0x1CEB, # .. 0x1CEC ; {'Deva'}
0x1CED, # .. 0x1CED ; {'Beng', 'Deva'}
0x1CEE, # .. 0x1CF1 ; {'Deva'}
0x1CF2, # .. 0x1CF2 ; {'Beng', 'Deva', 'Gran', 'Knda', 'Mlym', 'Nand', 'Orya', 'Sinh', 'Telu', 'Tirh', 'Tutg'}
0x1CF3, # .. 0x1CF3 ; {'Deva', 'Gran'}
0x1CF4, # .. 0x1CF4 ; {'Deva', 'Gran', 'Knda', 'Tutg'}
0x1CF5, # .. 0x1CF6 ; {'Beng', 'Deva'}
0x1CF7, # .. 0x1CF7 ; {'Beng'}
0x1CF8, # .. 0x1CF9 ; {'Deva', 'Gran'}
0x1CFA, # .. 0x1CFA ; {'Nand'}
0x1CFB, # .. 0x1DBF ; None
0x1DC0, # .. 0x1DC1 ; {'Grek'}
0x1DC2, # .. 0x1DF7 ; None
0x1DF8, # .. 0x1DF8 ; {'Cyrl', 'Latn', 'Syrc'}
0x1DF9, # .. 0x1DF9 ; None
0x1DFA, # .. 0x1DFA ; {'Syrc'}
0x1DFB, # .. 0x202E ; None
0x202F, # .. 0x202F ; {'Latn', 'Mong', 'Phag'}
0x2030, # .. 0x204E ; None
0x204F, # .. 0x204F ; {'Adlm', 'Arab'}
0x2050, # .. 0x2059 ; None
0x205A, # .. 0x205A ; {'Cari', 'Geor', 'Glag', 'Hung', 'Lyci', 'Orkh'}
0x205B, # .. 0x205C ; None
0x205D, # .. 0x205D ; {'Cari', 'Grek', 'Hung', 'Mero'}
0x205E, # .. 0x20EF ; None
0x20F0, # .. 0x20F0 ; {'Deva', 'Gran', 'Latn'}
0x20F1, # .. 0x2E16 ; None
0x2E17, # .. 0x2E17 ; {'Copt', 'Latn'}
0x2E18, # .. 0x2E2F ; None
0x2E30, # .. 0x2E30 ; {'Avst', 'Orkh'}
0x2E31, # .. 0x2E31 ; {'Avst', 'Cari', 'Geor', 'Hung', 'Kthi', 'Lydi', 'Samr'}
0x2E32, # .. 0x2E3B ; None
0x2E3C, # .. 0x2E3C ; {'Dupl'}
0x2E3D, # .. 0x2E40 ; None
0x2E41, # .. 0x2E41 ; {'Adlm', 'Arab', 'Hung'}
0x2E42, # .. 0x2E42 ; None
0x2E43, # .. 0x2E43 ; {'Cyrl', 'Glag'}
0x2E44, # .. 0x2FEF ; None
0x2FF0, # .. 0x2FFF ; {'Hani', 'Tang'}
0x3000, # .. 0x3000 ; None
0x3001, # .. 0x3001 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Mong', 'Yiii'}
0x3002, # .. 0x3002 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Mong', 'Phag', 'Yiii'}
0x3003, # .. 0x3003 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
0x3004, # .. 0x3005 ; None
0x3006, # .. 0x3006 ; {'Hani'}
0x3007, # .. 0x3007 ; None
0x3008, # .. 0x3009 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Mong', 'Tibt', 'Yiii'}
0x300A, # .. 0x300B ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Lisu', 'Mong', 'Tibt', 'Yiii'}
0x300C, # .. 0x3011 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
0x3012, # .. 0x3012 ; None
0x3013, # .. 0x3013 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
0x3014, # .. 0x301B ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
0x301C, # .. 0x301F ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
0x3020, # .. 0x3029 ; None
0x302A, # .. 0x302D ; {'Bopo', 'Hani'}
0x302E, # .. 0x302F ; None
0x3030, # .. 0x3030 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
0x3031, # .. 0x3035 ; {'Hira', 'Kana'}
0x3036, # .. 0x3036 ; None
0x3037, # .. 0x3037 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
0x3038, # .. 0x303B ; None
0x303C, # .. 0x303D ; {'Hani', 'Hira', 'Kana'}
0x303E, # .. 0x303F ; {'Hani'}
0x3040, # .. 0x3098 ; None
0x3099, # .. 0x309C ; {'Hira', 'Kana'}
0x309D, # .. 0x309F ; None
0x30A0, # .. 0x30A0 ; {'Hira', 'Kana'}
0x30A1, # .. 0x30FA ; None
0x30FB, # .. 0x30FB ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
0x30FC, # .. 0x30FC ; {'Hira', 'Kana'}
0x30FD, # .. 0x318F ; None
0x3190, # .. 0x319F ; {'Hani'}
0x31A0, # .. 0x31BF ; None
0x31C0, # .. 0x31E5 ; {'Hani'}
0x31E6, # .. 0x31EE ; None
0x31EF, # .. 0x31EF ; {'Hani', 'Tang'}
0x31F0, # .. 0x321F ; None
0x3220, # .. 0x3247 ; {'Hani'}
0x3248, # .. 0x327F ; None
0x3280, # .. 0x32B0 ; {'Hani'}
0x32B1, # .. 0x32BF ; None
0x32C0, # .. 0x32CB ; {'Hani'}
0x32CC, # .. 0x32FE ; None
0x32FF, # .. 0x32FF ; {'Hani'}
0x3300, # .. 0x3357 ; None
0x3358, # .. 0x3370 ; {'Hani'}
0x3371, # .. 0x337A ; None
0x337B, # .. 0x337F ; {'Hani'}
0x3380, # .. 0x33DF ; None
0x33E0, # .. 0x33FE ; {'Hani'}
0x33FF, # .. 0xA66E ; None
0xA66F, # .. 0xA66F ; {'Cyrl', 'Glag'}
0xA670, # .. 0xA6FF ; None
0xA700, # .. 0xA707 ; {'Hani', 'Latn'}
0xA708, # .. 0xA82F ; None
0xA830, # .. 0xA832 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Knda', 'Kthi', 'Mahj', 'Mlym', 'Modi', 'Nand', 'Shrd', 'Sind', 'Takr', 'Tirh', 'Tutg'}
0xA833, # .. 0xA835 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Knda', 'Kthi', 'Mahj', 'Modi', 'Nand', 'Shrd', 'Sind', 'Takr', 'Tirh', 'Tutg'}
0xA836, # .. 0xA837 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'}
0xA838, # .. 0xA838 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Kthi', 'Mahj', 'Modi', 'Shrd', 'Sind', 'Takr', 'Tirh'}
0xA839, # .. 0xA839 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'}
0xA83A, # .. 0xA8F0 ; None
0xA8F1, # .. 0xA8F1 ; {'Beng', 'Deva', 'Tutg'}
0xA8F2, # .. 0xA8F2 ; None
0xA8F3, # .. 0xA8F3 ; {'Deva', 'Taml'}
0xA8F4, # .. 0xA92D ; None
0xA92E, # .. 0xA92E ; {'Kali', 'Latn', 'Mymr'}
0xA92F, # .. 0xA9CE ; None
0xA9CF, # .. 0xA9CF ; {'Bugi', 'Java'}
0xA9D0, # .. 0xFD3D ; None
0xFD3E, # .. 0xFD3F ; {'Arab', 'Nkoo'}
0xFD40, # .. 0xFDF1 ; None
0xFDF2, # .. 0xFDF2 ; {'Arab', 'Thaa'}
0xFDF3, # .. 0xFDFC ; None
0xFDFD, # .. 0xFDFD ; {'Arab', 'Thaa'}
0xFDFE, # .. 0xFE44 ; None
0xFE45, # .. 0xFE46 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
0xFE47, # .. 0xFF60 ; None
0xFF61, # .. 0xFF65 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
0xFF66, # .. 0xFF6F ; None
0xFF70, # .. 0xFF70 ; {'Hira', 'Kana'}
0xFF71, # .. 0xFF9D ; None
0xFF9E, # .. 0xFF9F ; {'Hira', 'Kana'}
0xFFA0, # .. 0x100FF ; None
0x10100, # .. 0x10101 ; {'Cpmn', 'Cprt', 'Linb'}
0x10102, # .. 0x10102 ; {'Cprt', 'Linb'}
0x10103, # .. 0x10106 ; None
0x10107, # .. 0x10133 ; {'Cprt', 'Lina', 'Linb'}
0x10134, # .. 0x10136 ; None
0x10137, # .. 0x1013F ; {'Cprt', 'Linb'}
0x10140, # .. 0x102DF ; None
0x102E0, # .. 0x102FB ; {'Arab', 'Copt'}
0x102FC, # .. 0x10AF1 ; None
0x10AF2, # .. 0x10AF2 ; {'Mani', 'Ougr'}
0x10AF3, # .. 0x11300 ; None
0x11301, # .. 0x11301 ; {'Gran', 'Taml'}
0x11302, # .. 0x11302 ; None
0x11303, # .. 0x11303 ; {'Gran', 'Taml'}
0x11304, # .. 0x1133A ; None
0x1133B, # .. 0x1133C ; {'Gran', 'Taml'}
0x1133D, # .. 0x11FCF ; None
0x11FD0, # .. 0x11FD1 ; {'Gran', 'Taml'}
0x11FD2, # .. 0x11FD2 ; None
0x11FD3, # .. 0x11FD3 ; {'Gran', 'Taml'}
0x11FD4, # .. 0x1BC9F ; None
0x1BCA0, # .. 0x1BCA3 ; {'Dupl'}
0x1BCA4, # .. 0x1D35F ; None
0x1D360, # .. 0x1D371 ; {'Hani'}
0x1D372, # .. 0x1F24F ; None
0x1F250, # .. 0x1F251 ; {'Hani'}
0x1F252, # .. 0x10FFFF ; None
]
VALUES = [
None, # 0000..02BB
{"Beng", "Cyrl", "Deva", "Latn", "Lisu", "Thai", "Toto"}, # 02BC..02BC
None, # 02BD..02C6
{"Bopo", "Latn"}, # 02C7..02C7
None, # 02C8..02C8
{"Bopo", "Latn"}, # 02C9..02CB
None, # 02CC..02CC
{"Latn", "Lisu"}, # 02CD..02CD
None, # 02CE..02D6
{"Latn", "Thai"}, # 02D7..02D7
None, # 02D8..02D8
{"Bopo", "Latn"}, # 02D9..02D9
None, # 02DA..02FF
{"Cher", "Copt", "Cyrl", "Grek", "Latn", "Perm", "Sunu", "Tale"}, # 0300..0300
{"Cher", "Cyrl", "Grek", "Latn", "Osge", "Sunu", "Tale", "Todr"}, # 0301..0301
{"Cher", "Cyrl", "Latn", "Tfng"}, # 0302..0302
{"Glag", "Latn", "Sunu", "Syrc", "Thai"}, # 0303..0303
{
"Aghb",
"Cher",
"Copt",
"Cyrl",
"Goth",
"Grek",
"Latn",
"Osge",
"Syrc",
"Tfng",
"Todr",
}, # 0304..0304
{"Copt", "Elba", "Glag", "Goth", "Kana", "Latn"}, # 0305..0305
{"Cyrl", "Grek", "Latn", "Perm"}, # 0306..0306
{
"Copt",
"Dupl",
"Hebr",
"Latn",
"Perm",
"Syrc",
"Tale",
"Tfng",
"Todr",
}, # 0307..0307
{
"Armn",
"Cyrl",
"Dupl",
"Goth",
"Grek",
"Hebr",
"Latn",
"Perm",
"Syrc",
"Tale",
}, # 0308..0308
{"Latn", "Tfng"}, # 0309..0309
{"Dupl", "Latn", "Syrc"}, # 030A..030A
{"Cher", "Cyrl", "Latn", "Osge"}, # 030B..030B
{"Cher", "Latn", "Tale"}, # 030C..030C
{"Latn", "Sunu"}, # 030D..030D
{"Ethi", "Latn"}, # 030E..030E
None, # 030F..030F
{"Latn", "Sunu"}, # 0310..0310
{"Cyrl", "Latn", "Todr"}, # 0311..0311
None, # 0312..0312
{"Grek", "Latn", "Perm", "Todr"}, # 0313..0313
None, # 0314..031F
{"Latn", "Syrc"}, # 0320..0320
None, # 0321..0322
{"Cher", "Dupl", "Kana", "Latn", "Syrc"}, # 0323..0323
{"Cher", "Dupl", "Latn", "Syrc"}, # 0324..0324
{"Latn", "Syrc"}, # 0325..0325
None, # 0326..032C
{"Latn", "Sunu", "Syrc"}, # 032D..032D
{"Latn", "Syrc"}, # 032E..032E
None, # 032F..032F
{"Cher", "Latn", "Syrc"}, # 0330..0330
{"Aghb", "Cher", "Goth", "Latn", "Sunu", "Thai"}, # 0331..0331
None, # 0332..0341
{"Grek"}, # 0342..0342
None, # 0343..0344
{"Grek"}, # 0345..0345
None, # 0346..0357
{"Latn", "Osge"}, # 0358..0358
None, # 0359..035D
{"Aghb", "Latn", "Todr"}, # 035E..035E
None, # 035F..0362
{"Latn"}, # 0363..036F
None, # 0370..0373
{"Copt", "Grek"}, # 0374..0375
None, # 0376..0482
{"Cyrl", "Perm"}, # 0483..0483
{"Cyrl", "Glag"}, # 0484..0484
{"Cyrl", "Latn"}, # 0485..0486
{"Cyrl", "Glag"}, # 0487..0487
None, # 0488..0588
{"Armn", "Geor", "Glag"}, # 0589..0589
None, # 058A..060B
{"Arab", "Gara", "Nkoo", "Rohg", "Syrc", "Thaa", "Yezi"}, # 060C..060C
None, # 060D..061A
{"Arab", "Gara", "Nkoo", "Rohg", "Syrc", "Thaa", "Yezi"}, # 061B..061B
{"Arab", "Syrc", "Thaa"}, # 061C..061C
None, # 061D..061E
{"Adlm", "Arab", "Gara", "Nkoo", "Rohg", "Syrc", "Thaa", "Yezi"}, # 061F..061F
None, # 0620..063F
{
"Adlm",
"Arab",
"Mand",
"Mani",
"Ougr",
"Phlp",
"Rohg",
"Sogd",
"Syrc",
}, # 0640..0640
None, # 0641..064A
{"Arab", "Syrc"}, # 064B..0655
None, # 0656..065F
{"Arab", "Thaa", "Yezi"}, # 0660..0669
None, # 066A..066F
{"Arab", "Syrc"}, # 0670..0670
None, # 0671..06D3
{"Arab", "Rohg"}, # 06D4..06D4
None, # 06D5..0950
{
"Beng",
"Deva",
"Gran",
"Gujr",
"Guru",
"Knda",
"Latn",
"Mlym",
"Orya",
"Shrd",
"Taml",
"Telu",
"Tirh",
}, # 0951..0951
{
"Beng",
"Deva",
"Gran",
"Gujr",
"Guru",
"Knda",
"Latn",
"Mlym",
"Orya",
"Taml",
"Telu",
"Tirh",
}, # 0952..0952
None, # 0953..0963
{
"Beng",
"Deva",
"Dogr",
"Gong",
"Gonm",
"Gran",
"Gujr",
"Guru",
"Knda",
"Mahj",
"Mlym",
"Nand",
"Onao",
"Orya",
"Sind",
"Sinh",
"Sylo",
"Takr",
"Taml",
"Telu",
"Tirh",
}, # 0964..0964
{
"Beng",
"Deva",
"Dogr",
"Gong",
"Gonm",
"Gran",
"Gujr",
"Gukh",
"Guru",
"Knda",
"Limb",
"Mahj",
"Mlym",
"Nand",
"Onao",
"Orya",
"Sind",
"Sinh",
"Sylo",
"Takr",
"Taml",
"Telu",
"Tirh",
}, # 0965..0965
{"Deva", "Dogr", "Kthi", "Mahj"}, # 0966..096F
None, # 0970..09E5
{"Beng", "Cakm", "Sylo"}, # 09E6..09EF
None, # 09F0..0A65
{"Guru", "Mult"}, # 0A66..0A6F
None, # 0A70..0AE5
{"Gujr", "Khoj"}, # 0AE6..0AEF
None, # 0AF0..0BE5
{"Gran", "Taml"}, # 0BE6..0BF3
None, # 0BF4..0CE5
{"Knda", "Nand", "Tutg"}, # 0CE6..0CEF
None, # 0CF0..103F
{"Cakm", "Mymr", "Tale"}, # 1040..1049
None, # 104A..10FA
{"Geor", "Glag", "Latn"}, # 10FB..10FB
None, # 10FC..16EA
{"Runr"}, # 16EB..16ED
None, # 16EE..1734
{"Buhd", "Hano", "Tagb", "Tglg"}, # 1735..1736
None, # 1737..1801
{"Mong", "Phag"}, # 1802..1803
None, # 1804..1804
{"Mong", "Phag"}, # 1805..1805
None, # 1806..1CCF
{"Beng", "Deva", "Gran", "Knda"}, # 1CD0..1CD0
{"Deva"}, # 1CD1..1CD1
{"Beng", "Deva", "Gran", "Knda"}, # 1CD2..1CD2
{"Deva", "Gran", "Knda"}, # 1CD3..1CD3
{"Deva"}, # 1CD4..1CD4
{"Beng", "Deva"}, # 1CD5..1CD6
{"Deva", "Shrd"}, # 1CD7..1CD7
{"Beng", "Deva"}, # 1CD8..1CD8
{"Deva", "Shrd"}, # 1CD9..1CD9
{"Deva", "Knda", "Mlym", "Orya", "Taml", "Telu"}, # 1CDA..1CDA
{"Deva"}, # 1CDB..1CDB
{"Deva", "Shrd"}, # 1CDC..1CDD
{"Deva"}, # 1CDE..1CDF
{"Deva", "Shrd"}, # 1CE0..1CE0
{"Beng", "Deva"}, # 1CE1..1CE1
{"Deva"}, # 1CE2..1CE8
{"Deva", "Nand"}, # 1CE9..1CE9
{"Beng", "Deva"}, # 1CEA..1CEA
{"Deva"}, # 1CEB..1CEC
{"Beng", "Deva"}, # 1CED..1CED
{"Deva"}, # 1CEE..1CF1
{
"Beng",
"Deva",
"Gran",
"Knda",
"Mlym",
"Nand",
"Orya",
"Sinh",
"Telu",
"Tirh",
"Tutg",
}, # 1CF2..1CF2
{"Deva", "Gran"}, # 1CF3..1CF3
{"Deva", "Gran", "Knda", "Tutg"}, # 1CF4..1CF4
{"Beng", "Deva"}, # 1CF5..1CF6
{"Beng"}, # 1CF7..1CF7
{"Deva", "Gran"}, # 1CF8..1CF9
{"Nand"}, # 1CFA..1CFA
None, # 1CFB..1DBF
{"Grek"}, # 1DC0..1DC1
None, # 1DC2..1DF7
{"Cyrl", "Latn", "Syrc"}, # 1DF8..1DF8
None, # 1DF9..1DF9
{"Syrc"}, # 1DFA..1DFA
None, # 1DFB..202E
{"Latn", "Mong", "Phag"}, # 202F..202F
None, # 2030..204E
{"Adlm", "Arab"}, # 204F..204F
None, # 2050..2059
{"Cari", "Geor", "Glag", "Hung", "Lyci", "Orkh"}, # 205A..205A
None, # 205B..205C
{"Cari", "Grek", "Hung", "Mero"}, # 205D..205D
None, # 205E..20EF
{"Deva", "Gran", "Latn"}, # 20F0..20F0
None, # 20F1..2E16
{"Copt", "Latn"}, # 2E17..2E17
None, # 2E18..2E2F
{"Avst", "Orkh"}, # 2E30..2E30
{"Avst", "Cari", "Geor", "Hung", "Kthi", "Lydi", "Samr"}, # 2E31..2E31
None, # 2E32..2E3B
{"Dupl"}, # 2E3C..2E3C
None, # 2E3D..2E40
{"Adlm", "Arab", "Hung"}, # 2E41..2E41
None, # 2E42..2E42
{"Cyrl", "Glag"}, # 2E43..2E43
None, # 2E44..2FEF
{"Hani", "Tang"}, # 2FF0..2FFF
None, # 3000..3000
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Mong", "Yiii"}, # 3001..3001
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Mong", "Phag", "Yiii"}, # 3002..3002
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 3003..3003
None, # 3004..3005
{"Hani"}, # 3006..3006
None, # 3007..3007
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Mong", "Tibt", "Yiii"}, # 3008..3009
{
"Bopo",
"Hang",
"Hani",
"Hira",
"Kana",
"Lisu",
"Mong",
"Tibt",
"Yiii",
}, # 300A..300B
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Yiii"}, # 300C..3011
None, # 3012..3012
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 3013..3013
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Yiii"}, # 3014..301B
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 301C..301F
None, # 3020..3029
{"Bopo", "Hani"}, # 302A..302D
None, # 302E..302F
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 3030..3030
{"Hira", "Kana"}, # 3031..3035
None, # 3036..3036
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 3037..3037
None, # 3038..303B
{"Hani", "Hira", "Kana"}, # 303C..303D
{"Hani"}, # 303E..303F
None, # 3040..3098
{"Hira", "Kana"}, # 3099..309C
None, # 309D..309F
{"Hira", "Kana"}, # 30A0..30A0
None, # 30A1..30FA
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Yiii"}, # 30FB..30FB
{"Hira", "Kana"}, # 30FC..30FC
None, # 30FD..318F
{"Hani"}, # 3190..319F
None, # 31A0..31BF
{"Hani"}, # 31C0..31E5
None, # 31E6..31EE
{"Hani", "Tang"}, # 31EF..31EF
None, # 31F0..321F
{"Hani"}, # 3220..3247
None, # 3248..327F
{"Hani"}, # 3280..32B0
None, # 32B1..32BF
{"Hani"}, # 32C0..32CB
None, # 32CC..32FE
{"Hani"}, # 32FF..32FF
None, # 3300..3357
{"Hani"}, # 3358..3370
None, # 3371..337A
{"Hani"}, # 337B..337F
None, # 3380..33DF
{"Hani"}, # 33E0..33FE
None, # 33FF..A66E
{"Cyrl", "Glag"}, # A66F..A66F
None, # A670..A6FF
{"Hani", "Latn"}, # A700..A707
None, # A708..A82F
{
"Deva",
"Dogr",
"Gujr",
"Guru",
"Khoj",
"Knda",
"Kthi",
"Mahj",
"Mlym",
"Modi",
"Nand",
"Shrd",
"Sind",
"Takr",
"Tirh",
"Tutg",
}, # A830..A832
{
"Deva",
"Dogr",
"Gujr",
"Guru",
"Khoj",
"Knda",
"Kthi",
"Mahj",
"Modi",
"Nand",
"Shrd",
"Sind",
"Takr",
"Tirh",
"Tutg",
}, # A833..A835
{
"Deva",
"Dogr",
"Gujr",
"Guru",
"Khoj",
"Kthi",
"Mahj",
"Modi",
"Sind",
"Takr",
"Tirh",
}, # A836..A837
{
"Deva",
"Dogr",
"Gujr",
"Guru",
"Khoj",
"Kthi",
"Mahj",
"Modi",
"Shrd",
"Sind",
"Takr",
"Tirh",
}, # A838..A838
{
"Deva",
"Dogr",
"Gujr",
"Guru",
"Khoj",
"Kthi",
"Mahj",
"Modi",
"Sind",
"Takr",
"Tirh",
}, # A839..A839
None, # A83A..A8F0
{"Beng", "Deva", "Tutg"}, # A8F1..A8F1
None, # A8F2..A8F2
{"Deva", "Taml"}, # A8F3..A8F3
None, # A8F4..A92D
{"Kali", "Latn", "Mymr"}, # A92E..A92E
None, # A92F..A9CE
{"Bugi", "Java"}, # A9CF..A9CF
None, # A9D0..FD3D
{"Arab", "Nkoo"}, # FD3E..FD3F
None, # FD40..FDF1
{"Arab", "Thaa"}, # FDF2..FDF2
None, # FDF3..FDFC
{"Arab", "Thaa"}, # FDFD..FDFD
None, # FDFE..FE44
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # FE45..FE46
None, # FE47..FF60
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Yiii"}, # FF61..FF65
None, # FF66..FF6F
{"Hira", "Kana"}, # FF70..FF70
None, # FF71..FF9D
{"Hira", "Kana"}, # FF9E..FF9F
None, # FFA0..100FF
{"Cpmn", "Cprt", "Linb"}, # 10100..10101
{"Cprt", "Linb"}, # 10102..10102
None, # 10103..10106
{"Cprt", "Lina", "Linb"}, # 10107..10133
None, # 10134..10136
{"Cprt", "Linb"}, # 10137..1013F
None, # 10140..102DF
{"Arab", "Copt"}, # 102E0..102FB
None, # 102FC..10AF1
{"Mani", "Ougr"}, # 10AF2..10AF2
None, # 10AF3..11300
{"Gran", "Taml"}, # 11301..11301
None, # 11302..11302
{"Gran", "Taml"}, # 11303..11303
None, # 11304..1133A
{"Gran", "Taml"}, # 1133B..1133C
None, # 1133D..11FCF
{"Gran", "Taml"}, # 11FD0..11FD1
None, # 11FD2..11FD2
{"Gran", "Taml"}, # 11FD3..11FD3
None, # 11FD4..1BC9F
{"Dupl"}, # 1BCA0..1BCA3
None, # 1BCA4..1D35F
{"Hani"}, # 1D360..1D371
None, # 1D372..1F24F
{"Hani"}, # 1F250..1F251
None, # 1F252..10FFFF
]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,298 @@
from __future__ import annotations
from fontTools.misc.textTools import byteord, tostr
import re
from bisect import bisect_right
from typing import Literal, TypeVar, overload
try:
# use unicodedata backport compatible with python2:
# https://github.com/fonttools/unicodedata2
from unicodedata2 import *
except ImportError: # pragma: no cover
# fall back to built-in unicodedata (possibly outdated)
from unicodedata import *
from . import Blocks, Scripts, ScriptExtensions, OTTags
# Public API of this module: everything re-exported from the unicodedata
# implementation imported above, plus the helpers defined in this file.
__all__ = [
    # names from built-in unicodedata module
    "lookup",
    "name",
    "decimal",
    "digit",
    "numeric",
    "category",
    "bidirectional",
    "combining",
    "east_asian_width",
    "mirrored",
    "decomposition",
    "normalize",
    "unidata_version",
    "ucd_3_2_0",
    # additional functions
    "block",
    "script",
    "script_extension",
    "script_name",
    "script_code",
    "script_horizontal_direction",
    "ot_tags_from_script",
    "ot_tag_to_script",
]
def script(char):
    """Look up the four-letter script code of the Unicode character 'char'.

    The result is returned as a string.

    >>> script("a")
    'Latn'
    >>> script(",")
    'Zyyy'
    >>> script(chr(0x10FFFF))
    'Zzzz'
    """
    codepoint = byteord(char)
    # 'Scripts.RANGES' is a sorted list holding only the starting breakpoint
    # of each range. 'bisect_right' returns the insertion point to the right
    # of every entry that is <= codepoint, so the range containing the
    # codepoint (the last one whose start is <= codepoint) sits one position
    # before that insertion point. Hence we subtract 1.
    range_index = bisect_right(Scripts.RANGES, codepoint) - 1
    return Scripts.VALUES[range_index]
def script_extension(char):
    """Look up the Script_Extensions property of the Unicode character
    'char' and return it as a set of four-letter script codes.

    For code points explicitly listed in the ScriptExtensions table, the
    returned set is the table's own object (not a copy), so callers should
    treat it as read-only.

    >>> script_extension("a") == {'Latn'}
    True
    >>> script_extension(chr(0x060C)) == {'Nkoo', 'Arab', 'Rohg', 'Thaa', 'Syrc', 'Gara', 'Yezi'}
    True
    >>> script_extension(chr(0x10FFFF)) == {'Zzzz'}
    True
    """
    codepoint = byteord(char)
    # Same "last range start <= codepoint" lookup as in script().
    range_index = bisect_right(ScriptExtensions.RANGES, codepoint) - 1
    extensions = ScriptExtensions.VALUES[range_index]
    if extensions is not None:
        return extensions
    # A None entry means the code point is not explicitly listed for
    # Script Extensions; its value is then the corresponding Script
    # property value, wrapped in a fresh single-element set.
    return {script(char)}
def script_name(code, default=KeyError):
    """Translate a four-letter Unicode script code into its long,
    human-readable script name (underscores are shown as spaces).

    When 'code' is unknown, the behaviour depends on 'default':
    if it is KeyError (the default) or a subclass of it, the KeyError from
    the failed lookup is re-raised; any other value (e.g. 'Unknown' or None)
    is returned as the fallback instead.
    """
    try:
        raw_name = Scripts.NAMES[code]
    except KeyError:
        wants_error = isinstance(default, type) and issubclass(default, KeyError)
        if not wants_error:
            return default
        raise
    return str(raw_name.replace("_", " "))
_normalize_re = re.compile(r"[-_ ]+")
def _normalize_property_name(string):
"""Remove case, strip space, '-' and '_' for loose matching."""
return _normalize_re.sub("", string).lower()
# Normalized long script name -> four-letter script code.
_SCRIPT_CODES = {
    _normalize_property_name(long_name): short_code
    for short_code, long_name in Scripts.NAMES.items()
}


def script_code(script_name, default=KeyError):
    """Translate a long script name into its four-letter Unicode script code.

    The name is matched loosely: it is normalized with
    _normalize_property_name() before the lookup.

    When no script matches, the behaviour depends on 'default':
    if it is KeyError (the default) or a subclass of it, the KeyError from
    the failed lookup is re-raised; any other value (e.g. 'Zzzz' or None)
    is returned as the fallback instead.
    """
    lookup_key = _normalize_property_name(script_name)
    try:
        found = _SCRIPT_CODES[lookup_key]
    except KeyError:
        wants_error = isinstance(default, type) and issubclass(default, KeyError)
        if not wants_error:
            return default
        raise
    return found
# The data on script direction is taken from Harfbuzz source code:
# https://github.com/harfbuzz/harfbuzz/blob/3.2.0/src/hb-common.cc#L514-L613
# This in turn references the following "Script_Metadata" document:
# https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o
RTL_SCRIPTS = {
# Unicode-1.1 additions
"Arab", # Arabic
"Hebr", # Hebrew
# Unicode-3.0 additions
"Syrc", # Syriac
"Thaa", # Thaana
# Unicode-4.0 additions
"Cprt", # Cypriot
# Unicode-4.1 additions
"Khar", # Kharoshthi
# Unicode-5.0 additions
"Phnx", # Phoenician
"Nkoo", # Nko
# Unicode-5.1 additions
"Lydi", # Lydian
# Unicode-5.2 additions
"Avst", # Avestan
"Armi", # Imperial Aramaic
"Phli", # Inscriptional Pahlavi
"Prti", # Inscriptional Parthian
"Sarb", # Old South Arabian
"Orkh", # Old Turkic
"Samr", # Samaritan
# Unicode-6.0 additions
"Mand", # Mandaic
# Unicode-6.1 additions
"Merc", # Meroitic Cursive
"Mero", # Meroitic Hieroglyphs
# Unicode-7.0 additions
"Mani", # Manichaean
"Mend", # Mende Kikakui
"Nbat", # Nabataean
"Narb", # Old North Arabian
"Palm", # Palmyrene
"Phlp", # Psalter Pahlavi
# Unicode-8.0 additions
"Hatr", # Hatran
"Hung", # Old Hungarian
# Unicode-9.0 additions
"Adlm", # Adlam
# Unicode-11.0 additions
"Rohg", # Hanifi Rohingya
"Sogo", # Old Sogdian
"Sogd", # Sogdian
# Unicode-12.0 additions
"Elym", # Elymaic
# Unicode-13.0 additions
"Chrs", # Chorasmian
"Yezi", # Yezidi
# Unicode-14.0 additions
"Ougr", # Old Uyghur
}
# The two possible results of script_horizontal_direction().
HorizDirection = Literal["RTL", "LTR"]

# Type of a caller-supplied fallback value.
T = TypeVar("T")


@overload
def script_horizontal_direction(script_code: str, default: T) -> HorizDirection | T: ...


@overload
def script_horizontal_direction(
    script_code: str, default: type[KeyError] = KeyError
) -> HorizDirection: ...


def script_horizontal_direction(
    script_code: str, default: T | type[KeyError] = KeyError
) -> HorizDirection | T:
    """Return the horizontal direction of a script: "RTL" when the
    four-letter 'script_code' is listed in RTL_SCRIPTS, "LTR" otherwise.

    When 'script_code' is not a known script code, the behaviour depends on
    'default': if it is KeyError (the default) or a subclass of it, that
    exception is raised with the script code as its argument; any other
    value is returned as the fallback instead.
    """
    if script_code in Scripts.NAMES:
        return "RTL" if script_code in RTL_SCRIPTS else "LTR"

    # Unknown script code: raise or fall back, as requested by the caller.
    if isinstance(default, type) and issubclass(default, KeyError):
        raise default(script_code)
    return default
def block(char):
    """Look up the name of the Unicode block that contains the character
    'char' and return it as a string.

    >>> block("a")
    'Basic Latin'
    >>> block(chr(0x060C))
    'Arabic'
    >>> block(chr(0xEFFFF))
    'No_Block'
    """
    codepoint = byteord(char)
    # 'Blocks.RANGES' lists range starts; the containing range is the one
    # just before the insertion point returned by bisect_right.
    range_index = bisect_right(Blocks.RANGES, codepoint) - 1
    return Blocks.VALUES[range_index]
def ot_tags_from_script(script_code):
    """Return the list of OpenType script tags that correspond to the given
    four-letter Unicode script code.

    Codes listed in OTTags.SCRIPT_EXCEPTIONS yield exactly their mapped tag.
    Invalid/unknown script codes yield ['DFLT'].
    Otherwise the list ends with the script code with its first letter
    lowercased, preceded by any tags from OTTags.NEW_SCRIPT_TAGS in
    reverse order.
    """
    if script_code in OTTags.SCRIPT_EXCEPTIONS:
        return [OTTags.SCRIPT_EXCEPTIONS[script_code]]

    if script_code not in Scripts.NAMES:
        return [OTTags.DEFAULT_SCRIPT]

    base_tag = script_code[0].lower() + script_code[1:]
    extra_tags = OTTags.NEW_SCRIPT_TAGS.get(script_code, ())
    # last in, first out: the extra tags come first, in reverse order
    return [*reversed(extra_tags), base_tag]
def ot_tag_to_script(tag):
    """Map an OpenType script tag to its four-letter Unicode script code.

    Returns None for the "DFLT" tag, or when no Unicode script is
    associated with the tag.
    Raises ValueError if the tag is invalid: empty after stripping
    surrounding whitespace, containing an inner space, or longer than four
    characters.
    """
    tag = tostr(tag).strip()
    if not tag or " " in tag or len(tag) > 4:
        raise ValueError("invalid OpenType tag: %r" % tag)

    # Resolve aliases, then pad with spaces to the full four characters.
    tag = OTTags.SCRIPT_ALIASES.get(tag, tag)
    tag = tag.ljust(4)

    if tag == OTTags.DEFAULT_SCRIPT:
        # it's unclear which Unicode script the "DFLT" OpenType tag maps to,
        # so here we return None
        return None

    # Explicit reverse tables take precedence over the algorithmic mapping.
    for reverse_table in (
        OTTags.NEW_SCRIPT_TAGS_REVERSED,
        OTTags.SCRIPT_EXCEPTIONS_REVERSED,
    ):
        if tag in reverse_table:
            return reverse_table[tag]

    # This side of the conversion is fully algorithmic:
    # the first character is uppercased, and any spaces at the end of the
    # tag are replaced by repeating the previous letter. Eg 'nko ' -> 'Nkoo'.
    candidate = tag[0].upper() + tag[1]
    for position in (2, 3):
        current = tag[position]
        candidate += candidate[position - 1] if current == " " else current

    return candidate if candidate in Scripts.NAMES else None