#!/usr/bin/env python3
# A little utility to dump ranges of unicode glyphs to visually
# compare terminal/glyph output.
# based on https://github.com/bew/dotfiles/blob/0de029ce22/bin/unicode-dump

import argparse

# Maps a block name to its inclusive (first, last) code point pair.
# NOTE(review): "Powerline" overlaps the "Private Use Area" entry below;
# presumably it is a convenience range for Powerline font glyphs rather
# than a standard block -- confirm before relying on it.
UNICODE_BLOCKS = {
    "Powerline": (0xe0b0, 0xe0d4),
    "Basic Latin": (0x0000, 0x007F),
    "Latin-1 Supplement": (0x0080, 0x00FF),
    "Latin Extended-A": (0x0100, 0x017F),
    "Latin Extended-B": (0x0180, 0x024F),
    "IPA Extensions": (0x0250, 0x02AF),
    "Spacing Modifier Letters": (0x02B0, 0x02FF),
    "Combining Diacritical Marks": (0x0300, 0x036F),
    "Greek and Coptic": (0x0370, 0x03FF),
    "Cyrillic": (0x0400, 0x04FF),
    "Cyrillic Supplement": (0x0500, 0x052F),
    "Armenian": (0x0530, 0x058F),
    "Hebrew": (0x0590, 0x05FF),
    "Arabic": (0x0600, 0x06FF),
    "Syriac": (0x0700, 0x074F),
    "Arabic Supplement": (0x0750, 0x077F),
    "Thaana": (0x0780, 0x07BF),
    "NKo": (0x07C0, 0x07FF),
    "Samaritan": (0x0800, 0x083F),
    "Mandaic": (0x0840, 0x085F),
    "Syriac Supplement": (0x0860, 0x086F),
    "Arabic Extended-A": (0x08A0, 0x08FF),
    "Devanagari": (0x0900, 0x097F),
    "Bengali": (0x0980, 0x09FF),
    "Gurmukhi": (0x0A00, 0x0A7F),
    "Gujarati": (0x0A80, 0x0AFF),
    "Oriya": (0x0B00, 0x0B7F),
    "Tamil": (0x0B80, 0x0BFF),
    "Telugu": (0x0C00, 0x0C7F),
    "Kannada": (0x0C80, 0x0CFF),
    "Malayalam": (0x0D00, 0x0D7F),
    "Sinhala": (0x0D80, 0x0DFF),
    "Thai": (0x0E00, 0x0E7F),
    "Lao": (0x0E80, 0x0EFF),
    "Tibetan": (0x0F00, 0x0FFF),
    "Myanmar": (0x1000, 0x109F),
    "Georgian": (0x10A0, 0x10FF),
    "Hangul Jamo": (0x1100, 0x11FF),
    "Ethiopic": (0x1200, 0x137F),
    "Ethiopic Supplement": (0x1380, 0x139F),
    "Cherokee": (0x13A0, 0x13FF),
    "Unified Canadian Aboriginal Syllabics": (0x1400, 0x167F),
    "Ogham": (0x1680, 0x169F),
    "Runic": (0x16A0, 0x16FF),
    "Tagalog": (0x1700, 0x171F),
    "Hanunoo": (0x1720, 0x173F),
    "Buhid": (0x1740, 0x175F),
    "Tagbanwa": (0x1760, 0x177F),
    "Khmer": (0x1780, 0x17FF),
    "Mongolian": (0x1800, 0x18AF),
    "Unified Canadian Aboriginal Syllabics Extended": (0x18B0, 0x18FF),
    "Limbu": (0x1900, 0x194F),
    "Tai Le": (0x1950, 0x197F),
    "New Tai Lue": (0x1980, 0x19DF),
    "Khmer Symbols": (0x19E0, 0x19FF),
    "Buginese": (0x1A00, 0x1A1F),
    "Tai Tham": (0x1A20, 0x1AAF),
    "Combining Diacritical Marks Extended": (0x1AB0, 0x1AFF),
    "Balinese": (0x1B00, 0x1B7F),
    "Sundanese": (0x1B80, 0x1BBF),
    "Batak": (0x1BC0, 0x1BFF),
    "Lepcha": (0x1C00, 0x1C4F),
    "Ol Chiki": (0x1C50, 0x1C7F),
    "Cyrillic Extended C": (0x1C80, 0x1C8F),
    "Georgian Extended": (0x1C90, 0x1CBF),
    "Sundanese Supplement": (0x1CC0, 0x1CCF),
    "Vedic Extensions": (0x1CD0, 0x1CFF),
    "Phonetic Extensions": (0x1D00, 0x1D7F),
    "Phonetic Extensions Supplement": (0x1D80, 0x1DBF),
    "Combining Diacritical Marks Supplement": (0x1DC0, 0x1DFF),
    "Latin Extended Additional": (0x1E00, 0x1EFF),
    "Greek Extended": (0x1F00, 0x1FFF),
    "General Punctuation": (0x2000, 0x206F),
    "Superscripts and Subscripts": (0x2070, 0x209F),
    "Currency Symbols": (0x20A0, 0x20CF),
    "Combining Diacritical Marks for Symbols": (0x20D0, 0x20FF),
    "Letterlike Symbols": (0x2100, 0x214F),
    "Number Forms": (0x2150, 0x218F),
    "Arrows": (0x2190, 0x21FF),
    "Mathematical Operators": (0x2200, 0x22FF),
    "Miscellaneous Technical": (0x2300, 0x23FF),
    "Control Pictures": (0x2400, 0x243F),
    "Optical Character Recognition": (0x2440, 0x245F),
    "Enclosed Alphanumerics": (0x2460, 0x24FF),
    "Box Drawing": (0x2500, 0x257F),
    "Block Elements": (0x2580, 0x259F),
    "Geometric Shapes": (0x25A0, 0x25FF),
    "Miscellaneous Symbols": (0x2600, 0x26FF),
    "Dingbats": (0x2700, 0x27BF),
    "Miscellaneous Mathematical Symbols-A": (0x27C0, 0x27EF),
    "Supplemental Arrows-A": (0x27F0, 0x27FF),
    "Braille Patterns": (0x2800, 0x28FF),
    "Supplemental Arrows-B": (0x2900, 0x297F),
    "Miscellaneous Mathematical Symbols-B": (0x2980, 0x29FF),
    "Supplemental Mathematical Operators": (0x2A00, 0x2AFF),
    "Miscellaneous Symbols and Arrows": (0x2B00, 0x2BFF),
    "Glagolitic": (0x2C00, 0x2C5F),
    "Latin Extended-C": (0x2C60, 0x2C7F),
    "Coptic": (0x2C80, 0x2CFF),
    "Georgian Supplement": (0x2D00, 0x2D2F),
    "Tifinagh": (0x2D30, 0x2D7F),
    "Ethiopic Extended": (0x2D80, 0x2DDF),
    "Cyrillic Extended-A": (0x2DE0, 0x2DFF),
    "Supplemental Punctuation": (0x2E00, 0x2E7F),
    "CJK Radicals Supplement": (0x2E80, 0x2EFF),
    "Kangxi Radicals": (0x2F00, 0x2FDF),
    "Ideographic Description Characters": (0x2FF0, 0x2FFF),
    "CJK Symbols and Punctuation": (0x3000, 0x303F),
    "Hiragana": (0x3040, 0x309F),
    "Katakana": (0x30A0, 0x30FF),
    "Bopomofo": (0x3100, 0x312F),
    "Hangul Compatibility Jamo": (0x3130, 0x318F),
    "Kanbun": (0x3190, 0x319F),
    "Bopomofo Extended": (0x31A0, 0x31BF),
    "CJK Strokes": (0x31C0, 0x31EF),
    "Katakana Phonetic Extensions": (0x31F0, 0x31FF),
    "Enclosed CJK Letters and Months": (0x3200, 0x32FF),
    "CJK Compatibility": (0x3300, 0x33FF),
    "CJK Unified Ideographs Extension A": (0x3400, 0x4DBF),
    "Yijing Hexagram Symbols": (0x4DC0, 0x4DFF),
    "CJK Unified Ideographs": (0x4E00, 0x9FFF),
    "Yi Syllables": (0xA000, 0xA48F),
    "Yi Radicals": (0xA490, 0xA4CF),
    "Lisu": (0xA4D0, 0xA4FF),
    "Vai": (0xA500, 0xA63F),
    "Cyrillic Extended-B": (0xA640, 0xA69F),
    "Bamum": (0xA6A0, 0xA6FF),
    "Modifier Tone Letters": (0xA700, 0xA71F),
    "Latin Extended-D": (0xA720, 0xA7FF),
    "Syloti Nagri": (0xA800, 0xA82F),
    "Common Indic Number Forms": (0xA830, 0xA83F),
    "Phags-pa": (0xA840, 0xA87F),
    "Saurashtra": (0xA880, 0xA8DF),
    "Devanagari Extended": (0xA8E0, 0xA8FF),
    "Kayah Li": (0xA900, 0xA92F),
    "Rejang": (0xA930, 0xA95F),
    "Hangul Jamo Extended-A": (0xA960, 0xA97F),
    "Javanese": (0xA980, 0xA9DF),
    "Myanmar Extended-B": (0xA9E0, 0xA9FF),
    "Cham": (0xAA00, 0xAA5F),
    "Myanmar Extended-A": (0xAA60, 0xAA7F),
    "Tai Viet": (0xAA80, 0xAADF),
    "Meetei Mayek Extensions": (0xAAE0, 0xAAFF),
    "Ethiopic Extended-A": (0xAB00, 0xAB2F),
    "Latin Extended-E": (0xAB30, 0xAB6F),
    "Cherokee Supplement": (0xAB70, 0xABBF),
    "Meetei Mayek": (0xABC0, 0xABFF),
    "Hangul Syllables": (0xAC00, 0xD7AF),
    "Hangul Jamo Extended-B": (0xD7B0, 0xD7FF),
    "High Surrogates": (0xD800, 0xDB7F),
    "High Private Use Surrogates": (0xDB80, 0xDBFF),
    "Low Surrogates": (0xDC00, 0xDFFF),
    "Private Use Area": (0xE000, 0xF8FF),
    "CJK Compatibility Ideographs": (0xF900, 0xFAFF),
    "Alphabetic Presentation Forms": (0xFB00, 0xFB4F),
    "Arabic Presentation Forms-A": (0xFB50, 0xFDFF),
    "Variation Selectors": (0xFE00, 0xFE0F),
    "Vertical Forms": (0xFE10, 0xFE1F),
    "Combining Half Marks": (0xFE20, 0xFE2F),
    "CJK Compatibility Forms": (0xFE30, 0xFE4F),
    "Small Form Variants": (0xFE50, 0xFE6F),
    "Arabic Presentation Forms-B": (0xFE70, 0xFEFF),
    "Halfwidth and Fullwidth Forms": (0xFF00, 0xFFEF),
    "Specials": (0xFFF0, 0xFFFF),
    "Linear B Syllabary": (0x10000, 0x1007F),
    "Linear B Ideograms": (0x10080, 0x100FF),
    "Aegean Numbers": (0x10100, 0x1013F),
    "Ancient Greek Numbers": (0x10140, 0x1018F),
    "Ancient Symbols": (0x10190, 0x101CF),
    "Phaistos Disc": (0x101D0, 0x101FF),
    "Lycian": (0x10280, 0x1029F),
    "Carian": (0x102A0, 0x102DF),
    "Coptic Epact Numbers": (0x102E0, 0x102FF),
    "Old Italic": (0x10300, 0x1032F),
    "Gothic": (0x10330, 0x1034F),
    "Old Permic": (0x10350, 0x1037F),
    "Ugaritic": (0x10380, 0x1039F),
    "Old Persian": (0x103A0, 0x103DF),
    "Deseret": (0x10400, 0x1044F),
    "Shavian": (0x10450, 0x1047F),
    "Osmanya": (0x10480, 0x104AF),
    "Osage": (0x104B0, 0x104FF),
    "Elbasan": (0x10500, 0x1052F),
    "Caucasian Albanian": (0x10530, 0x1056F),
    "Linear A": (0x10600, 0x1077F),
    "Cypriot Syllabary": (0x10800, 0x1083F),
    "Imperial Aramaic": (0x10840, 0x1085F),
    "Palmyrene": (0x10860, 0x1087F),
    "Nabataean": (0x10880, 0x108AF),
    "Hatran": (0x108E0, 0x108FF),
    "Phoenician": (0x10900, 0x1091F),
    "Lydian": (0x10920, 0x1093F),
    "Meroitic Hieroglyphs": (0x10980, 0x1099F),
    "Meroitic Cursive": (0x109A0, 0x109FF),
    "Kharoshthi": (0x10A00, 0x10A5F),
    "Old South Arabian": (0x10A60, 0x10A7F),
    "Old North Arabian": (0x10A80, 0x10A9F),
    "Manichaean": (0x10AC0, 0x10AFF),
    "Avestan": (0x10B00, 0x10B3F),
    "Inscriptional Parthian": (0x10B40, 0x10B5F),
    "Inscriptional Pahlavi": (0x10B60, 0x10B7F),
    "Psalter Pahlavi": (0x10B80, 0x10BAF),
    "Old Turkic": (0x10C00, 0x10C4F),
    "Old Hungarian": (0x10C80, 0x10CFF),
    "Hanifi Rohingya": (0x10D00, 0x10D3F),
    "Rumi Numeral Symbols": (0x10E60, 0x10E7F),
    "Yezidi": (0x10E80, 0x10EBF),
    "Old Sogdian": (0x10F00, 0x10F2F),
    "Sogdian": (0x10F30, 0x10F6F),
    "Chorasmian": (0x10FB0, 0x10FDF),
    "Elymaic": (0x10FE0, 0x10FFF),
    "Brahmi": (0x11000, 0x1107F),
    "Kaithi": (0x11080, 0x110CF),
    "Sora Sompeng": (0x110D0, 0x110FF),
    "Chakma": (0x11100, 0x1114F),
    "Mahajani": (0x11150, 0x1117F),
    "Sharada": (0x11180, 0x111DF),
    "Sinhala Archaic Numbers": (0x111E0, 0x111FF),
    "Khojki": (0x11200, 0x1124F),
    "Multani": (0x11280, 0x112AF),
    "Khudawadi": (0x112B0, 0x112FF),
    "Grantha": (0x11300, 0x1137F),
    "Newa": (0x11400, 0x1147F),
    "Tirhuta": (0x11480, 0x114DF),
    "Siddham": (0x11580, 0x115FF),
    "Modi": (0x11600, 0x1165F),
    "Mongolian Supplement": (0x11660, 0x1167F),
    "Takri": (0x11680, 0x116CF),
    "Ahom": (0x11700, 0x1173F),
    "Dogra": (0x11800, 0x1184F),
    "Warang Citi": (0x118A0, 0x118FF),
    "Dives Akuru": (0x11900, 0x1195F),
    "Nandinagari": (0x119A0, 0x119FF),
    "Zanabazar Square": (0x11A00, 0x11A4F),
    "Soyombo": (0x11A50, 0x11AAF),
    "Pau Cin Hau": (0x11AC0, 0x11AFF),
    "Bhaiksuki": (0x11C00, 0x11C6F),
    "Marchen": (0x11C70, 0x11CBF),
    "Masaram Gondi": (0x11D00, 0x11D5F),
    "Gunjala Gondi": (0x11D60, 0x11DAF),
    "Makasar": (0x11EE0, 0x11EFF),
    "Lisu Supplement": (0x11FB0, 0x11FBF),
    "Tamil Supplement": (0x11FC0, 0x11FFF),
    "Cuneiform": (0x12000, 0x123FF),
    "Cuneiform Numbers and Punctuation": (0x12400, 0x1247F),
    "Early Dynastic Cuneiform": (0x12480, 0x1254F),
    "Egyptian Hieroglyphs": (0x13000, 0x1342F),
    "Egyptian Hieroglyph Format Controls": (0x13430, 0x1343F),
    "Anatolian Hieroglyphs": (0x14400, 0x1467F),
    "Bamum Supplement": (0x16800, 0x16A3F),
    "Mro": (0x16A40, 0x16A6F),
    "Bassa Vah": (0x16AD0, 0x16AFF),
    "Pahawh Hmong": (0x16B00, 0x16B8F),
    "Medefaidrin": (0x16E40, 0x16E9F),
    "Miao": (0x16F00, 0x16F9F),
    "Ideographic Symbols and Punctuation": (0x16FE0, 0x16FFF),
    "Tangut": (0x17000, 0x187FF),
    "Tangut Components": (0x18800, 0x18AFF),
    "Khitan Small Script": (0x18B00, 0x18CFF),
    "Tangut Supplement": (0x18D00, 0x18D8F),
    "Kana Supplement": (0x1B000, 0x1B0FF),
    "Kana Extended-A": (0x1B100, 0x1B12F),
    "Small Kana Extension": (0x1B130, 0x1B16F),
    "Nushu": (0x1B170, 0x1B2FF),
    "Duployan": (0x1BC00, 0x1BC9F),
    "Shorthand Format Controls": (0x1BCA0, 0x1BCAF),
    "Byzantine Musical Symbols": (0x1D000, 0x1D0FF),
    "Musical Symbols": (0x1D100, 0x1D1FF),
    "Ancient Greek Musical Notation": (0x1D200, 0x1D24F),
    "Mayan Numerals": (0x1D2E0, 0x1D2FF),
    "Tai Xuan Jing Symbols": (0x1D300, 0x1D35F),
    "Counting Rod Numerals": (0x1D360, 0x1D37F),
    "Mathematical Alphanumeric Symbols": (0x1D400, 0x1D7FF),
    "Sutton SignWriting": (0x1D800, 0x1DAAF),
    "Glagolitic Supplement": (0x1E000, 0x1E02F),
    "Nyiakeng Puachue Hmong": (0x1E100, 0x1E14F),
    "Wancho": (0x1E2C0, 0x1E2FF),
    "Mende Kikakui": (0x1E800, 0x1E8DF),
    "Adlam": (0x1E900, 0x1E95F),
    "Indic Siyaq Numbers": (0x1EC70, 0x1ECBF),
    "Ottoman Siyaq Numbers": (0x1ED00, 0x1ED4F),
    "Arabic Mathematical Alphabetic Symbols": (0x1EE00, 0x1EEFF),
    "Mahjong Tiles": (0x1F000, 0x1F02F),
    "Domino Tiles": (0x1F030, 0x1F09F),
    "Playing Cards": (0x1F0A0, 0x1F0FF),
    "Enclosed Alphanumeric Supplement": (0x1F100, 0x1F1FF),
    "Enclosed Ideographic Supplement": (0x1F200, 0x1F2FF),
    "Miscellaneous Symbols and Pictographs": (0x1F300, 0x1F5FF),
    "Emoticons (Emoji)": (0x1F600, 0x1F64F),
    "Ornamental Dingbats": (0x1F650, 0x1F67F),
    "Transport and Map Symbols": (0x1F680, 0x1F6FF),
    "Alchemical Symbols": (0x1F700, 0x1F77F),
    "Geometric Shapes Extended": (0x1F780, 0x1F7FF),
    "Supplemental Arrows-C": (0x1F800, 0x1F8FF),
    "Supplemental Symbols and Pictographs": (0x1F900, 0x1F9FF),
    "Chess Symbols": (0x1FA00, 0x1FA6F),
    "Symbols and Pictographs Extended-A": (0x1FA70, 0x1FAFF),
    "Symbols for Legacy Computing": (0x1FB00, 0x1FBFF),
    "CJK Unified Ideographs Extension B": (0x20000, 0x2A6DF),
    "CJK Unified Ideographs Extension C": (0x2A700, 0x2B73F),
    "CJK Unified Ideographs Extension D": (0x2B740, 0x2B81F),
    "CJK Unified Ideographs Extension E": (0x2B820, 0x2CEAF),
    "CJK Unified Ideographs Extension F": (0x2CEB0, 0x2EBEF),
    "CJK Compatibility Ideographs Supplement": (0x2F800, 0x2FA1F),
    "CJK Unified Ideographs Extension G": (0x30000, 0x3134F),
    "Tags": (0xE0000, 0xE007F),
    "Variation Selectors Supplement": (0xE0100, 0xE01EF),
    "Supplementary Private Use Area-A": (0xF0000, 0xFFFFF),
    "Supplementary Private Use Area-B": (0x100000, 0x10FFFF),
}

# Printed instead of code points that cannot or should not be written raw.
_PLACEHOLDER = "\ufffd"


def resolve_block_name(name):
    """Return the UNICODE_BLOCKS key selected by *name*.

    Matching is case-insensitive. A key equal to *name* wins outright, so
    "Arabic" selects "Arabic" even though "Arabic Supplement" also contains
    it. Otherwise *name* must be a substring of exactly one key.

    Raises:
        KeyError: if no key contains *name*, or if several keys contain it
            and none of them equals it.
    """
    wanted = name.lower()
    candidates = []
    for key in UNICODE_BLOCKS:
        lowered = key.lower()
        if lowered == wanted:
            # Without this, a name that is a prefix of other block names
            # could never be selected.
            return key
        if wanted in lowered:
            candidates.append(key)
    if not candidates:
        raise KeyError("{} is not a valid block name".format(name))
    if len(candidates) == 1:
        return candidates[0]
    raise KeyError("{} could match {}".format(name, ", ".join(candidates)))


def _glyph(cp):
    """Return the text to print for code point *cp*."""
    # Lone surrogates cannot be encoded as UTF-8; print() would raise.
    if 0xD800 <= cp <= 0xDFFF:
        return _PLACEHOLDER
    # C0/C1 controls and DEL (ESC, CR, BEL, ...) have no glyph and would
    # be acted on by the terminal instead of displayed.
    if cp < 0x20 or 0x7F <= cp <= 0x9F:
        return _PLACEHOLDER
    return chr(cp)


def print_range(start, end):
    """Print every code point from *start* to *end* inclusive.

    A header line with both bounds is printed first. Glyphs are separated
    by a space and a new row starts at every code point that is a multiple
    of 32; rows alternate between the default and a blue (SGR 44)
    background. Surrogates and control characters are shown as U+FFFD.
    """
    print("0x{:x} -> 0x{:x}".format(start, end))
    is_alt = False
    for cp in range(start, end + 1):
        if (cp & 0x1f) == 0:
            # Row boundary: emit the newline and select this row's colour.
            if is_alt:
                print("\n\x1b[44m", end='')
            else:
                print("\n\x1b[0m", end='')
            is_alt = not is_alt
        print(_glyph(cp), end=' ')
    # Reset attributes so the colour does not leak past the dump.
    print("\x1b[0m")


def do_show_range(args):
    """Handle `show-range`: parse the two bounds and print the range.

    `args.start` and `args.end` are strings parsed with `int(text, 0)`, so
    decimal, `0x`, `0o` and `0b` spellings are all accepted.
    """
    print_range(int(args.start, 0), int(args.end, 0))


def do_show_block(args):
    """Handle `show-block`: resolve `args.name` and print that block."""
    name = resolve_block_name(args.name)
    print("Showing glyphs in `{}`".format(name))
    (start, end) = UNICODE_BLOCKS[name]
    print_range(start, end)


def do_list_blocks(args):
    """Handle `list-blocks`: print every block name, sorted, one per line."""
    print("\n".join(sorted(UNICODE_BLOCKS)))


def _build_parser():
    """Create the argument parser with its three subcommands."""
    parser = argparse.ArgumentParser()
    sub = parser.add_subparsers()

    show_block = sub.add_parser("show-block",
                                help="print glyphs in named block")
    show_block.add_argument("name")
    show_block.set_defaults(func=do_show_block)

    list_blocks = sub.add_parser("list-blocks", help="list unicode blocks")
    list_blocks.set_defaults(func=do_list_blocks)

    show_range = sub.add_parser("show-range", help="print glyphs in a range")
    show_range.add_argument('start')
    show_range.add_argument('end')
    show_range.set_defaults(func=do_show_range)
    return parser


def main(argv=None):
    """Parse *argv* (default: sys.argv[1:]) and run the chosen subcommand."""
    parser = _build_parser()
    args = parser.parse_args(argv)
    if not hasattr(args, "func"):
        # No subcommand given: report it as a usage error instead of
        # failing with AttributeError on args.func.
        parser.error("a subcommand is required")
    try:
        args.func(args)
    except KeyError as err:
        # Unknown or ambiguous block name: show the message, not a traceback.
        parser.error(err.args[0])
    print("", flush=True)


if __name__ == "__main__":
    main()