Hash :
8e10315e
Author :
Thomas de Grivel
Date :
2024-02-13T15:33:38
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
# CompositionExclusions-15.1.0.txt
# Date: 2023-01-05
# © 2023 Unicode®, Inc.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/
#
# This file lists the characters for the Composition Exclusion Table
# defined in UAX #15, Unicode Normalization Forms.
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# For more information, see
# https://www.unicode.org/reports/tr15/#Primary_Exclusion_List_Table
#
# For a full derivation of composition exclusions, see the derived property
# Full_Composition_Exclusion in DerivedNormalizationProps.txt
#
# ================================================
# (1) Script Specifics
#
# This list of characters cannot be derived from the UnicodeData.txt file.
#
# Included are the following subcategories:
#
# - Many precomposed characters using a nukta diacritic in the Devanagari,
# Bangla/Bengali, Gurmukhi, or Odia/Oriya scripts.
# - Tibetan letters and subjoined letters with decompositions including
# U+0FB7 TIBETAN SUBJOINED LETTER HA or U+0FB5 TIBETAN SUBJOINED LETTER SSA.
# - Two two-part Tibetan vowel signs involving top and bottom pieces.
# - A large collection of compatibility precomposed characters for Hebrew
# involving dagesh and/or other combining marks.
#
# This list is unlikely to grow.
#
# ================================================
0958 # DEVANAGARI LETTER QA
0959 # DEVANAGARI LETTER KHHA
095A # DEVANAGARI LETTER GHHA
095B # DEVANAGARI LETTER ZA
095C # DEVANAGARI LETTER DDDHA
095D # DEVANAGARI LETTER RHA
095E # DEVANAGARI LETTER FA
095F # DEVANAGARI LETTER YYA
09DC # BENGALI LETTER RRA
09DD # BENGALI LETTER RHA
09DF # BENGALI LETTER YYA
0A33 # GURMUKHI LETTER LLA
0A36 # GURMUKHI LETTER SHA
0A59 # GURMUKHI LETTER KHHA
0A5A # GURMUKHI LETTER GHHA
0A5B # GURMUKHI LETTER ZA
0A5E # GURMUKHI LETTER FA
0B5C # ORIYA LETTER RRA
0B5D # ORIYA LETTER RHA
0F43 # TIBETAN LETTER GHA
0F4D # TIBETAN LETTER DDHA
0F52 # TIBETAN LETTER DHA
0F57 # TIBETAN LETTER BHA
0F5C # TIBETAN LETTER DZHA
0F69 # TIBETAN LETTER KSSA
0F76 # TIBETAN VOWEL SIGN VOCALIC R
0F78 # TIBETAN VOWEL SIGN VOCALIC L
0F93 # TIBETAN SUBJOINED LETTER GHA
0F9D # TIBETAN SUBJOINED LETTER DDHA
0FA2 # TIBETAN SUBJOINED LETTER DHA
0FA7 # TIBETAN SUBJOINED LETTER BHA
0FAC # TIBETAN SUBJOINED LETTER DZHA
0FB9 # TIBETAN SUBJOINED LETTER KSSA
FB1D # HEBREW LETTER YOD WITH HIRIQ
FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
FB2A # HEBREW LETTER SHIN WITH SHIN DOT
FB2B # HEBREW LETTER SHIN WITH SIN DOT
FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
FB2E # HEBREW LETTER ALEF WITH PATAH
FB2F # HEBREW LETTER ALEF WITH QAMATS
FB30 # HEBREW LETTER ALEF WITH MAPIQ
FB31 # HEBREW LETTER BET WITH DAGESH
FB32 # HEBREW LETTER GIMEL WITH DAGESH
FB33 # HEBREW LETTER DALET WITH DAGESH
FB34 # HEBREW LETTER HE WITH MAPIQ
FB35 # HEBREW LETTER VAV WITH DAGESH
FB36 # HEBREW LETTER ZAYIN WITH DAGESH
FB38 # HEBREW LETTER TET WITH DAGESH
FB39 # HEBREW LETTER YOD WITH DAGESH
FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
FB3B # HEBREW LETTER KAF WITH DAGESH
FB3C # HEBREW LETTER LAMED WITH DAGESH
FB3E # HEBREW LETTER MEM WITH DAGESH
FB40 # HEBREW LETTER NUN WITH DAGESH
FB41 # HEBREW LETTER SAMEKH WITH DAGESH
FB43 # HEBREW LETTER FINAL PE WITH DAGESH
FB44 # HEBREW LETTER PE WITH DAGESH
FB46 # HEBREW LETTER TSADI WITH DAGESH
FB47 # HEBREW LETTER QOF WITH DAGESH
FB48 # HEBREW LETTER RESH WITH DAGESH
FB49 # HEBREW LETTER SHIN WITH DAGESH
FB4A # HEBREW LETTER TAV WITH DAGESH
FB4B # HEBREW LETTER VAV WITH HOLAM
FB4C # HEBREW LETTER BET WITH RAFE
FB4D # HEBREW LETTER KAF WITH RAFE
FB4E # HEBREW LETTER PE WITH RAFE
# Total code points: 67
# ================================================
# (2) Post Composition Version precomposed characters
#
# These characters cannot be derived solely from the UnicodeData.txt file
# in this version of Unicode.
#
# Note that characters added to the standard after the
# Composition Version and which have canonical decomposition mappings
# are not automatically added to this list of Post Composition
# Version precomposed characters.
# ================================================
2ADC # FORKING
1D15E # MUSICAL SYMBOL HALF NOTE
1D15F # MUSICAL SYMBOL QUARTER NOTE
1D160 # MUSICAL SYMBOL EIGHTH NOTE
1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D1BB # MUSICAL SYMBOL MINIMA
1D1BC # MUSICAL SYMBOL MINIMA BLACK
1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
1D1BF # MUSICAL SYMBOL FUSA WHITE
1D1C0 # MUSICAL SYMBOL FUSA BLACK
# Total code points: 14
# ================================================
# (3) Singleton Decompositions
#
# These characters can be derived from the UnicodeData.txt file
# by including all canonically decomposable characters whose
# canonical decomposition consists of a single character.
#
# These characters are simply quoted here for reference.
# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
# ================================================
# 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
# 0343 COMBINING GREEK KORONIS
# 0374 GREEK NUMERAL SIGN
# 037E GREEK QUESTION MARK
# 0387 GREEK ANO TELEIA
# 1F71 GREEK SMALL LETTER ALPHA WITH OXIA
# 1F73 GREEK SMALL LETTER EPSILON WITH OXIA
# 1F75 GREEK SMALL LETTER ETA WITH OXIA
# 1F77 GREEK SMALL LETTER IOTA WITH OXIA
# 1F79 GREEK SMALL LETTER OMICRON WITH OXIA
# 1F7B GREEK SMALL LETTER UPSILON WITH OXIA
# 1F7D GREEK SMALL LETTER OMEGA WITH OXIA
# 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA
# 1FBE GREEK PROSGEGRAMMENI
# 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA
# 1FCB GREEK CAPITAL LETTER ETA WITH OXIA
# 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
# 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA
# 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
# 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA
# 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
# 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA
# 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA
# 1FFD GREEK OXIA
# 2000..2001 [2] EN QUAD..EM QUAD
# 2126 OHM SIGN
# 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN
# 2329 LEFT-POINTING ANGLE BRACKET
# 232A RIGHT-POINTING ANGLE BRACKET
# F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
# FA10 CJK COMPATIBILITY IDEOGRAPH-FA10
# FA12 CJK COMPATIBILITY IDEOGRAPH-FA12
# FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
# FA20 CJK COMPATIBILITY IDEOGRAPH-FA20
# FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
# FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
# FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D
# FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
# 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 1035
# ================================================
# (4) Non-Starter Decompositions
#
# These characters can be derived from the UnicodeData.txt file
# by including each expanding canonical decomposition
# (i.e., those which canonically decompose to a sequence
# of characters instead of a single character), such that:
#
# A. The character is not a Starter.
#
# OR (inclusive)
#
# B. The character's canonical decomposition begins
# with a character that is not a Starter.
#
# Note that a "Starter" is any character with a zero combining class.
#
# These characters are simply quoted here for reference.
# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
# ================================================
# 0344 COMBINING GREEK DIALYTIKA TONOS
# 0F73 TIBETAN VOWEL SIGN II
# 0F75 TIBETAN VOWEL SIGN UU
# 0F81 TIBETAN VOWEL SIGN REVERSED II
# Total code points: 4
# EOF