; Bugs:
;   I\ does not convert correctly to IPA and back to CXS
;   (probably the same for U\).

; Main IPA -> CXS table.
;
; Each entry x is a list:
;   (first x)  = Unicode code point
;   (second x) = CXS string
;   (third x)  = description
;   (fourth x) = degree of deprecation (if missing: not deprecated)
;
; Several code points share one CXS string (e.g. "_T"); the preferred
; reverse mapping for those is given in *charmap-cxs->ipa-supplement*.
(defparameter *charmap-ipa->cxs-iso-8859-1*
  '(
    ; First, trivial mappings that are included in order to detect that
    ; the input string is valid.  These are basically lowercase ASCII
    ; characters:
    ;     a b c d e f h i j k l m n o p q r s t u v w x y z SPACE . |
    (#x0020 " " "space is a general break for keeping the overview")
    (#x002e "." "syllable break")
    (#x007c "|" "minor intonation break")
    (#x0061 "a" "open front unrounded vowel")
    (#x0062 "b" "voiced bilabial stop")
    (#x0063 "c" "voiceless palatal stop")
    (#x0064 "d" "voiced alveolar stop")
    (#x0065 "e" "front close-mid unrounded vowel")
    (#x0066 "f" "voiceless labialdental fricative")
    ;(#x0067 "g") ; this is a script g in IPA
    (#x0068 "h" "voiceless glottal fricative")
    (#x0069 "i" "front close unrounded vowel")
    (#x006a "j" "voiced palatal approximant")
    (#x006b "k" "voiceless velar stop")
    (#x006c "l" "voiced alveolar lateral approximant")
    (#x006d "m" "voiced bilabial nasal")
    (#x006e "n" "voiced alveolar nasal")
    (#x006f "o" "back close-mid rounded vowel")
    (#x0070 "p" "voiceless bilabial stop")
    (#x0071 "q" "voiceless uvular stop")
    (#x0072 "r" "voiced alveolar trill")
    (#x0073 "s" "voiceless alveolar fricative")
    (#x0074 "t" "voiceless alveolar stop")
    (#x0075 "u" "back close rounded vowel")
    (#x0076 "v" "voiced labiodental fricative")
    (#x0077 "w" "voiced labiovelar approximant")
    (#x0078 "x" "voiceless velar fricative")
    (#x0079 "y" "front close rounded vowel")
    (#x007a "z" "voiced alveolar fricative")

    ; latin extended A: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    (#x0127 "X\\" "pharyngeal voiceless fricative")
    (#x014B "N" "eng")
    (#x0153 "9" "oe")

    ; latin extended B: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    (#x0180 "B" "b with stroke: same as beta" 1)
    ;(#x0188 "" "c with hook: needed?")
    ;(#x0192 "" "f with hook: needed?")
    ;(#x0195 "" "hv lig")
    ;(#x0199 "" "k with hook")
    ;(#x019B "" "l with stroke")
    ;(#x019E "" "n with long right leg")
    (#x01A5 "p_<" "p with hook: voiceless implosive?")
    (#x01AB "t_j" "t with palatal hook: ancient for t_j" 1)
    (#x01AD "t_<" "t with hook: voiceless implosive?")
    (#x01BB "dz)" "two with stroke: archaic for [dz]; use U+02A3 instead" 1)
    ;   (#x01BB "d_z") ; OPTION
    (#x01C0 "|\\" "dental click")
    (#x01C1 "|\\|\\" "lateral click")
    (#x01C2 "=\\" "alveolar click")
    (#x01C3 "!\\" "retroflex click")

    ; IPA extensions: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    (#x0250 "6" "turned a")
    (#x0251 "A" "script a / latin letter alpha")
    (#x0252 "Q" "turned script a / latin letter turned alpha")
    (#x0253 "b_<" "b with hook")
    (#x0254 "O" "open o")
    (#x0255 "s\\" "c with curl")
    (#x0256 "d`" "d with tail")
    (#x0257 "d_<" "d with hook")
    (#x0258 "@\\" "reversed e")
    (#x0259 "@" "turned e / schwa")
    (#x025A "@`" "schwa with hook: @ + rhoticity")
    (#x025B "E" "epsilon")
    (#x025C "3" "reversed epsilon")
    (#x025D "3`" "reversed epsilon with hook: 3 + rhoticity")
    (#x025E "3\\" "closed epsilon")
    (#x025F "J\\" "dotless j with stroke: vcd. palatal stop")
    (#x0260 "g_<" "g with hook: implosive")
    (#x0261 "g" "script g == g = vcd. velar stop")
    (#x0262 "G\\" "sc g = vcd. uvul. stop")
    (#x0263 "G" "gamma: vcd. velar fric.")
    (#x0264 "7" "rams horn / baby gamma: unr. back. high-mid vowel")
    (#x0265 "H" "turned h: vcd. lab.-pal. approx.")
    (#x0266 "h\\" "h with hook: vcd. glot. fric.")
    (#x0267 "x\\" "heng with hook: sim. S and x")
    (#x0268 "1" "i with stroke")
    ;   (#x0268 "i\" "i with stroke") ; OPTION
    (#x0269 "I" "letter iota. obsoleted by sc i (U+026A)" 1)
    (#x026A "I" "sc i")
    (#x026B "5" "l with middle tilde: velarised l")
    (#x026C "K" "l with belt: vcl. alv. lat. fric.")
    (#x026D "l`" "l with retrfl. hook: retroflex lateral")
    (#x026E "K\\" "lezh: vcd. alv. lat. fric.")
    (#x026F "M" "turned m")
    (#x0270 "M\\" "turned m with long leg")
    (#x0271 "F" "turned m with hook")
    (#x0272 "J" "n with left hook")
    (#x0273 "n`" "n with retrfl. hook")
    (#x0274 "N\\" "sc n")
    (#x0275 "8" "barred o")
    (#x0276 "&\\" "sc oe")
    (#x0277 "U" "closed omega: better use small upsilon (U+028A)" 1)
    (#x0278 "p\\" "phi")
    (#x0279 "r\\" "turned r")
    (#x027A "l\\" "turned r with long leg")
    (#x027B "r\\`" "turned r with hook")
    (#x027C "r\\_r" "r with long leg: obsoleted by r + raised" 1)
    (#x027D "r`" "r with tail")
    (#x027E "4" "r with fishhook")
    (#x027F "z=" "reversed r with fishhook: apical dental vowel: use syllabic z" 1)
    (#x0280 "R\\" "sc r: uvul. trill")
    (#x0281 "R" "inverted sc r: uvul. vcd. fric.")
    (#x0282 "s`" "s with hook")
    (#x0283 "S" "esh")
    (#x0284 "J\\_<" "dotless j with stroke and hook: patal. vcd. impl.")
    (#x0285 "z`=" "squat reversed esh: apical retr. vowel: use syllabic z`" 1)
    (#x0286 "S_j" "esh with curl: use palatal S" 1)
    (#x0287 "|\\" "turned t: dental click. obsolete: use U+01C0" 1)
    (#x0288 "t`" "t with retrfl. hook")
    (#x0289 "u\\" "letter u bar")
    (#x028A "U" "upsilon")
    (#x028B "v\\" "v with hook")
    ;   (#x028B "P") ; OPTION
    (#x028C "V" "turned v")
    (#x028D "W" "turned w")
    (#x028E "L" "turned y: palat. lat. approx.")
    (#x028F "Y" "sc y")
    (#x0290 "z`" "z with retr. hook")
    (#x0291 "z\\" "z with curl")
    (#x0292 "Z" "ezh / yogh")
    (#x0293 "Z_j" "ezh with curl: palatalised vcd. postalv. fric." 1)
    (#x0294 "?" "glottal stop")
    (#x0295 "?\\" "pharyngeal vcd. fric.")
    (#x0296 "|\\|\\" "inverted glottal stop: lateral click. obsolete: use U+01C1" 1)
    (#x0297 "!\\" "stretched c: palatal / alveolar click. obsolete: use U+01C3" 1)
    (#x0298 "O\\" "bilabial click")
    (#x0299 "B\\" "sc b")
    (#x029A "&\\" "closed open E (sic!): non-IPA for sc oe, use U+0276 instead" 1)
    (#x029B "G\\_<" "sc g with hook")
    (#x029C "H\\" "sc h: vcl. epiglottal fric.")
    (#x029D "j\\" "j with crossed-tail")
    ;(#x029E "" "turned k: what? a velar click?")
    (#x029F "L\\" "sc l: velar lat. approx.")
    (#x02A0 "q_<" "q with hook: vcl. uvul. impl.")
    (#x02A1 ">\\" "glottal stop with stroke: voiced epiglottal stop")
    (#x02A2 "<\\" "reversed glottal stop with stroke: voiced epiglottal fricative")
    (#x02A3 "dz)" "dz digraph: FIXME: Unicode 3.2 says: 'vcd. dental affricate', but would that not rather be dD) or d_dD) then?")
    ;   (#x02A3 "d_z") ; OPTION
    (#x02A4 "dZ)" "dezh digraph")
    ;   (#x02A4 "d_Z") ; OPTION
    (#x02A5 "dz\\)" "dz digraph with curl")
    ;   (#x02A5 "d_z\\") ; OPTION
    (#x02A6 "ts)" "ts digraph: FIXME: see U+02A3 above")
    ;   (#x02A6 "t_s") ; OPTION
    (#x02A7 "tS)" "tesh digraph")
    ;   (#x02A7 "t_S") ; OPTION
    (#x02A8 "ts\\)" "ts digraph with curl")
    ;   (#x02A8 "t_s\\") ; OPTION
    (#x02A9 "fN)" "feng digraph")
    ;   (#x02A9 "f_N") ; OPTION
    (#x02AA "ls)" "ls digraph")
    ;   (#x02AA "l_s") ; OPTION
    (#x02AB "lz)" "lz digraph")
    ;   (#x02AB "l_z") ; OPTION
    (#x02AC "._w_w" "FIXME: I made this up!")
    (#x02AD "._d_d" "FIXME: I made this up!")

    ; spacing modifier letters: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ; NOTE: The spacing variants of the modifiers that are usually used as
    ;       diacritics are listed here but marked optional.  Use the
    ;       combining diacritical marks instead.
    (#x02B0 "_h" "aspirated; small h")
    (#x02B1 "_t" "small h with hook: breathy voiced")
    (#x02B2 "_j" "small j: palatalised")
    ;(#x02B3 "" "small r")
    ;(#x02B4 "" "small turned r")
    ;(#x02B5 "" "small turned r with hook")
    ;(#x02B6 "" "small sc r")
    (#x02B7 "_w" "small w")
    (#x02B8 "_j" "small y: usage= small j")
    ;(#x02B9 "" "mod. letter prime")
    ;(#x02BA "" "mod. letter double prime")
    ;(#x02BB "" "mod. letter turned comma")
    (#x02BC "_>" "apostrophy: glottalised/ejective: FIXME: do you use this for Korean, too?")
    ;(#x02BD "")
    ;(#x02BE "?" "arabic hamza: glot. stop") ; OPTION
    ;(#x02BF "X\\" "arabic ain: vcd. pharyngeal fric.") ; OPTION
    (#x02C0 "_>" "small glot.stop: glottalised/ejective; better use U+02BC" 1)
    ;(#x02C1 "X\\") ; OPTION
    ;(#x02C2 "_+" "left arrowhead: fronted articulation") ; OPTION
    ;(#x02C3 "_-" "right arrowhead: backed articulation") ; OPTION
    ;(#x02C4 "_^" "up arrowhead: raised articulation") ; OPTION
    ;(#x02C5 "_o" "down arrowhead: lowered articulation") ; OPTION
    ;(#x02C6 "" "circumfix: different meanings")
    (#x02C7 "_F_R" "caron: falling-rising (Mandarin 3rd) tone") ; OPTION
    (#x02C8 "'" "primary stress")
    ;   (#x02C8 "\"") ; OPTION
    (#x02C9 "_T" "spacing macron: high (Mandarin 1st) tone; use U+030B instead" 1) ; OPTION
    ;(#x02CA "_H_T" "high-rising (Mandarin 2nd) tone;
    ;  deprecated: acute is high level tone") ; OPTION
    ;(#x02CB "" "different meanings")
    (#x02CC "\"" "letter vert. line below: secondary stress")
    ;   (#x02CC "%") ; OPTION
    (#x02CD "_L" "letter low macro: low level tone; use U+0300 instead") ; OPTION
    (#x02CE "_L_B" "letter low grave: low-falling tone") ; OPTION
    (#x02CF "_B_L" "low-rising tone") ; OPTION
    (#x02D0 ":" "triangular colon: length mark")
    (#x02D1 ":\\" "half triangular colon: half-long")
    (#x02D2 "_O" "centred right half ring: more rounded; use U+0339 instead") ; OPTION
    (#x02D3 "_c" "centred left half ring: less rounded; use U+031C instead") ; OPTION
    (#x02D4 "_r" "up tack: raised; use U+031D instead") ; OPTION
    (#x02D5 "_o" "down tack: lowered; use U+031E instead") ; OPTION
    (#x02D6 "_+" "plus: advanced; use U+031F instead") ; OPTION
    (#x02D7 "_-" "minus: retracted; use U+0320 instead") ; OPTION
    (#x02D8 "_X" "breve: extra-short; use U+0306 instead") ; OPTION
    ;(#x02D9 "" "dot above")
    (#x02DA "_0" "ring above: voiceless (on characters with a descender); use U+030A instead") ; OPTION
    ;(#x02DB "" "ogonek")
    (#x02DC "~" "small tilde: nasalised; use U+0303 instead")
    ;   (#x02DC "_~") ; OPTION
    (#x02DD "_T" "double acute accent: extra high level tone; use U+030B instead") ; OPTION
    (#x02DE "`" "rhotic hook")
    ;(#x02DF "" "cross accent: swedish grave accent")
    (#x02E0 "_G" "small gamma: velarised")
    (#x02E1 "_l" "small l: lateral release")
    ;(#x02E2 "" "small s")
    ;(#x02E3 "" "small x")
    (#x02E4 "_?\\" "reversed glot.stop: pharyngealised")
    ; We include letter tone marks by preceding them with a syllable break .
    ; These letters are marked as deprecated, since they should not be the
    ; output when converting IPA to CXS.
    ; NOTE(review): the live entries below carry neither the "." prefix nor
    ; a deprecation level -- confirm which variant is intended.
    (#x02E5 "_T" "level tone: extra high (top); accent variant is U+030B")
    (#x02E6 "_H" "level tone: high; accent variant is U+0301")
    (#x02E7 "_M" "level tone: mid; accent variant is U+0304")
    (#x02E8 "_L" "level tone: low; accent variant is U+0300")
    (#x02E9 "_B" "level tone: extra low (bottom); accent variant is U+030F")
    ;(#x02EA "" "yin departing tone mark")
    ;(#x02EB "" "yang departing tone mark")
    (#x02EC "_v" "letter voicing; use U+032C instead") ; OPTION
    ;(#x02ED "" "unaspirated")
    ;(#x02EE "" "double apostrophe (Nenets)")

    ; Combining diacritics ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    (#x0300 "_L" "grave: low level tone")
    (#x0301 "_H" "acute: high level tone")
    (#x0302 "_F" "circum: falling")
    (#x0303 "~" "nasalised")
    ;   (#x0303 "_~") ; OPTION
    (#x0304 "_M" "macron: mid level tone")
    ;(#x0305 "")
    (#x0306 "_X" "breve: extra-short")
    ;(#x0307 "")
    (#x0308 "_\"" "diaeresis: centralised")
    ;(#x0309 "")
    (#x030A "_0" "ring above: voiceless" 1) ; ring below is usually better than above
    (#x030B "_T" "double acute: extra-high level tone")
    (#x030C "_R" "caron: rising")
    (#x030D "=" "vertical line above: syllabic (e.g. N=)")
    ;   (#x030D "_=") ; OPTION
    ;(#x030E "")
    (#x030F "_B" "double grave: extra-low level tone")
    ;(#x0310 "" "candrabindu")
    ;(#x0311 "")
    ;(#x0312 "")
    ;(#x0313 "")
    ;(#x0314 "")
    ;(#x0315 "")
    ;(#x0316 "")
    ;(#x0317 "")
    (#x0318 "_A" "left tack below: advanced tongue root")
    (#x0319 "_q" "right tack below: retracted tongue root")
    (#x031A "_}" "left angle above: no audible release")
    ;(#x031B "" "horn")
    (#x031C "_c" "left half ring below: less rounded")
    (#x031D "_r" "up tack below: raised")
    (#x031E "_o" "down tack below: lowered")
    (#x031F "_+" "plus sign below: advanced or fronted")
    (#x0320 "_-" "minus sign below: retracted or backed")
    (#x0321 "_j" "palatised hook")
    (#x0322 "`" "retroflex hook")
    ;(#x0323 "" "dot below")
    (#x0324 "_t" "diaeresis below: breathy voiced")
    (#x0325 "_0" "ring below: voiceless")
    ;(#x0326 "" "comma below")
    ;(#x0327 "" "combining cedilla:
    ;  NOTE: c cedilla is only translated if given as U+00E7,
    ;        but not if given as U+0063 U+0327!")
    (#x0328 "~" "combining ogonek")
    ;   (#x0328 "_~") ; OPTION
    (#x0329 "=" "vert line below: syllabic")
    ;   (#x0329 "_=") ; OPTION
    (#x032A "_d" "bridge below: dental")
    (#x032B "_w" "double arch below: labialisation")
    (#x032C "_v" "caron below: voiced")
    ;(#x032D "" "circum below: fronted")
    ;   (#x032D "_+") ; OPTION
    ;(#x032E "" "breve below")
    (#x032F "_^" "inv. breve below: non-syllabic")
    (#x0330 "_k" "tilde below: creaky voiced")
    ;(#x0331 "" "macron below")
    ;(#x0332 "" "underline")
    ;(#x0333 "" "double underline")
    (#x0334 "_e" "tilde overlay: velarisation/pharyngealisation")
    ;(#x0335 "" "short stroke overlay")
    ;(#x0336 "" "long stroke overlay")
    ;(#x0337 "" "short solidus overlay")
    ;(#x0338 "" "long solidus overlay")
    (#x0339 "_O" "right half ring below: more rounded")
    (#x033A "_a" "inv. bridge below: apical")
    (#x033B "_m" "square below: laminal")
    (#x033C "_N" "inv. double arch/seagull below: linguolabial")
    (#x033D "_x" "x above: mid-centralised")
    ;...
    (#x0361 "_" "combining double inverted breve: linking. A special algorithm needs to convert a_i into ai)")

    ; Greek:
    (#x03B2 "B" "beta")
    (#x03B8 "T" "theta")
    ;(#x03BB "" "lambda: FIXME: was this not used IPA?")
    ;(#x03C5 "v\\" "nu: labiadental approx. Use U+028B.")
    ;   (#x03C5 "v\\") ; upsilon
    ;(#x03C6 "p\\" "phi: Use U+0278.")
    (#x03C7 "X" "chi: uvul. vcl. fric.")

    ; General punctuation:
    (#x2016 "||" "double vertical line: major (intonation) group")
    ;(#x2021 "=\\" "double dagger") ; OPTION
    (#x203F "-\\" "linking, absence of a break") ; undertie

    ; Superscripts and subscripts:
    (#x207F "_n" "small n: nasal release")

    ; Arrows
    (#x2191 "^" "upwards arrow: FIXME: Unicode 3.2 says: ingressive airflow?? I translated this as upstep.")
    ; NOTE(review): the downstep entry is disabled while upstep is active;
    ; confirm that this asymmetry is intended.
    ;   (#x2193 "!" "downwards arrow: FIXME: same thing. I translated this as downstep.")
    ; NOTE(review): the next two entries map to the empty CXS string;
    ; verify that consumers of this table cope with that.
    (#x2197 "" "ne arrow: global rise")
    (#x2198 "" "se arrow: global fall")
    ))

; Additional CXS -> IPA entries for the reverse mapping that cannot be
; inferred by simply reversing *charmap-ipa->cxs-iso-8859-1*.
;
; Each entry is (CXS-string (item ...)).  Note that the second element is
; a list; its items are Unicode code points (integers) or Lisp characters.
(defparameter *charmap-cxs->ipa-supplement*
  '(
    ("v\\" (#x028B))
    ("P" (#x028B))
    ("1" (#x0268))
    ("i\\" (#x0268))
    ("I\\" (#x026A #x335)) ; no IPA
    ("U\\" (#x028A #x336)) ; no IPA
    ("l_e" (#x026B))

    ; an additional palatalisation diacritic:
    (";" (#x02B2))

    ("%" (#x02CC))  ; secondary stress, alternative 1
    ("," (#x02CC))  ; secondary stress, alternative 2
    ("_~" (#x0303)) ; alternative for ~
    ("_=" (#x0329)) ; better put this below by default (also an alternative for =)
    ("=" (#x0329))  ; better put this below by default
    ("~" (#x0303))  ; not ogonek, but special nasalised
    ("_0" (#x0325)) ; better put this below by default

    ("d_z" (#\d #x0361 #\z))       ; OPTION: (#x02A3)
    ("d_Z" (#\d #x0361 #x0292))    ; OPTION: (#x02A4)
    ("d_z\\" (#\d #x0361 #x0291))  ; OPTION: (#x02A5)
    ("t_s" (#\t #x0361 #\s))       ; OPTION: (#x02A6)
    ("t_S" (#\t #x0361 #x0283))    ; OPTION: (#x02A7)
    ("t_s\\" (#\t #x0361 #x0255))  ; OPTION: (#x02A8)
    ("f_N" (#\f #x0361 #x014B))    ; OPTION: (#x02A9)
    ("l_s" (#\l #x0361 #\s))       ; OPTION: (#x02AA)
    ("l_z" (#\l #x0361 #\z))       ; OPTION: (#x02AB)

    ; use small j and w:
    ("_j" (#x02B2))
    ("_w" (#x02B7))

    ; tone marks: use the tone letters
    ("_T" (#x02E5))
    ("_H" (#x02E6))
    ("_M" (#x02E7))
    ("_L" (#x02E8))
    ("_B" (#x02E9))

    ; Also have some typical ligatures like ts) here:
    ("kp)" (#\k #x0361 #\p))
    ("gb)" (#\g #x0361 #\b))
    ("Nm)" (#x014B #x0361 #\m))
    ("dz)" (#\d #x0361 #\z))       ; OPTION: (#x02A3)
    ("dZ)" (#\d #x0361 #x0292))    ; OPTION: (#x02A4)
    ("dz\\)" (#\d #x0361 #x0291))  ; OPTION: (#x02A5)
    ("ts)" (#\t #x0361 #\s))       ; OPTION: (#x02A6)
    ("tS)" (#\t #x0361 #x0283))    ; OPTION: (#x02A7)
    ("ts\\)" (#\t #x0361 #x0255))  ; OPTION: (#x02A8)
    ("fN)" (#\f #x0361 #x014B))    ; OPTION: (#x02A9)
    ("ls)" (#\l #x0361 #\s))       ; OPTION: (#x02AA)
    ("lz)" (#\l #x0361 #\z))       ; OPTION: (#x02AB)

    ; Some diphthongs (the real list would be endless and should rather
    ; be programmed systematically.  However, that's not so easy, since
    ; the two parts may be modified with diacritics: e.g. d_0z_0)
    ("aI)" (#\a #x0361 #x026A))
    ("aU)" (#\a #x0361 #x028A))
    ("OI)" (#x0254 #x0361 #x026A))
    ("OY)" (#x0254 #x0361 #x028F))

    ("c\\" (#x0255)) ; deprecated, but in use due to similarity with IPA
    ))

; IPA -> CXS table for plain-ASCII output: the four ISO-8859-1 letters
; below are translated as well, followed by every entry of
; *charmap-ipa->cxs-iso-8859-1*.
(defparameter *charmap-ipa->cxs-ascii*
  (append
   '(
     ; Also translate ISO-8859-1 characters to plain ASCII
     (#x00E6 "&" "small ae")
     (#x00E7 "C" "small c with cedilla")
     (#x00F0 "D" "small eth")
     (#x00F8 "2" "small o with stroke")
     )
   *charmap-ipa->cxs-iso-8859-1*))

; Build a hash table from the association list ALIST: for every entry the
; car becomes the key and the cdr the value.  When a key occurs more than
; once, the last entry wins.  TEST is handed to MAKE-HASH-TABLE.
;
; This replaces the CLISP-only :INITIAL-CONTENTS keyword of
; MAKE-HASH-TABLE so that the tables below can be built by any
; implementation.
(defun charmap-alist->hash-table (alist &key (test #'eql))
  (let ((table (make-hash-table :test test)))
    (dolist (entry alist table)
      (setf (gethash (car entry) table) (cdr entry)))))

; Glyph shape classes of the base letters, keyed by Unicode code point.
; The value is an association list with the entries (:ascend FLAG) and
; (:descend FLAG) and, for a few glyphs, (:accent :above) or
; (:accent :below).
; NOTE(review): presumably :accent overrides where a diacritic is placed
; on glyphs that have both ascender and descender -- confirm with the
; code that reads this table.
(defparameter *ipa-glyph-class*
  (charmap-alist->hash-table
   '(
     (#x0061 (:ascend nil) (:descend nil)) ; "a"
     (#x0062 (:ascend t  ) (:descend nil)) ; "b"
     (#x0063 (:ascend nil) (:descend nil)) ; "c"
     (#x0064 (:ascend t  ) (:descend nil)) ; "d"
     (#x0065 (:ascend nil) (:descend nil)) ; "e"
     (#x0066 (:ascend t  ) (:descend nil)) ; "f"
     (#x0067 (:ascend nil) (:descend t  )) ; "g"
     (#x0068 (:ascend t  ) (:descend nil)) ; "h"
     (#x0069 (:ascend nil) (:descend nil)) ; "i"
     (#x006a (:ascend nil) (:descend t  )) ; "j"
     (#x006b (:ascend t  ) (:descend nil)) ; "k"
     (#x006c (:ascend t  ) (:descend nil)) ; "l"
     (#x006d (:ascend nil) (:descend nil)) ; "m"
     (#x006e (:ascend nil) (:descend nil)) ; "n"
     (#x006f (:ascend nil) (:descend nil)) ; "o"
     (#x0070 (:ascend nil) (:descend t  )) ; "p"
     (#x0071 (:ascend nil) (:descend t  )) ; "q"
     (#x0072 (:ascend nil) (:descend nil)) ; "r"
     (#x0073 (:ascend nil) (:descend nil)) ; "s"
     (#x0074 (:ascend t  ) (:descend nil)) ; "t"
     (#x0075 (:ascend nil) (:descend nil)) ; "u"
     (#x0076 (:ascend nil) (:descend nil)) ; "v"
     (#x0077 (:ascend nil) (:descend nil)) ; "w"
     (#x0078 (:ascend nil) (:descend nil)) ; "x"
     (#x0079 (:ascend nil) (:descend t  )) ; "y"
     (#x007a (:ascend nil) (:descend nil)) ; "z"
     (#x0127 (:ascend t  ) (:descend nil)) ; "X\\" pharyngeal voiceless fricative
     (#x014B (:ascend nil) (:descend t  )) ; "N" eng
     (#x0153 (:ascend nil) (:descend nil)) ; "9" oe
     (#x0180 (:ascend t  ) (:descend nil)) ; "B" b with stroke: same as beta
     (#x01A5 (:ascend t  ) (:descend t  )) ; "p_<" p with hook: voiceless implosive?
     (#x01AB (:ascend t  ) (:descend t  )) ; "t_j" t with palatal hook: ancient for t_j
     (#x01AD (:ascend t  ) (:descend nil)) ; "t_<" t with hook: voiceless implosive?
     (#x01BB (:ascend t  ) (:descend nil)) ; "dz)" two with stroke: archaic for [dz]
     (#x01C0 (:ascend t  ) (:descend nil)) ; "|\\" dental click
     (#x01C1 (:ascend t  ) (:descend nil)) ; "|\\|\\" lateral click
     (#x01C2 (:ascend t  ) (:descend nil)) ; "=\\" alveolar click
     (#x01C3 (:ascend t  ) (:descend nil)) ; "!\\" retroflex click
     (#x0250 (:ascend nil) (:descend nil)) ; "6" turned a
     (#x0251 (:ascend nil) (:descend nil)) ; "A" script a / latin letter alpha
     (#x0252 (:ascend nil) (:descend nil)) ; "Q" turned script a / latin letter turned alpha
     (#x0253 (:ascend t  ) (:descend nil)) ; "b_<" b with hook
     (#x0254 (:ascend nil) (:descend nil)) ; "O" open o
     (#x0255 (:ascend nil) (:descend nil)) ; "s\\" c with curl
     (#x0256 (:ascend t  ) (:descend t  ) (:accent :below)) ; "d`" d with tail
     (#x0257 (:ascend t  ) (:descend nil)) ; "d_<" d with hook
     (#x0258 (:ascend nil) (:descend nil)) ; "@\\" reversed e
     (#x0259 (:ascend nil) (:descend nil)) ; "@" turned e / schwa
     (#x025A (:ascend nil) (:descend nil)) ; "@`" schwa with hook: @ + rhoticity
     (#x025B (:ascend nil) (:descend nil)) ; "E" epsilon
     (#x025C (:ascend nil) (:descend nil)) ; "3" reversed epsilon
     (#x025D (:ascend nil) (:descend nil)) ; "3`" reversed epsilon with hook: 3 + rhoticity
     (#x025E (:ascend nil) (:descend nil)) ; "3\\" closed epsilon
     (#x025F (:ascend nil) (:descend t  )) ; "J\\" dotless j with stroke: vcd. palatal stop
     (#x0260 (:ascend t  ) (:descend t  ) (:accent :above)) ; "g_<" g with hook: implosive
     (#x0261 (:ascend nil) (:descend t  )) ; "g" script g == g = vcd. velar stop
     (#x0262 (:ascend nil) (:descend nil)) ; "G\\" sc g = vcd. uvul. stop
     (#x0263 (:ascend nil) (:descend t  )) ; "G" gamma: vcd. velar fric.
     (#x0264 (:ascend nil) (:descend nil)) ; "7" rams horn / baby gamma: unr. back. high-mid vowel
     (#x0265 (:ascend nil) (:descend t  )) ; "H" turned h: vcd. lab.-pal. approx.
     (#x0266 (:ascend t  ) (:descend nil)) ; "h\\" h with hook: vcd. glot. fric.
     (#x0267 (:ascend t  ) (:descend t  ) (:accent :below)) ; "x\\" heng with hook: sim. S and x
     (#x0268 (:ascend nil) (:descend nil)) ; "1" i with stroke (OPTION: "i\\")
     (#x0269 (:ascend nil) (:descend nil)) ; "I" letter iota. obsoleted by sc i (U+026A)
     (#x026A (:ascend nil) (:descend nil)) ; "I" sc i
     (#x026B (:ascend t  ) (:descend nil)) ; "5" l with middle tilde: velarised l
     (#x026C (:ascend t  ) (:descend nil)) ; "K" l with belt: vcl. alv. lat. fric.
     (#x026D (:ascend t  ) (:descend t  )) ; "l`" l with retrfl. hook: retroflex lateral
     (#x026E (:ascend t  ) (:descend t  )) ; "K\\" lezh: vcd. alv. lat. fric.
     (#x026F (:ascend nil) (:descend nil)) ; "M" turned m
     (#x0270 (:ascend nil) (:descend t  )) ; "M\\" turned m with long leg
     (#x0271 (:ascend nil) (:descend t  )) ; "F" turned m with hook
     (#x0272 (:ascend nil) (:descend t  )) ; "J" n with left hook
     (#x0273 (:ascend nil) (:descend t  )) ; "n`" n with retrfl. hook
     (#x0274 (:ascend nil) (:descend nil)) ; "N\\" sc n
     (#x0275 (:ascend nil) (:descend nil)) ; "8" barred o
     (#x0276 (:ascend nil) (:descend nil)) ; "&\\" sc oe
     (#x0277 (:ascend nil) (:descend nil)) ; "U" closed omega: better use small upsilon (U+028A)
     (#x0278 (:ascend t  ) (:descend t  )) ; "p\\" phi
     (#x0279 (:ascend nil) (:descend nil)) ; "r\\" turned r
     (#x027A (:ascend t  ) (:descend nil)) ; "l\\" turned r with long leg
     (#x027B (:ascend nil) (:descend t  )) ; "r\\`" turned r with hook
     (#x027C (:ascend nil) (:descend t  )) ; "r\\_r" r with long leg: obsoleted by r + raised
     (#x027D (:ascend nil) (:descend t  )) ; "r`" r with tail
     (#x027E (:ascend nil) (:descend nil)) ; "4" r with fishhook
     (#x027F (:ascend nil) (:descend nil)) ; "z=" reversed r with fishhook: apical dental vowel
     (#x0280 (:ascend nil) (:descend nil)) ; "R\\" sc r: uvul. trill
     (#x0281 (:ascend nil) (:descend nil)) ; "R" inverted sc r: uvul. vcd. fric.
     (#x0282 (:ascend nil) (:descend t  )) ; "s`" s with hook
     (#x0283 (:ascend t  ) (:descend t  )) ; "S" esh
     (#x0284 (:ascend t  ) (:descend t  )) ; "J\\_<" dotless j with stroke and hook: patal. vcd. impl.
     (#x0285 (:ascend t  ) (:descend nil)) ; "z`=" squat reversed esh: apical retr. vowel
     (#x0286 (:ascend t  ) (:descend t  )) ; "S_j" esh with curl: use palatal S
     (#x0287 (:ascend t  ) (:descend nil)) ; "|\\" turned t: dental click. obsolete: use U+01C0
     (#x0288 (:ascend t  ) (:descend t  ) (:accent :above)) ; "t`" t with retrfl. hook
     (#x0289 (:ascend nil) (:descend nil)) ; "u\\" letter u bar
     (#x028A (:ascend nil) (:descend nil)) ; "U" upsilon
     (#x028B (:ascend nil) (:descend nil)) ; "P" v with hook (OPTION: "v\\")
     (#x028C (:ascend nil) (:descend nil)) ; "V" turned v
     (#x028D (:ascend nil) (:descend nil)) ; "W" turned w
     (#x028E (:ascend t  ) (:descend nil)) ; "L" turned y: palat. lat. approx.
     (#x028F (:ascend nil) (:descend nil)) ; "Y" sc y
     (#x0290 (:ascend nil) (:descend t  )) ; "z`" z with retr. hook
     (#x0291 (:ascend nil) (:descend nil)) ; "z\\" z with curl
     (#x0292 (:ascend nil) (:descend t  )) ; "Z" ezh / yogh
     (#x0293 (:ascend nil) (:descend t  )) ; "Z_j" ezh with curl: palatalised vcd. postalv. fric.
     (#x0294 (:ascend t  ) (:descend nil)) ; "?" glottal stop
     (#x0295 (:ascend t  ) (:descend nil)) ; "?\\" pharyngeal vcd. fric.
     (#x0296 (:ascend t  ) (:descend nil)) ; "|\\|\\" inverted glottal stop: lateral click. obsolete: use U+01C1
     (#x0297 (:ascend nil) (:descend t  )) ; "!\\" stretched c: palatal / alveolar click. obsolete: use U+01C3
     (#x0298 (:ascend t  ) (:descend nil)) ; "O\\" bilabial click
     (#x0299 (:ascend nil) (:descend nil)) ; "B\\" sc b
     (#x029A (:ascend nil) (:descend nil)) ; "&\\" closed open E (sic!): non-IPA for sc oe, use U+0276 instead
     (#x029B (:ascend t  ) (:descend nil)) ; "G\\_<" sc g with hook
     (#x029C (:ascend nil) (:descend nil)) ; "H\\" sc h: vcl. epiglottal fric.
     (#x029D (:ascend nil) (:descend t  )) ; "j\\" j with crossed-tail
     (#x029F (:ascend nil) (:descend nil)) ; "L\\" sc l: velar lat. approx.
     (#x02A0 (:ascend t  ) (:descend t  ) (:accent :above)) ; "q_<" q with hook: vcl. uvul. impl.
     (#x02A1 (:ascend t  ) (:descend nil)) ; ">\\" glottal stop with stroke: voiced epiglottal stop
     (#x02A2 (:ascend t  ) (:descend nil)) ; "<\\" reversed glottal stop with stroke: voiced epiglottal fricative
     (#x02A3 (:ascend t  ) (:descend nil)) ; "dz)" dz digraph
     (#x02A4 (:ascend t  ) (:descend t  )) ; "dZ)" dezh digraph (OPTION: "d_Z")
     (#x02A5 (:ascend t  ) (:descend nil)) ; "dz\\)" dz digraph with curl (OPTION: "d_z\\")
     (#x02A6 (:ascend t  ) (:descend nil)) ; "ts)" ts digraph (OPTION: "t_s")
     (#x02A7 (:ascend t  ) (:descend t  )) ; "tS)" tesh digraph (OPTION: "t_S")
     (#x02A8 (:ascend t  ) (:descend nil)) ; "ts\\)" ts digraph with curl (OPTION: "t_s\\")
     (#x02A9 (:ascend t  ) (:descend t  )) ; "fN)" feng digraph (OPTION: "f_N")
     (#x02AA (:ascend t  ) (:descend nil)) ; "ls)" ls digraph (OPTION: "l_s")
     (#x02AB (:ascend t  ) (:descend nil)) ; "lz)" lz digraph (OPTION: "l_z")
     (#x02AC (:ascend t  ) (:descend nil)) ; "._w_w" FIXME: I made this up!
     (#x02AD (:ascend t  ) (:descend nil)) ; "._d_d" FIXME: I made this up!
     (#x03B2 (:ascend t  ) (:descend t  ) (:accent :below)) ; "B" beta
     (#x03B8 (:ascend t  ) (:descend nil)) ; "T" theta
     (#x03C7 (:ascend nil) (:descend t  )) ; "X" chi: uvul. vcl. fric.
     )
   :test #'eql))

; Spacing modifier letters, keyed by Unicode code point.  These are
; additional to the modifiers that are combining characters.
; The value is an association list holding (:modif t) and optionally
; (:skip t).
; Attribute :skip is implemented for deciding whether
; it is ts)_h (:skip nil) or ts_h) (:skip t).
(defparameter *ipa-modifier*
  (charmap-alist->hash-table
   '(
     (#x02b0 (:modif t))           ; "_h" aspirated; small h.  ts)_h
     (#x02b1 (:modif t))           ; "_t" small h with hook: breathy voiced.  dz)_t
     (#x02b2 (:modif t) (:skip t)) ; "_j" small j: palatalised.  ts_j) or ts)_j ?
     (#x02b7 (:modif t) (:skip t)) ; "_w" small w.  i2_w)
     (#x02b8 (:modif t) (:skip t)) ; "_j" small y: usage= small j
     (#x02bc (:modif t))           ; "_>" apostrophy: glottalised/ejective.  ts)_>
     (#x02c0 (:modif t))           ; "_>" small glot.stop: glottalised/ejective
     (#x02c7 (:modif t))           ; "_F_R" caron: falling-rising (Mandarin 3rd) tone.  aI)_F_R
     (#x02c8 (:modif t))           ; "'" primary stress.  aI)'
     (#x02c9 (:modif t))           ; "_T" spacing macron: high (Mandarin 1st) tone
     (#x02cc (:modif t))           ; "\"" letter vert. line below: secondary stress
     (#x02cd (:modif t))           ; "_L" letter low macro: low level tone
     (#x02ce (:modif t))           ; "_L_B" letter low grave: low-falling tone
     (#x02cf (:modif t))           ; "_B_L" low-rising tone
     (#x02d0 (:modif t))           ; ":" triangular colon: length mark.  aI): or aI:) ?
     (#x02d1 (:modif t))           ; ":\\" half triangular colon: half-long
     (#x02d2 (:modif t) (:skip t)) ; "_O" centred right half ring: more rounded.  i2_O)
     (#x02d3 (:modif t) (:skip t)) ; "_c" centred left half ring: less rounded
     (#x02d4 (:modif t) (:skip t)) ; "_r" up tack: raised; use U+031D instead
     (#x02d5 (:modif t) (:skip t)) ; "_o" down tack: lowered; use U+031E instead
     (#x02d6 (:modif t) (:skip t)) ; "_+" plus: advanced; use U+031F instead
     (#x02d7 (:modif t) (:skip t)) ; "_-" minus: retracted; use U+0320 instead
     (#x02d8 (:modif t) (:skip t)) ; "_X" breve: extra-short.  ui_X) vs. u_Xi); uj) wi)
     (#x02da (:modif t) (:skip t)) ; "_0" ring above: voiceless.  tz_0)
     (#x02dc (:modif t) (:skip t)) ; "~" small tilde: nasalised.  au)~ or au~) ?
     (#x02dd (:modif t))           ; "_T" double acute accent: extra high level tone
     (#x02de (:modif t) (:skip t)) ; "`" rhotic hook
     (#x02e0 (:modif t) (:skip t)) ; "_G" small gamma: velarised
     (#x02e1 (:modif t))           ; "_l" small l: lateral release
     ; The next key used to be #x02e5, which duplicated the tone letter
     ; below and left U+02E4 without an entry.
     (#x02e4 (:modif t) (:skip t)) ; "_?\\" reversed glot.stop: pharyngealised
     (#x02e5 (:modif t))           ; "_T" level tone: extra high (top)   (alternative: "._T", deprecated)
     (#x02e6 (:modif t))           ; "_H" level tone: high               (alternative: "._H", deprecated)
     (#x02e7 (:modif t))           ; "_M" level tone: mid                (alternative: "._M", deprecated)
     (#x02e8 (:modif t))           ; "_L" level tone: low                (alternative: "._L", deprecated)
     (#x02e9 (:modif t))           ; "_B" level tone: extra low (bottom) (alternative: "._B", deprecated)
     (#x02ec (:modif t) (:skip t)) ; "_v" letter voicing; use U+032C instead.  ds_v)
     )
   :test #'eql))

(provide 'charmap-cxs)