; CXS -- Conlang X-Sampa ; ; Converts CXS->IPA and IPA->CXS ; (require "charmap-cxs") (require "charmap-cxs-class") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; tools: (defun cxs-second-or-cdr (x) (if (listp (cdr x)) (second x) (cdr x))) (defgeneric cxs-char-code (x)) (defmethod cxs-char-code ((x integer)) x ) (defmethod cxs-char-code ((x character)) (char-code x) ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; IPA->CXS ; cache: (defvar /ipa->cxs/ (make-hash-table :test #'eql)) ; maps integers to strings ; learn: (mapc #'(lambda (x) (setf (gethash (cxs-char-code (first x)) /ipa->cxs/) (string (cxs-second-or-cdr x)) ) ) *charmap-ipa->cxs-ascii* ) ; convert: (defgeneric ipa->cxs (x)) ; - always returns a string. ; - may be invoked with ints, characters, strings, and vectors of these. (defmethod ipa->cxs ((x integer)) (or (gethash x /ipa->cxs/) (string (character x)) ) ) (defmethod ipa->cxs ((x character)) (ipa->cxs (char-code x)) ) (defmethod ipa->cxs ((x sequence)) (apply 'concatenate (cons 'string (map 'list #'ipa->cxs x))) ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; CXS->IPA (defstruct cxs-xlat-node replacement children) ; cache: (defvar /cxs->ipa/ (make-cxs-xlat-node)) ; indexed with integers ; convert any atom or sequence to a vector of unicode characters: (defgeneric cxs-char-code-vector (x)) (defmethod cxs-char-code-vector ((x character)) (vector (char-code x)) ) (defmethod cxs-char-code-vector ((x number)) (vector x) ) (defmethod cxs-char-code-vector ((x sequence)) (apply #'concatenate (cons 'vector (map 'list #'cxs-char-code-vector x) ) ) ) ; learn: (defun cxs-compose-get-node (node index) (unless (cxs-xlat-node-children node) (setf (cxs-xlat-node-children node) (make-hash-table :test #'eql)) ) (or (gethash index (cxs-xlat-node-children node)) (let ((new-sub-node (make-cxs-xlat-node))) (setf (gethash index (cxs-xlat-node-children node)) new-sub-node) new-sub-node ) ) ) (defun cxs-add-compose (node seq index replacement) (if (< index (length seq)) ;then (cxs-add-compose (cxs-compose-get-node node (aref seq index)) seq (1+ index) replacement ) ;else (when (and (not (fourth replacement)) ; not deprecated (or (null (cxs-xlat-node-replacement node)) ; don't overwrite otherwise (gethash replacement *ipa-combining-class*) ; prefer combining over standalone ) ) (setf (cxs-xlat-node-replacement node) (first replacement)) ) ) ) ; learn main stuff: (mapc #'(lambda (x) (cxs-add-compose /cxs->ipa/ (cxs-char-code-vector (cxs-second-or-cdr x)) 0 x) ) *charmap-ipa->cxs-ascii* ) ; learn supplement: (mapc #'(lambda (x) (cxs-add-compose /cxs->ipa/ (cxs-char-code-vector (first x)) 0 (cxs-second-or-cdr x)) ) *charmap-cxs->ipa-supplement* ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun cxs->ipa-find (node seq index replacement tail-index) ;(format t "~&~a ~a ~a ~a~&" seq index replacement tail-index) (if (< index (length seq)) ;then (let* ((children (cxs-xlat-node-children node)) (new-node (and children (gethash (aref seq index) children))) ) (if new-node ;then (let ((new-replacement (cxs-xlat-node-replacement new-node)) (new-index (1+ index)) ) (if new-replacement (cxs->ipa-find new-node seq new-index new-replacement new-index) (cxs->ipa-find new-node seq new-index replacement tail-index) ) ) ;else (values replacement tail-index) ) ) ;else (values replacement tail-index) ) ) (defun cxs->ipa-convert (seq index) (if (< index (length seq)) ;then (multiple-value-bind (replacement next-index) (cxs->ipa-find /cxs->ipa/ seq index nil 0) (if replacement ;then (concatenate 'vector (cxs-char-code-vector replacement) (cxs->ipa-convert seq next-index) ) ;else (concatenate 'vector (subseq seq index (1+ index)) (cxs->ipa-convert seq (1+ index)) ) ) ) ;else (vector) ) ) (defun cxs->ipa (x) ; - always returns a vector of integers (=unicode chars) ; - may be invoked with ints, characters, string, and vector of these (cxs->ipa-convert (cxs-char-code-vector x) 0) ) (provide 'cxs)