; -*- Mode: Lisp -*- ; ; Author: Henrik Theiling ; ; Description: ; ; Provides the module 'WORDS and the functions COMPOSE-WORD and ; DECOMPOSE-WORD. These can be used by the lexicon implementation ; to implement the functions STEM and COMPOSITE that are used by ; the lexicon to compute the entries. ; ;; ;; Darstellung: ;; 1 number 0...999: word variant ;; n number 0...999: phonemes ;; ;; E.g. (123 754 329) ;; ;; Meta-phonemes may be interpreted depending on syllable type and ;; number of meta-phonemes. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (eval-when (:load-toplevel :compile-toplevel :execute) (make-package :lex) ) (provide 'words) ; ;(export ; '(lex::make-lexicon ; lex::stem ; function needs lex::*phonotactics* to be defined ; lex::composite ; function needs lex::*phonotactics* to be defined ; ) ;) (eval-when (:load-toplevel :compile-toplevel :execute) (make-package :words) ) (in-package :words) ;; and some in the words package. (export '(compose-word make-structure decompose-word random-word *meta-max* ) ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defvar *meta-max* 1000 "The range for meta-phonemes. Default: 0...999" ) (defun trigger-lookup (list select) "Parameters: 'LIST: list of pairs (VALUE TRIGGER) 'SELECT: selection value Result: One of the VALUEs. Used to select an entry from a list with weighted entries. Returns one VALUE if the sum of all previous TRIGGERs is greater than the given selection value. If the TRIGGER is not given, immediately returns the VALUE without further searching. " (let ((trigger (cadar list))) (if (not (numberp trigger)) (caar list) (let ((trigger-int (floor (* *meta-max* trigger)))) (if (< select trigger-int) (caar list) (trigger-lookup (cdr list) (- select trigger-int)) ) ) ) ) ) (defun nth-non-nil (index list &key (default nil)) "Selects the INDEX'th entry from LIST or a previous one of that is given or the default (if given) " (cond ((null list) default ) ((null (first list)) (nth-non-nil (1- index) (rest list) :default default) ) ((<= index 0) (first list) ) (t (nth-non-nil (1- index) (rest list) :default (first list)) ) ) ) (defun remove-list-perhaps (x) (if (listp x) (first x) x ) ) (defun lookup-phonemes (phoneme-pattern spec) "By a given trigger list PHONEME-PATTERN, return a list of phonemes the SPEC list selects. " (cond ((null phoneme-pattern) nil ) ((null spec) nil ) (t (cons (remove-list-perhaps (trigger-lookup (first phoneme-pattern) (first spec)) ) (lookup-phonemes (rest phoneme-pattern) (rest spec)) ) ) ) ) (defun random-length (structure) ;; repeats until an entry was found. (let ((n (1+ (random (length structure))))) (if (null (nth (1- n) structure)) (random-length structure) n ) ) ) (defun valid-weight-of (x) (when (and (listp x) (numberp (second x))) (second x) ) ) (defun has-weights (entries) (catch 'result (mapc #'(lambda (x) (when (valid-weight-of x) (throw 'result t) ) ) entries ) nil ) ) (defun re-weigh-part (entries) (catch 'give-up (let ((total-weight (if (has-weights entries) (reduce #'+ entries :key #'(lambda (x) (or (valid-weight-of x) (throw 'give-up entries) ) ) :initial-value 0 ) (length entries) ) ) ) (mapcar #'(lambda (x) (list (if (stringp x) x (first x)) (/ (or (valid-weight-of x) 1) total-weight) ) ) entries ) ) ) ) (defun re-weigh-list-of-parts (list-of-parts) (mapcar #'re-weigh-part list-of-parts ) ) (defun make-structure (entries &key (global-weight nil)) "Parameters ENTRIES: A flat list of descriptions of words. Each entry is a list. Output: STRUCTURE: A structure suitable for use with many functions in this library. The entries are simply discributed by the length of their first element into appropriate entries of a new list. I.e., a new top-level list is created and each element at position N of this list is a list of entries that have first elements of length (n+1). The relative order of entries is kept. Apart from splitting the entries into lists, it also normalises the given weights for each part of each entry. Weights can also be left out. There are two cases of lists: (\"a\" \"b\" ... \"z\") -- will be assigned equal weight ((\"a\" 4) ... (\"z\" 10)) -- weights will be normalised In the second case, normalisation only takes place if all weights are given. Please note that if you weigh the entries themselves (the second element of an entry), this weight is valid inside its group of equally length partners. There is no way of giving a global weight to entries or a weight to lengths of entries. However, if the parameter GLOBAL-WEIGHT is given, then the function assumes that for each entry, a weight is given and re-calculates and normalises the weights for each group in such a way that the relative weight per group is kept. To get the correct global weight for randomisation, a second list is output with weights for the groups. This is suitable for random-word to select an appropritate length. By this mechanism, global weight can actually correctly be defined." (let ((structure (make-list (reduce #'max entries :key #'(lambda (x) (length (first x))) :initial-value 0 ) ) ) ) (mapc #'(lambda (x) (push x (nth (1- (length (first x))) structure))) entries ) (setq structure (mapcar #'reverse structure)) ;; restore relative original order. (if global-weight (let* ( ;; step one: compute the non-normalised weight for each group (length-weight (mapcar #'(lambda (eq-len-entries) (reduce #'+ eq-len-entries :key #'second :initial-value 0 ) ) structure ) ) (total-weight (apply #'+ length-weight)) ) (values ;; 1st: normalsed per entry (mapcar #'(lambda (eq-len-entries local-weight) (mapcar #'(lambda (variant) (when variant (list (re-weigh-list-of-parts (first variant)) ;; local re-weighing (/ (second variant) local-weight) ;; entry re-weighing ) ) ) eq-len-entries ) ) structure length-weight ) ;; 2nd: normalised length weights (mapcar #'(lambda (x) (/ x total-weight)) length-weight) ;; weights per length ) ) structure ;; no re-weighting ) ) ) (defun random-word (structure &optional length) "Parameters: STRUCTURE: The structure of a word. &optional: LENGTH: If given, use this length instead of choosing a random one. Must be >= 1 and <= length (structure) This function by random chooses a pattern from the selection list, and then invokes compose-word with a random word structure to make a word of the appropriate length. This function returns NIL of the entry selected by a given length is NIL and never returns (possibly with a lisp stack overflow) if due to the structure, no appropriate length can be found." (catch 'result (let* ( ;; Filter out those that have a wrong length if ;; length is given. (cooked-structure (if length (append (make-list (1- length)) (list (let ((entry (nth (1- length) structure))) (when (null entry) (throw 'result nil)) entry ) ) ) structure ) ) ;; Find an length (the entry in structure must be non-empty) (len (or length (random-length cooked-structure))) ;; Make a random list of appropriate length. (rand-list (mapcar #'(lambda (x) (declare (ignore x)) (random *meta-max*) ) (make-list (1+ len)) ) ) ) (compose-word structure rand-list) ) ) ) (defun compose-word (structure spec) "Parameters: STRUCTURE: The structure of a word. SPEC: A selection list STRUCTURE is a list. It is indexed with the number of available phonemes. The indexing starts with 1. If the indexing yields NIL, the previous entry is used, so that enough phonemes exist. Each entry is a weighted list suitable for trigger-lookup. The values of each trigger list are lists of trigger-lists for phoneme selection. SPEC is a list. The length of the list selects the length of the word. The first entry is a value 0..999 selecting the variant. All following entries are 0..999 selecting a phoneme. " (let ((phoneme-selection (trigger-lookup (nth-non-nil (- (length spec) 2) structure) (first spec) ; lookup variation ) ) ) (when phoneme-selection (apply #'concatenate 'string (lookup-phonemes phoneme-selection (rest spec)) ) ) ) ) (defun get-trigger-int (x current-max) (cond ((numberp x) (floor (* *meta-max* x)) ) ((< current-max *meta-max*) (- *meta-max* current-max) ) (t (throw :error "sum of weights is > 1") ) ) ) (defun add-random (add-random trigger-offset trigger-int) (floor (+ trigger-offset (* add-random (random trigger-int)))) ) (defun find-matches (sub-finder list word add-random &optional (trigger-offset 0)) "LIST has the following structure: ( (VALUE_1 TRIGGER_1) ... (VALUE_n-1 TRIGGER_n-1) (VALUE_n) ) SUB-FINDER is invoked like this: (SUB-FINDER VALUE_i WORD ADD-RANDOM) for each i <- 1.. n " (unless (null list) (let ((trigger-int (get-trigger-int (cadar list) trigger-offset))) (nconc (mapcar #'(lambda (one-result) (cons (add-random add-random trigger-offset trigger-int) one-result ) ) (funcall sub-finder (caar list) word add-random ) ) (find-matches sub-finder (cdr list) word add-random (+ trigger-offset trigger-int) ) ) ) ) ) (defun phoneme-match (prefix word) "Note: the result of this function is the suffix of WORD if it matches, nil otherwise. We return a list of the result to suit the needs of the function find-matches. " (let ((len (length prefix))) (when (and (>= (length word) len) (string= prefix word :end2 len) ) (list (subseq word len)) ) ) ) (defun phoneme-list-match (prefix-list word) (cond ((null prefix-list) nil ) ((listp prefix-list) (or (phoneme-match (first prefix-list) word) (phoneme-list-match (rest prefix-list) word) ) ) (t (phoneme-match prefix-list word) ) ) ) (defun phoneme-list-match-sort (prefix-list word add-random) (declare (ignore add-random)) (phoneme-list-match (if (listp prefix-list) (sort (copy-list prefix-list) #'> :key #'length) ;; Compare longest prefixes first. Otherwise, zero length matches are bad. prefix-list ) word ) ) (defun phoneme-find-matches (list word add-random) (find-matches #'phoneme-list-match-sort list word add-random) ) (defun pattern-find-matches (list word add-random &optional accu) "LIST has the following structure: (LIST_1 LIST_2 ... LIST_n) Each list contains a pattern match for a prefix of word. WORD is a string. " (declare (special allow-prefix)) (cond ((null list) (when (or allow-prefix (string= word "") ) (list (reverse accu)) ) ) ((string= word "") nil ) (t ; both LIST and WORD are non-empty (mapcan #'(lambda (step-result) ;; step-result will be a list of dotted lists: ;; ((TRIGGER-VALUE . REST-WORD) ...) ;; TRIGGER-VALUE is inserted by find-matches, REST-WORD is the ;; result of the successful PHONEME-MATCH. (pattern-find-matches (rest list) (cdr step-result) add-random (cons (car step-result) accu ) ) ) (phoneme-find-matches (car list) word add-random) ) ) ) ) ;; I could've written the following two functions in one line in ;; decompose-word, but the lambda-nesting level would be wild. (defun variant-find-matches (list word add-random) (find-matches #'pattern-find-matches list word add-random) ) (defun decompose-word (structure word &key (add-random 0.0) (allow-prefix nil)) "Given a WORD (a string) and a STUCTURE, returns all descriptions for that word found in STRUCTURE. This is the reverse of COMPOSE-WORD. If ADD-RANDOM is >= 0.0, the returned values will randomly vary in their according range multiplied by ADD-RANDOM (starting at the beginning of the range). Note that this function is much slower than COMPOSE-WORD, because the whole structure has to be searched, all possiblities must be gathered and many string comparisons have to be performed. " ;; For this function, it would have been nice to have prolog. The whole ;; back-tracking has to be performed by hand. Kotz. Nice to have mapcan. (declare (special allow-prefix)) (mapcan #'(lambda (variant-list) (variant-find-matches variant-list word add-random ) ) structure ) ) ;---------------------------------------------------------------------- ;; Lexicon parser: (defun reduce-lexicon (entry-type arguments) (mapcar #'(lambda (entry) (cond ((and (listp (first entry)) (eq (caar entry) entry-type) ) (list (eval (first entry)) (second entry)) ) (t entry ) ) ) arguments ) ) (defun make-lexicon-aux (arguments) (let ((*hash-stems* (make-hash-table :test #'equalp))) (declare (special *hash-stems*)) (reduce-lexicon 'lex::composite (reduce-lexicon 'lex::stem arguments ) ) ) ) ;---------------------------------------------------------------------- ; stem (defun concat-with-dots (radicals) (cond ((null radicals) "" ) ((= (length radicals) 1) (string (first radicals)) ) (t (concatenate 'string (string (first radicals)) "." (concat-with-dots (rest radicals)) ) ) ) ) (defun stem-aux (number phondef &optional radicals) (declare (special lex::*phonotactics* *hash-stems*)) ;(format t "~&DEBUG: ~a~&" radicals) (let ((result (concatenate 'string (compose-word lex::*phonotactics* phondef) (if radicals "<" "") (concat-with-dots radicals) (if radicals ">" "") ) ) ) (if (gethash number *hash-stems*) (error "Error: Stem ~a is already defined in lexicon." number) (setf (gethash number *hash-stems*) result) ) (format t "~&found stem: ~a~&" result) (intern (string-upcase result) :lex) ) ) ;---------------------------------------------------------------------- ; The lexicon interface (defmacro lex::make-lexicon (&rest arguments) `(make-lexicon-aux ',arguments) ) (defmacro lex::stem (number phonemes &optional radicals) `(stem-aux ,number ',phonemes ',radicals) ) ;; end