#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import pprint
import unicodedata

def make_table(Deffile):
    """Build the CXS lookup table from an iterable of definition lines.

    The table is pre-seeded so that each lowercase ASCII letter maps to
    itself.  Every line of ``Deffile`` is split on whitespace; lines that
    start with ``#``, have fewer than three fields, or whose second field
    is neither ``=`` nor ``=>`` are ignored.  The remaining lines are read
    as follows:

    ``HEX = KEY [FLAG]``
        Stores ``table[KEY] = (char, deprecated, 0)`` where ``char`` is the
        character whose hexadecimal code point is ``HEX`` and
        ``deprecated`` is the ``FLAG`` token if present, else ``False``.
        If more than two fields follow the ``=``, an error line is printed
        and parsing stops; entries collected so far are still returned.

    ``KEY => HEX[+HEX...]``
        Stores ``table[KEY] = (chars, False, 1)`` where ``chars`` is the
        concatenation of the characters for the ``+``-separated
        hexadecimal code points.  Fields after the first one are ignored.

    Args:
        Deffile: iterable of text lines (e.g. an open definition file).

    Returns:
        dict mapping each key string to a 3-tuple
        ``(unicode_text, deprecated, flag)`` with ``flag`` 0 for ``=``
        entries (and the pre-seeded letters) and 1 for ``=>`` entries.

    Raises:
        ValueError: if a code-point field is not valid hexadecimal.
    """
    # Resolve the text constructors once so the function works whether or
    # not the Python 2 builtins exist.
    try:
        to_text, to_char = unicode, unichr  # Python 2
    except NameError:
        to_text, to_char = str, chr  # Python 3: str is already unicode

    table = dict((letter, (to_text(letter), False, 0))
                 for letter in 'abcdefghijklmnopqrstuvwxyz')

    for line in Deffile:
        if line.startswith('#'):
            continue
        tokens = line.split()
        if len(tokens) < 3 or tokens[1] not in ('=', '=>'):
            continue
        key, sign, data = tokens[0], tokens[1], tokens[2:]

        if sign == '=':
            if len(data) > 2:
                # Single parenthesised argument prints the same text under
                # both the print statement and the print function.
                print('Error! %s %d' % (data, len(data)))
                break
            deprecated = data[1] if len(data) == 2 else False
            # In this form the code point is on the left and the table
            # key on the right of the sign.
            table[data[0]] = (to_char(int(key, 16)), deprecated, 0)
        else:
            chars = ''.join(
                [to_char(int(code, 16)) for code in data[0].split('+')])
            table[key] = (chars, False, 1)
    return table

def ucipa2cxs(utext, table):
    """Convert a unicode IPA string to CXS, one character at a time.

    A reverse lookup (character -> CXS key) is built from the entries of
    ``table`` whose last field is false; entries whose last field is true
    are skipped.  When several keys map to the same character, a key whose
    ``deprecated`` field (index 1) is false is preferred over a deprecated
    one, and ties are resolved by taking the key that sorts first, so the
    result does not depend on dict ordering.

    Args:
        utext: unicode string to convert.
        table: dict of ``key -> (text, deprecated, flag)`` tuples.

    Returns:
        The concatenated CXS keys; a character with no reverse entry is
        emitted as a single backslash.

    Raises:
        AssertionError: if ``utext`` does not compare equal to its unicode
            conversion (not checked when assertions are disabled).
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3: str is already unicode
    assert text_type(utext) == utext

    current = {}
    deprecated = {}
    for key in sorted(table):
        entry = table[key]
        if entry[-1]:
            continue
        bucket = deprecated if entry[1] else current
        # setdefault keeps the first (lowest-sorting) key per character.
        bucket.setdefault(entry[0], key)

    # Deprecated spellings are only a fallback: current ones overwrite them.
    reverse = dict(deprecated)
    reverse.update(current)

    return ''.join([reverse.get(uchar, '\\') for uchar in utext])

def cxs2uipa(text, table):
    """Convert a CXS string to unicode IPA by repeated prefix matching.

    At each position the keys of ``table`` are tried in reverse-sorted
    order and the first key that ``text`` starts with is consumed; its
    replacement is the first field of the table entry.  Conversion stops
    at the first position where no key matches, and whatever was converted
    up to that point is returned.

    Args:
        text: CXS string to convert.
        table: dict of ``key -> (text, deprecated, flag)`` tuples.

    Returns:
        The concatenated replacements for the matched prefix of ``text``.
    """
    # A key always sorts before any longer key it is a prefix of, so in
    # reverse order the longest matching key is found first.
    candidates = sorted(table, reverse=True)

    pieces = []
    while text:
        for key in candidates:
            # An empty key would match without consuming input and loop
            # forever, so it is never accepted.
            if key and text.startswith(key):
                pieces.append(table[key][0])
                text = text[len(key):]
                break
        else:
            # Nothing matches at this position: give up on the remainder.
            break
    return ''.join(pieces)

if __name__ == '__main__':
    # Round-trip self-test: IPA -> CXS -> IPA should give back the input.
    import sys

    # The context manager closes the definition file even if parsing fails.
    with open('CXS.def') as def_file:
        cxs_table = make_table(def_file)

    # Alternative sample with combining overlays: u'ʊ̶ʋɓɟʙɮɪ̵'
    ipatext = u'ɓa͡ɪɓ'

    cxstext = ucipa2cxs(ipatext, cxs_table)
    ipatext2 = cxs2uipa(cxstext, cxs_table)

    if ipatext == ipatext2:
        message = u'Test passed: %s\n' % ipatext2
    else:
        message = u'%s != %s\n' % (ipatext, ipatext2)

    # Emit UTF-8 bytes in both branches so that reporting a failure cannot
    # itself die with UnicodeEncodeError when stdout is not UTF-8.  Where
    # stdout is a text stream, the bytes go to its underlying buffer.
    stream = getattr(sys.stdout, 'buffer', sys.stdout)
    stream.write(message.encode('utf8'))

