Human readable base conversion

Code review time… In a conversation about URL shorteners and “Coke Rewards”, I realized that there was a case where I needed to be able to generate safe character strings that human beings could reliably type back in. The typical Base62 systems, where there is ambiguity between characters such as O, o and 0, make things hard (along with all of those upper- vs. lower-case distinctions).

Here’s the quick module I put together: a base converter whose default digit set is safe for humans to read and type back.

import types

class BaseConverter(object):
    """Convert integers between decimal and a configurable set of digits.

    A digit set is either a plain string (one character per digit value) or
    a sequence of tuples.  In the tuple form, the position of each tuple is
    the digit's value, the first entry is the spelling used for output, and
    every entry is accepted as input for that value.  The default digit set,
    ``HUMAN_TABLE``, folds easily-confused characters (``0``/``O``/``o``,
    ``1``/``I``/``l`` ...) onto the same value.

    Input is always read one character at a time, so multi-character
    spellings (see the ``'Zero '`` example below) are only usable for output.

    >>> v = BaseConverter(BaseConverter.BASE10)
    >>> v.to_decimal(22)
    22
    >>> v.from_decimal(22)
    '22'

    >>> v = BaseConverter(BaseConverter.BASE2)
    >>> v.to_decimal(22)
    Traceback (most recent call last):
        ...
    ValueError: character '2' not in base
    >>> v.to_decimal(10)
    2
    >>> v.to_decimal('10')
    2
    >>> v.from_decimal(22)
    '10110'

    >>> v = BaseConverter()
    >>> v.to_decimal(22)
    58
    >>> v.from_decimal(123123)
    '5h17'
    >>> v.to_decimal('5H17')
    123123

    >>> v = BaseConverter(BaseConverter.BASE62)
    >>> v.from_decimal(257938572394)
    '4XYBxik'
    >>> v.to_decimal('4XYBxik')
    257938572394

    >>> v = BaseConverter((('Zero ',),('One ',)))
    >>> v.from_decimal(BaseConverter(BaseConverter.BASE2).to_decimal('1101'))
    'One One Zero One '

    """

    # Base 28: each row lists the output spelling first, followed by the
    # look-alike characters that are accepted as the same digit on input.
    HUMAN_TABLE = (
        ('0','O','o','Q','q'),
        ('1','I','i','L','l','J','j'),
        ('2','Z','z'),
        ('3',),
        ('4',),
        ('5','S','s'),
        ('6',),
        ('7',),
        ('8',),
        ('9',),
        ('a','A',),
        ('b','B',),
        ('c','C',),
        ('d','D',),
        ('e','E',),
        ('f','F',),
        ('g','G',),
        ('h','H',),
        ('k','K',),
        ('m','M',),
        ('n','N',),
        ('p','P',),
        ('r','R',),
        ('t','T',),
        ('u','U','V','v'),
        ('w','W',),
        ('x','X',),
        ('y','Y',),
    )

    BASE2  = "01"
    BASE10 = "0123456789"
    BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    # Hexadecimal: upper case on output, either case accepted on input.
    BASE16 = (
        ('0',),
        ('1',),
        ('2',),
        ('3',),
        ('4',),
        ('5',),
        ('6',),
        ('7',),
        ('8',),
        ('9',),
        ('A','a',),
        ('B','b',),
        ('C','c',),
        ('D','d',),
        ('E','e',),
        ('F','f',),
    )

    # Text types on both Python 2 (str, unicode) and Python 3 (str, str);
    # avoids the Python-2-only ``types.StringType`` / ``types.UnicodeType``.
    _STRING_TYPES = (type(""), type(u""))

    def __init__(self, digitset=HUMAN_TABLE):
        """Build the lookup tables for ``digitset``.

        ``digitset`` is a string of single-character digits, or a sequence
        of tuples as described in the class docstring.
        """
        if isinstance(digitset, self._STRING_TYPES):
            # Normalise a plain string to one single-character entry per digit.
            digitset = list(digitset)
        self.digitset = digitset

        self.base = len(self.digitset)
        # Never read by this class; retained in case outside code reads it.
        self.output_map = {}

        # The first spelling of each digit is the one emitted on output.
        self.output_digits = [spellings[0] for spellings in self.digitset]
        # Every spelling maps back to the digit's value; if a character is
        # listed under several digits, the last listing wins.
        self.input_set = {}
        for value, spellings in enumerate(self.digitset):
            for char in spellings:
                self.input_set[char] = value

    def from_decimal(self, i):
        """Return decimal integer ``i`` spelled in this converter's digits."""
        return self.convert(i, self.BASE10, self.output_digits)

    def to_decimal(self, s):
        """Return the integer spelled by ``s`` in this converter's digits.

        ``s`` is passed through ``str()`` first, so an int such as ``22`` is
        read as the digit string ``'22'``.  Raises ``ValueError`` if ``s``
        contains a character outside the digit set or has no digits at all.
        """
        return int(self.convert(s, self.input_set, self.BASE10))

    def convert(self, number, fromdigits, todigits):
        """Convert ``number`` from ``fromdigits`` to ``todigits``.

        ``fromdigits`` is either a string of digits (its length is the source
        base) or a mapping from character to digit value, in which case the
        source base is this converter's own base.  ``todigits`` is an indexable
        sequence of output spellings whose length is the target base.
        """
        if isinstance(fromdigits, self._STRING_TYPES):
            fbase = len(fromdigits)
            lookup = {char: value for value, char in enumerate(fromdigits)}
        else:
            fbase = self.base
            lookup = fromdigits

        return self._convert(number, fbase, lookup, todigits)

    @staticmethod
    def _convert(number, fbase, fromdigits, todigits):
        """Re-spell ``number`` (read in base ``fbase``) using ``todigits``.

        Based on http://code.activestate.com/recipes/111286/

        ``fromdigits`` maps each input character to its digit value.  A
        leading ``'-'`` is carried over to a non-zero result.

        Raises ``ValueError`` when ``number`` has no digits, contains a
        character that is not in ``fromdigits``, or when a non-zero value
        must be written with fewer than two target digits.
        """
        text = str(number)

        negative = text.startswith('-')
        if negative:
            text = text[1:]
        if not text:
            # Previously '' crashed with IndexError and '-' silently became 0.
            raise ValueError("no digits to convert")

        # Accumulate the integer value of the input, most significant first.
        value = 0
        for char in text:
            if char not in fromdigits:
                raise ValueError("character '%s' not in base" % char)
            value = value * fbase + fromdigits[char]

        if value == 0:
            # Zero is a single digit and never carries a sign.
            return todigits[0]

        tbase = len(todigits)
        if tbase < 2:
            # divmod by 1 never shrinks the value (endless loop); by 0 it
            # raises ZeroDivisionError.
            raise ValueError("target digit set needs at least two digits")

        # Peel off digits least significant first, then reverse once rather
        # than prepending to a string on every step.
        pieces = []
        while value > 0:
            value, digit = divmod(value, tbase)
            pieces.append(todigits[digit])
        pieces.reverse()

        result = ''.join(pieces)
        if negative:
            result = '-' + result
        return result

# Ready-made converters for the digit sets defined on BaseConverter.
binary = BaseConverter(BaseConverter.BASE2)
# NOTE: this name shadows the built-in ``hex()`` inside this module; it is
# left as-is because renaming it would break code that imports it.
hex = BaseConverter(BaseConverter.BASE16)
base62 = BaseConverter(BaseConverter.BASE62)
human = BaseConverter()

if __name__ == '__main__':
    # Running the module as a script executes the doctests in the
    # docstrings of this module.
    import doctest
    doctest.testmod()