Human-readable base conversion

Code review time… During a conversation about URL shorteners and “Coke Rewards”, I realized that there was a case where I needed to generate safe character strings that human beings could reliably type back in. Typical Base62 systems make this hard because of the ambiguity between look-alike characters (O, o and 0), along with all of the upper- vs. lower-case confusion.

Here’s the quick module I put together: a base converter that produces numbers that are safe for humans to read and type back in.

import types

class BaseConverter(object):
    """Convert integers to and from strings written in an arbitrary digit set.

    A digit set is either a plain string, where each character is one digit
    (``"01"`` is binary), or a sequence of tuples, one tuple per digit value.
    In the tuple form the first entry is the symbol written on output and
    every entry (including the first) is accepted on input, which lets
    look-alike characters such as ``O``, ``o`` and ``0`` all decode to the
    same value.  With no argument the human-safe ``HUMAN_TABLE`` is used.

    Input is decoded one character at a time, so multi-character symbols
    (see the last example) can be written but not read back.

    >>> v = BaseConverter(BaseConverter.BASE10)
    >>> v.to_decimal(22)
    22
    >>> v.from_decimal(22)
    '22'

    >>> v = BaseConverter(BaseConverter.BASE2)
    >>> v.to_decimal(22)
    Traceback (most recent call last):
    ValueError: character '2' not in base
    >>> v.to_decimal(10)
    2
    >>> v.to_decimal('10')
    2
    >>> v.from_decimal(22)
    '10110'

    >>> v = BaseConverter()
    >>> v.to_decimal(22)
    58
    >>> v.from_decimal(123123)
    '5h17'
    >>> v.to_decimal('5H17')
    123123
    >>> v.from_decimal(-58)
    '-22'

    >>> v = BaseConverter(BaseConverter.BASE62)
    >>> v.from_decimal(257938572394)
    '4XYBxik'
    >>> v.to_decimal('4XYBxik')
    257938572394

    >>> v = BaseConverter((('Zero ',), ('One ',)))
    >>> v.from_decimal(BaseConverter(BaseConverter.BASE2).to_decimal('1101'))
    'One One Zero One '

    """

    # Base-28 "human safe" alphabet.  The first entry of each row is the
    # symbol produced on output; the remaining entries are look-alikes (or
    # the other letter case) that are accepted on input as the same digit.
    # The letters i, j, l, o, q, s, v and z therefore never appear in output.
    HUMAN_TABLE = (
        ('0', 'O', 'o', 'Q', 'q'),
        ('1', 'I', 'i', 'L', 'l', 'J', 'j'),
        ('2', 'Z', 'z'),
        ('3',),
        ('4',),
        ('5', 'S', 's'),
        ('6',),
        ('7',),
        ('8',),
        ('9',),
        ('a', 'A'),
        ('b', 'B'),
        ('c', 'C'),
        ('d', 'D'),
        ('e', 'E'),
        ('f', 'F'),
        ('g', 'G'),
        ('h', 'H'),
        ('k', 'K'),
        ('m', 'M'),
        ('n', 'N'),
        ('p', 'P'),
        ('r', 'R'),
        ('t', 'T'),
        ('u', 'U', 'V', 'v'),
        ('w', 'W'),
        ('x', 'X'),
        ('y', 'Y'),
    )

    BASE2 = "01"
    BASE10 = "0123456789"
    BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    # Hexadecimal: written in upper case, read in either case.
    BASE16 = (
        ('0',),
        ('1',),
        ('2',),
        ('3',),
        ('4',),
        ('5',),
        ('6',),
        ('7',),
        ('8',),
        ('9',),
        ('A', 'a'),
        ('B', 'b'),
        ('C', 'c'),
        ('D', 'd'),
        ('E', 'e'),
        ('F', 'f'),
    )

    # Text types on both Python 2 (str, unicode) and Python 3 (str, str).
    _STRING_TYPES = (str, type(u""))

    def __init__(self, digitset=HUMAN_TABLE):
        """Build the lookup tables for ``digitset``.

        ``digitset`` is a string (one character per digit) or a sequence of
        tuples as described on the class.

        Raises ValueError if the set has fewer than two digits, because a
        base below 2 cannot represent numbers positionally.
        """
        if isinstance(digitset, self._STRING_TYPES):
            # One character per digit; a 1-character string behaves like a
            # 1-tuple below (indexing and iterating both yield the character).
            self.digitset = list(digitset)
        else:
            self.digitset = digitset

        self.base = len(self.digitset)
        if self.base < 2:
            raise ValueError("a digit set needs at least two digits")

        # Not used by this class; kept so existing attribute access still works.
        self.output_map = {}

        # Symbol written for each digit value (the first entry of its row).
        self.output_digits = [symbols[0] for symbols in self.digitset]
        # Every accepted input symbol -> its digit value.
        self.input_set = {}
        for value, symbols in enumerate(self.digitset):
            for symbol in symbols:
                self.input_set[symbol] = value

    def from_decimal(self, i):
        """Return the integer ``i`` written in this converter's digits.

        ``i`` is passed through ``str()``, so a decimal string works too.
        Raises ValueError if that text contains anything other than decimal
        digits and an optional leading minus sign.
        """
        return self.convert(i, self.BASE10, self.output_digits)

    def to_decimal(self, s):
        """Return the integer value of ``s`` read in this converter's digits.

        ``s`` is passed through ``str()``, so ``to_decimal(10)`` is the same
        as ``to_decimal('10')``.  Raises ValueError if a character is not an
        accepted input symbol or if there are no digits at all.
        """
        return int(self.convert(s, self.input_set, self.BASE10))

    def convert(self, number, fromdigits, todigits):
        """Re-encode ``number`` from one digit set to another.

        ``fromdigits`` is either a string (one character per digit, its
        length is the source base) or a mapping from symbol to digit value,
        in which case the source base is taken to be ``self.base``.
        ``todigits`` is an indexable sequence of output symbols whose length
        is the target base.  Returns the converted number as a string.
        """
        if isinstance(fromdigits, self._STRING_TYPES):
            fbase = len(fromdigits)
            lookup = dict(
                (symbol, value) for value, symbol in enumerate(fromdigits))
        else:
            fbase = self.base
            lookup = fromdigits

        return self._convert(number, fbase, lookup, todigits)

    @staticmethod
    def _convert(number, fbase, fromdigits, todigits):
        """Convert ``number`` from base ``fbase`` to base ``len(todigits)``.

        ``fromdigits`` maps each input character to its digit value and
        ``todigits`` is indexed by digit value to get the output symbol.
        A leading ``-`` is carried over to the result unchanged.

        Raises ValueError if ``number`` has no digits, contains a character
        missing from ``fromdigits``, or if ``todigits`` has fewer than two
        entries (the division loop below would never terminate).
        """
        # Based on http://code.activestate.com/recipes/111286/
        text = str(number)

        negative = text.startswith("-")
        if negative:
            text = text[1:]
        if not text:
            raise ValueError("no digits to convert")

        tbase = len(todigits)
        if tbase < 2:
            raise ValueError("a digit set needs at least two digits")

        # Accumulate the value of the input as a plain integer.
        value = 0
        for char in text:
            try:
                digit_value = fromdigits[char]
            except KeyError:
                raise ValueError("character '%s' not in base" % char)
            value = value * fbase + digit_value

        # Peel off output digits least-significant first, then reverse once
        # instead of prepending to a string on every step.
        symbols = []
        while value > 0:
            value, remainder = divmod(value, tbase)
            symbols.append(todigits[remainder])
        if not symbols:
            symbols.append(todigits[0])
        symbols.reverse()

        result = "".join(symbols)
        if negative:
            result = "-" + result
        return result

# Ready-made converters, one per predefined digit set on BaseConverter.
human = BaseConverter()  # no argument: uses the default HUMAN_TABLE
binary = BaseConverter(BaseConverter.BASE2)
hex = BaseConverter(BaseConverter.BASE16)  # NOTE: shadows the builtin hex() in this module
base62 = BaseConverter(BaseConverter.BASE62)

if __name__ == "__main__":
    # When run as a script, execute the doctests found in this module's
    # docstrings.  Nothing happens on a plain import.
    import doctest
    import random  # NOTE(review): not used in the visible code; kept in case later code relies on it
    doctest.testmod()