Human readable base conversion

Code review time… In a conversation about URL shorteners and “Coke Rewards”, I realized that there was a case where I needed to generate safe character strings that could be reliably typed back in by human beings. Typical Base62 systems, with their ambiguity between O, o and 0 (along with all of the upper- vs. lower-case confusion), make that hard.

Here’s the quick module I put together: a base converter that produces human-safe, human-readable numbers.

  1
  2import types
  3
  4class BaseConverter(object):
  5    """ Convert a number between two bases of digits, by default it’s a human safe set
  6
  7    >>> v = BaseConverter(BaseConverter.BASE10)
  8    >>> v.to_decimal(22)
  9    22
 10    >>> v.from_decimal(22)
 11    ’22’
 12
 13    >>> v = BaseConverter(BaseConverter.BASE2)
 14    >>> v.to_decimal(22)
 15    Traceback (most recent call last):
 16 17    ValueError: character ‘2’ not in base
 18    >>> v.to_decimal(10)
 19    2
 20    >>> v.to_decimal(’10’)
 21    2
 22    >>> v.from_decimal(22)
 23    ‘10110’
 24
 25    >>> v = BaseConverter()
 26    >>> v.to_decimal(22)
 27    58
 28    >>> v.from_decimal(123123)
 29    ‘5h17’
 30    >>> v.to_decimal(‘5H17’)
 31    123123
 32
 33    >>> v = BaseConverter(BaseConverter.BASE62)
 34    >>> v.from_decimal(257938572394L)
 35    ‘4XYBxik’
 36    >>> v.to_decimal(‘4XYBxik’)
 37    257938572394
 38
 39    >>> v = BaseConverter(((‘Zero ‘,),(‘One ‘,)))
 40    >>> v.from_decimal(BaseConverter(BaseConverter.BASE2).to_decimal(‘1101’))
 41    ‘One One Zero One ‘
 42
 43    """
 44
 45    HUMAN_TABLE = (
 46        (0,O,o,Q,q),
 47        (1,I,i,L,l,J,j),
 48        (2,Z,z),
 49        (3,),
 50        (4,),
 51        (5,S,s),
 52        (6,),
 53        (7,),
 54        (8,),
 55        (9,),
 56        (a,A,),
 57        (b,B,),
 58        (c,C,),
 59        (d,D,),
 60        (e,E,),
 61        (f,F,),
 62        (g,G,),
 63        (h,H,),
 64        (k,K,),
 65        (m,M,),
 66        (n,N,),
 67        (p,P,),
 68        (r,R,),
 69        (t,T,),
 70        (u,U,V,v),
 71        (w,W,),
 72        (x,X,),
 73        (y,Y,),
 74    )
 75
 76    BASE2 = "01"
 77    BASE10 = "0123456789"
 78    BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 79    BASE16 = (
 80        (0,),
 81        (1,),
 82        (2,),
 83        (3,),
 84        (4,),
 85        (5,),
 86        (6,),
 87        (7,),
 88        (8,),
 89        (9,),
 90        (A,a,),
 91        (B,b,),
 92        (C,c,),
 93        (D,d,),
 94        (E,e,),
 95        (F,f,),
 96    )
 97
 98    def __init__(self, digitset=HUMAN_TABLE):
 99        if type(digitset) in (types.StringType, types.UnicodeType) :
100            self.digitset = [(v) for v in digitset]
101        else :
102            self.digitset = digitset
103
104        self.base = len(self.digitset)
105        self.output_map = {}
106
107        self.output_digits = [v[0] for v in self.digitset]
108        self.input_set = {}
109        for idx, l in enumerate(self.digitset) :
110            for k in l :
111                self.input_set[k] = idx
112
113        #print ‘OUT DIGITS’, self.output_digits
114        #print ‘INPUT SET’, self.input_set
115
116    def from_decimal(self, i):
117        return self.convert(i, self.BASE10, self.output_digits)
118
119    def to_decimal(self, s):
120        return int(self.convert(s, self.input_set, self.BASE10))
121
122    def convert(self, number, fromdigits, todigits) :
123        fd = fromdigits
124        fbase = self.base
125        if type(fromdigits) in (types.StringType, types.UnicodeType) :
126            fbase = len(fromdigits)
127            fd = dict([(fromdigits[idx], idx) for idx in range(0,len(fromdigits))])
128
129        return self._convert(number, fbase, fd, todigits)
130
131    @staticmethod
132    def _convert(number, fbase, fromdigits, todigits) :
133        # Based on http://code.activestate.com/recipes/111286/
134        number = str(number)
135
136        if number[0] == -:
137            number = number[1:]
138            neg = 1
139        else:
140            neg = 0
141
142        # make an integer out of the number
143        x = 0
144        #print "fbase = ", len(fromdigits)
145        for digit in number :
146            try :
147                x = x * fbase + fromdigits[digit]
148            except KeyError, e:
149                raise ValueError("character ‘%s’ not in base" % digit)
150
151        # create the result in base ‘len(todigits)’
152        tbase = len(todigits)
153        if x == 0:
154            res = todigits[0]
155        else:
156            res = ""
157        while x > 0:
158            #print "divmod(%d, %d) = %r" % (x, tbase, divmod(x,tbase))
159            x, digit = divmod(x, tbase)
160            res = todigits[digit] + res
161
162        if neg:
163            res = - + res
164        return res
165
# Ready-made converters for the digit sets defined on BaseConverter.
binary = BaseConverter(BaseConverter.BASE2)
# NOTE: this name shadows the built-in hex() inside this module; it is kept
# because it is part of the module's public interface.
hex = BaseConverter(BaseConverter.BASE16)
base62 = BaseConverter(BaseConverter.BASE62)
human = BaseConverter()  # no argument: uses the default HUMAN_TABLE digit set

if __name__ == "__main__":
    # Run the doctests in this module when it is executed as a script.
    import doctest
    doctest.testmod()