'''Huffman variable length encoding'''
from heap import Heap
from simpletable import prettyTable
def huffman(distrib):
'''distrib can be a list of entries (freq, sym) where
sym is a symbol to encode and freq is a number, its frequency,
or distrib is a string with each line: sym freq.
A pair (codes, tree) is returned, where
codes is list of entries (sym, code, freq), and
tree is a recursively nested list, with internal nodes like the root:
[freq-sum, '', left-tree, right-tree],
and leaves
[freq, sym] '''
if isinstance(distrib, str):
distrib = fromTableToList(distrib)
h = Heap([list(e) for e in distrib], maxheap=False)
while len(h) > 1:
left = h.popitem()
right = h.popitem()
h.add([left[0]+right[0], '', left, right])
tree = h.popitem() # single tree remaining is full tree
codes = []
makeCodes(tree, '', codes)
return (codes, tree)
def fromTableToList(s):
'''convert table string with lines: sym freqString
to list of (freq, sym).''' # freq first for Python comparison in heap
lines = s.strip().splitlines()
pairs = [line.split() for line in lines]
return [(int(freq), sym) for [sym, freq] in pairs]
def makeCodes(tree, prefix, codes):
''' final codes filled out with entries (sym, code, freq)
from Huffman sub-tree and its code prefix .'''
[freq, sym, *children] = tree # children is empty list for a leaf
if sym != '':
codes.append((sym, freq, prefix))
else:
left, right = children
makeCodes(left, prefix+'0', codes)
makeCodes(right, prefix+'1', codes)
def printTree(tree, indent='', bit = '*'):
'''print Huffman tree recursively,
indicating depth of children by indent width;
nodes other than the root show the bit added. '''
[freq, sym, *children] = tree
if sym != '':
print(indent, bit, sym, freq)
else:
print(indent, bit, 'sum:', freq)
left, right = children
printTree(left, indent+' ', '0')
printTree(right, indent+' ', '1')
table1 = '''
A 5
B 3
_ 2
C 1
L 1
K 1'''
table2 = '''
A 2
B 2
C 2
D 2
E 2
F 2
G 2
H 2'''
table3 = '''
A 2
B 3
C 3
D 4
E 6
F 8'''
def main():
for t in [table1, table2, table3]:
codes, tree = huffman(t)
prettyTable(codes, 'sym freq code'.split(), justify='>><')
print('\n'+'tree (later indent indicates children)')
printTree(tree)
print('\n'+'-'*15+'\n')
if __name__ == '__main__':
main()