{- - Decode.hs - - Module containing the code to decode LZW encodings - - Paul Sanders, Applications Research Division, BTL 1992 - - DEC_VERSION 1 uses a list with keys in ascending order as a table, ie. - entry n is given by table!!n. - - DEC_VERSION 2 uses a list with keys in descending order as a table, ie. - entry n is given by table!!(#table-n). We don't need to calculate the - length of the table however as this is given by the value of the next - code to be added. - - DEC_VERSION 3 uses a balanced binary tree to store the keys. We can do - this cheaply by putting the key in the correct place straight away and - therefore not doing any rebalancing. -} module Decode (decode) where import Prelude hiding( lookup ) -- lookup defined locally import Defaults import BinConv data Optional a = NONE | SOME a deriving (Eq, Show{-was:Text-}) {- We ideally want to store the table as an array but these are inefficient - so we use a list instead. We don't use the tree used by encode since we - can make use of the fact that all our keys (the codes) come in order and - will be placed at the end of the table, at position 'code'. - - An entry of (SOME n, 'c') indicates that this code has prefix code n - and final character c. -} {- Kick off the decoding giving the real function the first code value and - the initial table. -} decode :: [Int] -> String decode [] = [] decode cs = decode' cs first_code init_table {- decode` decodes the first character which is special since no new code - gets added for it. It is also special in so far as we know that the - code is a singleton character and thus has prefix NONE. The '@' is a - dummy character and can be anything. -} decode' [] _ _ = [] decode' (c:cs) n t = ch : do_decode cs n c ch t where (NONE, ch) = lookup c t {- do_decode decodes all the codes bar the first. - - If the code is in the table (ie the code is less than the next code to be - added) then we output the string for that code (using unfold if a prefix - type) and add a new code to the table with the final character output as - the extension and the previous code as prefix. - - If the code is not one we know about then we give it to decode_special for - special treatment -} do_decode [] _ _ _ _ = [] do_decode (c:cs) n old_n fin_char t = if c >= n -- we don't have this code in the table yet then decode_special (c:cs) n old_n fin_char t else outchs ++ do_decode cs n' c (head outchs) t' where outchs = reverse (unfold c (n-1) t) (n', t') = if n == max_entries then (n, t) else (n+1, insert n (SOME old_n, head outchs) t) {- decode_special decodes a code that isn't in the table. - - The algorithm in Welch describes why this works, suffice it to say that - the output string is given by the last character output and the string - given by the previous code. An entry is also made in the table for the - last character output and the old code. -} decode_special (c:cs) n old_n fin_char t = outchs ++ do_decode cs n' c (head outchs) t' where outchs = reverse (fin_char : unfold old_n (n-1) t) (n', t') = if n == max_entries then (n, t) else (n+1, insert n (SOME old_n, fin_char) t) {- unfold a prefix code. - - chain back through the prefixes outputting the extension characters as we - go. -} unfold n t_len t = if prefix == NONE then [c] else c : unfold n' t_len t where (prefix, c) = lookup n t SOME n' = prefix data DecompTable = Branch DecompTable DecompTable | Leaf (Optional Int, Char) deriving (Show{-was:Text-}) {- Insert a code pair into the table. The position of the code is given by - the breakdown of the key into its binary digits -} insert n v t = insert' (dec_to_binx code_bits n) v t {- We can place a code exactly where it belongs using the following algorithm. - Take the code's binary rep expanded to the maximum number of bits. Start - at the first bit, if a 0 then insert the code to the left, if a 1 then - insert to the right. Carry on with the other bits until we run out and are - thus at the right place and can construct the node. -} insert' [] v (Leaf _) = Leaf v insert' ('0' : bs) v (Branch l r) = Branch (insert' bs v l) r insert' ('1' : bs) v (Branch l r) = Branch l (insert' bs v r) insert' ('0' : bs) v t = Branch (insert' bs v t) t insert' ('1' : bs) v t = Branch t (insert' bs v t) {- For a lookup we use the same mechanism to locate the position of the item - in the tree but if we find that the route has not been constructed or the - node has the dummy value then that code is not yet in the tree. The way - in which the decode algorithm works this should never happen. -} lookup n t = lookup' (dec_to_binx code_bits n) t lookup' [] (Leaf v) = v lookup' ('0' : bs) (Branch l _) = lookup' bs l lookup' ('1' : bs) (Branch _ r) = lookup' bs r lookup' _ _ = error "tree insert error - seek professional help" init_table = mk_init_table 0 (Leaf (SOME 99999, '@')) mk_init_table 256 t = t mk_init_table n t = mk_init_table (n+1) (insert n (NONE, toEnum n) t)