{- - BinConv.hs - - Paul Sanders, SRD. 1992 - - This module contains routines for converting numbers to and from a - number of binary digits. - -} module BinConv (codes_to_ascii, ascii_to_codes, dec_to_binx) where zeroes = '0' : zeroes -- dec_to_binx converts a decimal to a fixed number of binary digits -- dec_to_binx #binary-digits decimal-number = binary-string dec_to_binx :: Int -> Int -> String dec_to_binx x y = take (x - length bin_string) zeroes ++ bin_string where bin_string = dec_to_bin y dec_to_bin = reverse . dec_to_bin' dec_to_bin' 0 = [] dec_to_bin' x = (if (x `rem` 2) == 1 then '1' else '0') : dec_to_bin' (x `div` 2) codes_to_ascii :: [Int] -> [Int] codes_to_ascii [] = [] codes_to_ascii (x:y:ns) = x_div : ((x_rem * 16) + y_div) : y_rem : codes_to_ascii ns where (x_div, x_rem) = divRem x 16 (y_div, y_rem) = divRem y 256 codes_to_ascii [n] = [x_div , x_rem] where (x_div, x_rem) = divRem n 16 ascii_to_codes [] = [] ascii_to_codes (x:y:z:ns) = (x * 16) + y_div : (y_rem * 256) + z : ascii_to_codes ns where (y_div, y_rem) = divRem y 16 ascii_to_codes [x,y] = [(x * 16) + y_rem] where (y_div, y_rem) = divRem y 16 divRem x y = (x `div` y, x `rem` y) -- missing from PreludeCore ? module Main (main) where main = putStr (show (result 1000)) result 0 = [] result n = codes_to_ascii (3077, 1192) ++ result (n-1) codes_to_ascii (x,y) = x_div : ((x_rem * 16) + y_div) : [y_rem] where (x_div, x_rem) = divRem x 16 (y_div, y_rem) = divRem y 256 divRem x y = (x `div` y, x `rem` y) -- missing from PreludeCore ? {- - Decode.hs - - Module containing the code to decode LZW encodings - - Paul Sanders, Applications Research Division, BTL 1992 - - DEC_VERSION 1 uses a list with keys in ascending order as a table, ie. - entry n is given by table!!n. - - DEC_VERSION 2 uses a list with keys in descending order as a table, ie. - entry n is given by table!!(#table-n). We don't need to calculate the - length of the table however as this is given by the value of the next - code to be added. - - DEC_VERSION 3 uses a balanced binary tree to store the keys. We can do - this cheaply by putting the key in the correct place straight away and - therefore not doing any rebalancing. -} module Decode (decode) where import Prelude hiding( lookup ) -- lookup defined locally import Defaults import BinConv data Optional a = NONE | SOME a deriving (Eq, Show{-was:Text-}) {- We ideally want to store the table as an array but these are inefficient - so we use a list instead. We don't use the tree used by encode since we - can make use of the fact that all our keys (the codes) come in order and - will be placed at the end of the table, at position 'code'. - - An entry of (SOME n, 'c') indicates that this code has prefix code n - and final character c. -} {- Kick off the decoding giving the real function the first code value and - the initial table. -} decode :: [Int] -> String decode [] = [] decode cs = decode' cs first_code init_table {- decode` decodes the first character which is special since no new code - gets added for it. It is also special in so far as we know that the - code is a singleton character and thus has prefix NONE. The '@' is a - dummy character and can be anything. -} decode' [] _ _ = [] decode' (c:cs) n t = ch : do_decode cs n c ch t where (NONE, ch) = lookup c t {- do_decode decodes all the codes bar the first. - - If the code is in the table (ie the code is less than the next code to be - added) then we output the string for that code (using unfold if a prefix - type) and add a new code to the table with the final character output as - the extension and the previous code as prefix. - - If the code is not one we know about then we give it to decode_special for - special treatment -} do_decode [] _ _ _ _ = [] do_decode (c:cs) n old_n fin_char t = if c >= n -- we don't have this code in the table yet then decode_special (c:cs) n old_n fin_char t else outchs ++ do_decode cs n' c (head outchs) t' where outchs = reverse (unfold c (n-1) t) (n', t') = if n == max_entries then (n, t) else (n+1, insert n (SOME old_n, head outchs) t) {- decode_special decodes a code that isn't in the table. - - The algorithm in Welch describes why this works, suffice it to say that - the output string is given by the last character output and the string - given by the previous code. An entry is also made in the table for the - last character output and the old code. -} decode_special (c:cs) n old_n fin_char t = outchs ++ do_decode cs n' c (head outchs) t' where outchs = reverse (fin_char : unfold old_n (n-1) t) (n', t') = if n == max_entries then (n, t) else (n+1, insert n (SOME old_n, fin_char) t) {- unfold a prefix code. - - chain back through the prefixes outputting the extension characters as we - go. -} unfold n t_len t = if prefix == NONE then [c] else c : unfold n' t_len t where (prefix, c) = lookup n t SOME n' = prefix data DecompTable = Branch DecompTable DecompTable | Leaf (Optional Int, Char) deriving (Show{-was:Text-}) {- Insert a code pair into the table. The position of the code is given by - the breakdown of the key into its binary digits -} insert n v t = insert' (dec_to_binx code_bits n) v t {- We can place a code exactly where it belongs using the following algorithm. - Take the code's binary rep expanded to the maximum number of bits. Start - at the first bit, if a 0 then insert the code to the left, if a 1 then - insert to the right. Carry on with the other bits until we run out and are - thus at the right place and can construct the node. -} insert' [] v (Leaf _) = Leaf v insert' ('0' : bs) v (Branch l r) = Branch (insert' bs v l) r insert' ('1' : bs) v (Branch l r) = Branch l (insert' bs v r) insert' ('0' : bs) v t = Branch (insert' bs v t) t insert' ('1' : bs) v t = Branch t (insert' bs v t) {- For a lookup we use the same mechanism to locate the position of the item - in the tree but if we find that the route has not been constructed or the - node has the dummy value then that code is not yet in the tree. The way - in which the decode algorithm works this should never happen. -} lookup n t = lookup' (dec_to_binx code_bits n) t lookup' [] (Leaf v) = v lookup' ('0' : bs) (Branch l _) = lookup' bs l lookup' ('1' : bs) (Branch _ r) = lookup' bs r lookup' _ _ = error "tree insert error - seek professional help" init_table = mk_init_table 0 (Leaf (SOME 99999, '@')) mk_init_table 256 t = t mk_init_table n t = mk_init_table (n+1) (insert n (NONE, toEnum n) t) {- - Defaults.hs - - Contains the tuning values for compress and uncompress - -} module Defaults where -- Maximum number of table entries (probably = 2^code_bits) max_entries :: Int max_entries = 2 ^ code_bits -- First code value available first_code :: Int first_code = 256 -- Number of bits per output character ascii_bits :: Int ascii_bits = 8 -- Number of bits to represent code by code_bits :: Int code_bits = 12 {- - Encode Mk 2, using a prefix table for the codes - - Paul Sanders, Systems Research, British Telecom Laboratories 1992 -} module Encode (encode) where import Defaults import PTTrees -- for convenience we make the code table type explicit type CodeTable = PrefixTree Char Int -- encode sets up the arguments for the real function. encode :: String -> [Int] encode input = encode' input first_code initial_table {- - encode' loops through the input string assembling the codes produced - by code_string. The first character is treated specially in that it - is not added to the table; its code is simply its ascii value. -} encode' [] _ _ = [] encode' input v t = case (code_string input 0 v t) of { (input', n, t') -> n : encode' input' (v + 1) t' } {- - code_string parses enough of the input string to produce one code and - returns the remaining input, the code and a new code table. - - The first character is taken and its place found in the code table. The - extension code table found for this character is then used as the lookup - table for the next character. - - If a character is not found in the current table then output the code - of the character associated with the current table and add the current - character to the current table and assign it the next new code value. -} code_string input@(c : input2) old_code next_code (PT p@(PTE k v t) l r) | c < k = (f1 r1 p r) | c > k = (f2 r2 p l) | otherwise = (f3 r3 k v l r) where r1 = code_string input old_code next_code l r2 = code_string input old_code next_code r r3 = code_string input2 v next_code t f1 (input_l,nl,l2) p r = (input_l,nl,PT p l2 r) f2 (input_r,nr,r2) p l = (input_r,nr,PT p l r2) f3 (input2,n,t2) k v l r = (input2, n, PT (PTE k v t2) l r) code_string input@(c : input_file2) old_code next_code PTNil | next_code >= 4096 = (input, old_code, PTNil) | otherwise = (input, old_code, PT (PTE c next_code PTNil) PTNil PTNil) code_string [] old_code next_code code_table = ([], old_code, PTNil) {- - We want the inital table to be balanced, but this is expensive to compute - as a rebalance is needed evert two inserts (yuk!). So we do the ordinary - infix-order binary tree insert but give the keys in such an order as to - give a balanced tree. - - (I would have defined the tree by hand but the constant was too big - for hc-0.41) -} initial_table :: CodeTable initial_table = foldr tab_insert PTNil balanced_list tab_insert n = insert (toEnum n) n balanced_list = [128,64,32,16,8,4,2,1,0,3,6,5,7,12,10,9,11,14,13,15,24,20,18,17,19,22, 21,23,28,26,25,27,30,29,31,48,40,36,34,33,35,38,37,39,44,42,41,43,46, 45,47,56,52,50,49,51,54,53,55,60,58,57,59,62,61,63,96,80,72,68,66,65] ++ bal_list2 ++ bal_list3 ++ bal_list4 ++ bal_list5 bal_list2 = [67,70,69,71,76,74,73,75,78,77,79,88,84,82,81,83,86,85,87,92,90,89,91, 94,93,95,112,104,100,98,97,99,102,101,103,108,106,105,107,110,109,111, 120,116,114,113,115,118,117,119,124,122,121,123,126,125,127,192,160] bal_list3 = [144,136,132,130,129,131,134,133,135,140,138,137,139,142,141,143,152, 148,146,145,147,150,149,151,156,154,153,155,158,157,159,176,168,164, 162,161,163,166,165,167,172,170,169,171,174,173,175,184,180,178,177] bal_list4 = [179,182,181,183,188,186,185,187,190,189,191,224,208,200,196,194,193, 195,198,197,199,204,202,201,203,206,205,207,216,212,210,209,211,214, 213,215,220,218,217,219,222,221,223,240,232,228,226,225,227,230,229, 231,236,234,233,235,238,237,239,248,244,242,241,243,246,245,247,252] bal_list5 = [250,249,251,254,253,255] module Main (main){-export list added by partain-} where { -- partain: with "ghc -cpp -DSLEAZY_UNBOXING", you get (guess what)? -- without it, you get the code as originally written. -- -- Things done here: -- * The obvious unboxing (e.g., Int ==> Int#). -- * use quot/rem, not div/mod -- * inline PrefixElement type into PrefixTree.PT constructor -- * cvt final clause of 3-way comparison to "otherwise" -- * use shifts, not quot/rem (not necessary: C compiler converts -- them just fine) -- -- Obviously, more egregious hacking could be done: -- * replace Tuple/List types that mention Ints with specialised -- variants #if defined(__GLASGOW_HASKELL__) && defined(SLEAZY_UNBOXING) #define FAST_INT Int# #define ILIT(x) (x#) #define IBOX(x) (I# (x)) #define _ADD_ `plusInt#` #define _SUB_ `minusInt#` #define _MUL_ `timesInt#` #define _DIV_ `divInt#` #define _QUOT_ `quotInt#` #define _REM_ `remInt#` #define _NEG_ negateInt# #define _EQ_ `eqInt#` #define _LT_ `ltInt#` #define _LE_ `leInt#` #define _GE_ `geInt#` #define _GT_ `gtInt#` #define _CHR_ chr# #define FAST_BOOL Int# #define _TRUE_ 1# #define _FALSE_ 0# #define _IS_TRUE_(x) ((x) `eqInt#` 1#) #define FAST_CHAR Char# #define CBOX(x) (C# (x)) data FAST_TRIPLE = TRIP [Char] Int# PrefixTree; #define _TRIP_(a,b,c) (TRIP (a) (b) (c)) #define PrefixElement FAST_CHAR FAST_INT PrefixTree #define _PTE_(a,b,c) (a) (b) (c) #else {- ! __GLASGOW_HASKELL__ -} #define FAST_INT Int #define ILIT(x) (x) #define IBOX(x) (x) #define _ADD_ + #define _SUB_ - #define _MUL_ * #define _DIV_ `div` #define _QUOT_ `quot` #define _REM_ `rem` #define _NEG_ - #define _EQ_ == #define _LT_ < #define _LE_ <= #define _GE_ >= #define _GT_ > #define _CHR_ toEnum #define FAST_BOOL Bool #define _TRUE_ True #define _FALSE_ False #define _IS_TRUE_(x) (x) #define FAST_CHAR Char #define CBOX(x) (x) type FAST_TRIPLE = ([Char], Int, PrefixTree); #define _TRIP_(a,b,c) ((a), (b), (c)) data PrefixElement = PTE FAST_CHAR FAST_INT PrefixTree; #define _PTE_(a,b,c) (PTE (a) (b) (c)) #endif {- ! __GLASGOW_HASKELL__ -} -- end of partain data PrefixTree = PTNil | PT PrefixElement PrefixTree PrefixTree; --create_code_table :: PrefixTree; -- partain: sig create_code_table = create_code_table2 ILIT(0) ILIT(256); create_code_table2 :: FAST_INT -> FAST_INT -> PrefixTree; create_code_table2 first_code ILIT(0) = PTNil; create_code_table2 first_code ILIT(1) = PT _PTE_((_CHR_ first_code), first_code, PTNil) PTNil PTNil; create_code_table2 first_code n_codes = PT _PTE_((_CHR_ m_code), m_code, PTNil) left right where { left = create_code_table2 first_code (m_code _SUB_ first_code); right = create_code_table2 m_code2 ((first_code _ADD_ n_codes) _SUB_ m_code2); m_code = (first_code _ADD_ (first_code _ADD_ n_codes _SUB_ ILIT(1))) _QUOT_ ILIT(2); m_code2 = m_code _ADD_ ILIT(1); }; lzw_code_file :: [Char] -> PrefixTree -> FAST_INT -> [Int]; lzw_code_file [] code_table next_code = []; lzw_code_file input code_table next_code = -- partain: case-ified lazy where case (code_string input ILIT(0) next_code code_table) of { _TRIP_(input2,n,code_table2) -> IBOX(n) : lzw_code_file input2 code_table2 (next_code _ADD_ ILIT(1)) }; code_string :: [Char] -> FAST_INT -> FAST_INT -> PrefixTree -> FAST_TRIPLE; #if defined(__GLASGOW_HASKELL__) && defined(SLEAZY_UNBOXING) code_string input@(CBOX(c) : input2) old_code next_code (PT k v t {-p@(PTE k v t)-} l r) | CBOX(c) < CBOX(k) = f1 r1 {-p-} k v t r | CBOX(c) > CBOX(k) = f2 r2 {-p-} k v t l | otherwise {- CBOX(c) == CBOX(k) -} = f3 r3 k v l r #else code_string input@(CBOX(c) : input2) old_code next_code (PT p@(PTE k v t) l r) | CBOX(c) < CBOX(k) = f1 r1 p r | CBOX(c) > CBOX(k) = f2 r2 p l | otherwise {- CBOX(c) == CBOX(k) -} = f3 r3 k v l r #endif where { r1 = code_string input old_code next_code l; r2 = code_string input old_code next_code r; r3 = code_string input2 v next_code t; #if defined(__GLASGOW_HASKELL__) && defined(SLEAZY_UNBOXING) f1 _TRIP_(input_l,nl,l2) k v t r = _TRIP_(input_l,nl,PT k v t l2 r); f2 _TRIP_(input_r,nr,r2) k v t l = _TRIP_(input_r,nr,PT k v t l r2); #else f1 _TRIP_(input_l,nl,l2) p r = _TRIP_(input_l,nl,PT p l2 r); f2 _TRIP_(input_r,nr,r2) p l = _TRIP_(input_r,nr,PT p l r2); #endif f3 _TRIP_(input2,n,t2) k v l r = _TRIP_(input2, n, PT _PTE_(k, v, t2) l r); }; --code_string input@(c : input2) old_code next_code (PT p@(PTE k v t) l r) -- | c < k = (input_l,nl,PT p l' r) -- | c > k = (input_r,nr,PT p l r') -- | c == k = (input',n,PT (PTE k v t') l r) -- where { -- (input_l,nl,l') = code_string input old_code next_code l; -- (input_r,nr,r') = code_string input old_code next_code r; -- (input',n,t') = code_string input2 v next_code t; -- }; code_string input@(CBOX(c) : input_file2) old_code next_code PTNil = if (next_code _GE_ ILIT(4096)) then _TRIP_(input, old_code, PTNil) else _TRIP_(input, old_code, PT _PTE_(c, next_code, PTNil) PTNil PTNil); code_string [] old_code next_code code_table = _TRIP_([], old_code, PTNil); integer_list_to_char_list (IBOX(n) : l) = CBOX(_CHR_ (n _QUOT_ ILIT(16))) : integer_list_to_char_list2 l n; integer_list_to_char_list [] = []; integer_list_to_char_list2 (IBOX(c) : l) n = CBOX(_CHR_ ((n _MUL_ ILIT(16)) _ADD_ ((c _QUOT_ ILIT(256)) _REM_ ILIT(16)))) : CBOX(_CHR_ c) : integer_list_to_char_list l; integer_list_to_char_list2 [] n = CBOX(_CHR_ (n _MUL_ ILIT(16))) : []; main :: IO (); main = getContents >>= \ input_string -> main2 input_string; main2 :: String -> IO (); main2 input_string = putStr output_list where { output_list = integer_list_to_char_list code_list; code_list = lzw_code_file input_string create_code_table ILIT(256); }; } -- Lzw2.hs looks like an earlier version of Lzw.hs module Main (main){-export list added by partain-} where { -- partain: with "ghc -cpp -DSLEAZY_UNBOXING", you get (guess what)? -- without it, you get the code as originally written. -- -- Things done here: -- * The obvious unboxing (e.g., Int ==> Int#). -- * use quot/rem, not div/mod -- * inline PrefixElement type into PrefixTree.PT constructor -- * cvt final clause of 3-way comparison to "otherwise" -- * use shifts, not quot/rem (not necessary: C compiler converts -- them just fine) -- -- Obviously, more egregious hacking could be done: -- * replace Tuple/List types that mention Ints with specialised -- variants #define FAST_INT Int# #define ILIT(x) (x#) #define IBOX(x) (I# (x)) #define _ADD_ `plusInt#` #define _SUB_ `minusInt#` #define _MUL_ `timesInt#` #define _DIV_ `divInt#` #define _QUOT_ `quotInt#` #define _REM_ `remInt#` #define _NEG_ negateInt# #define _EQ_ `eqInt#` #define _LT_ `ltInt#` #define _LE_ `leInt#` #define _GE_ `geInt#` #define _GT_ `gtInt#` #define _CHR_ chr# #define FAST_BOOL Int# #define _TRUE_ 1# #define _FALSE_ 0# #define _IS_TRUE_(x) ((x) `eqInt#` 1#) #define FAST_CHAR Char# #define CBOX(x) (C# (x)) data FAST_TRIPLE = TRIP [Char] Int# PrefixTree; #define _TRIP_(a,b,c) (TRIP (a) (b) (c)) #define PrefixElement FAST_CHAR FAST_INT PrefixTree #define _PTE_(a,b,c) (a) (b) (c) -- end of partain data PrefixTree = PTNil | PT PrefixElement PrefixTree PrefixTree; create_code_table = create_code_table2 ILIT(0) ILIT(256); create_code_table2 :: FAST_INT -> FAST_INT -> PrefixTree; create_code_table2 first_code ILIT(0) = PTNil; create_code_table2 first_code ILIT(1) = PT _PTE_((_CHR_ first_code), first_code, PTNil) PTNil PTNil; create_code_table2 first_code n_codes = PT _PTE_((_CHR_ m_code), m_code, PTNil) left right where { left = create_code_table2 first_code (m_code _SUB_ first_code); right = create_code_table2 m_code2 ((first_code _ADD_ n_codes) _SUB_ m_code2); m_code = (first_code _ADD_ (first_code _ADD_ n_codes _SUB_ ILIT(1))) _QUOT_ ILIT(2); m_code2 = m_code _ADD_ ILIT(1); }; lzw_code_file :: [Char] -> PrefixTree -> FAST_INT -> [Int]; lzw_code_file [] code_table next_code = []; lzw_code_file input code_table next_code = -- partain: case-ified lazy where case (code_string ILIT(0) next_code input code_table) of { _TRIP_(input2,n,code_table2) -> IBOX(n) : lzw_code_file input2 code_table2 (next_code _ADD_ ILIT(1)) }; code_string :: FAST_INT -> FAST_INT -> [Char] -> PrefixTree -> FAST_TRIPLE; code_string old_code next_code input@(CBOX(c) : input2) (PT k v t {-p@(PTE k v t)-} l r) | CBOX(c) < CBOX(k) = _scc_ "cs1" (f1 r1 {-p-} k v t r) | CBOX(c) > CBOX(k) = _scc_ "cs2" (f2 r2 {-p-} k v t l) | otherwise {- CBOX(c) == CBOX(k) -} = _scc_ "cs3" (f3 r3 k v l r) where { r1 = code_string old_code next_code input l; r2 = code_string old_code next_code input r; r3 = code_string v next_code input2 t; f1 _TRIP_(input_l,nl,l2) k v t r = _TRIP_(input_l,nl,PT k v t l2 r); f2 _TRIP_(input_r,nr,r2) k v t l = _TRIP_(input_r,nr,PT k v t l r2); f3 _TRIP_(input2,n,t2) k v l r = _TRIP_(input2, n, PT _PTE_(k, v, t2) l r); }; code_string old_code next_code input@(CBOX(c) : input_file2) PTNil = if (next_code _GE_ ILIT(4096)) then _scc_ "cs4" _TRIP_(input, old_code, PTNil) else _scc_ "cs5" _TRIP_(input, old_code, PT _PTE_(c, next_code, PTNil) PTNil PTNil); code_string old_code next_code [] code_table = _scc_ "cs6" _TRIP_([], old_code, PTNil); integer_list_to_char_list (IBOX(n) : l) = CBOX(_CHR_ (n _QUOT_ ILIT(16))) : integer_list_to_char_list2 l n; integer_list_to_char_list [] = []; integer_list_to_char_list2 (IBOX(c) : l) n = CBOX(_CHR_ ((n _MUL_ ILIT(16)) _ADD_ ((c _QUOT_ ILIT(256)) _REM_ ILIT(16)))) : CBOX(_CHR_ c) : integer_list_to_char_list l; integer_list_to_char_list2 [] n = CBOX(_CHR_ (n _MUL_ ILIT(16))) : []; main :: IO (); main = getContents >>= \input_string -> main2 input_string; main2 :: String -> IO (); main2 input_string = putStr output_list where { output_list = integer_list_to_char_list code_list; code_list = lzw_code_file input_string create_code_table ILIT(256); }; } {- - Compress.hs - - This program is a version of the compress utility as defined in - "A Technique for High Performance Data Compression", Terry A. Welch, - Computer, vol 17, no 6 1984, pp 8-19 - - Usage: compress file - - Paul Sanders, Systems Research Division, British Telecom Laboratories 1992 - -} module Main (main) where import Defaults import BinConv -- binary conversion routines import Encode -- coding routine main = getContents >>= \ inp -> putStr (compress inp) {- To compress a string we first encode it, then convert it to n-bit binaries - convert back to decimal as ascii-bit values and then to characters -} compress = map toEnum . codes_to_ascii . encode module PTTrees (insert, PrefixTree(..), PrefixElem(..)) where data PrefixTree a b = PTNil | PT (PrefixElem a b) (PrefixTree a b) (PrefixTree a b) data PrefixElem a b = PTE a b (PrefixTree a b) {-partain-} --insert :: Char -> Int -> PrefixTree Char Int -> PrefixTree Char Int {-partain-} insert k v PTNil = PT (PTE k v PTNil) PTNil PTNil insert k v (PT p@(PTE k' v' t) l r) | k < k' = PT p (insert k v l) r | k > k' = PT p l (insert k v r) | otherwise = PT p l r {- - Uncompress.hs - - This program is a version of the compress utility as defined in - "A Technique for High Performance Data Compression", Terry A. Welch, - Computer, vol 17, no 6 1984, pp 8-19 - - - Paul Sanders, Systems Research Division, British Telecom Laboratories 1992 - -} module Main (main) where import Defaults import BinConv -- binary conversion routines import Decode -- decoding routines main = getContents >>= \ inp -> putStr (uncompress inp) {- To uncompress a string we first convert the characters to n-bit binaries - and then to decimals which can then be decoded. -} uncompress = decode . ascii_to_codes . map fromEnum