> module Token(tokenise) where > import Core_datatype > import Kernel > import Type_defs > endofline = "\n\f\r" > whitespace = " \t" ++ endofline > a_symbol = "!#$&*-+=~<>?/\\.:|@" > singlechrs = ",;\"`\'(){}[]\176\185" > binder = "\208\211\236\177\178\229\196" > ifx_binder = "\183\184\167\182\187" > ifx_op = "\179\180=\172:" > reserved = [ "empty", "signature", "extend", "combine", "with", "sharing" , > "++", "=", "(", ")", "@", "operator", ";", ":", "\189", > "datatype", "|", "case", "of", "\167", "recurse", "end", > "typed", "fn", "rec", "if", "then", "else", "let", "in", > "\168", "\169", "{", "}", "[", "]", "\208", "\211", "\236", > "\229", "\177", "\178", ".", ",", "\181", "\179", "\180", > "\182", "\172", "\183", "\184", "\185", "\176", "BinL", > "BinR", "Pre", "Post", "\196", "\187" ] > tokenise :: String -> [Token] > tokenise string > = denull ( scan' string "" ) > denull :: [Token] -> [Token] > denull ( Clr "" : tkl ) = denull tkl > denull ( Clr tk : tkl ) > = tag rev_tk : denull tkl > where > tag | rev_tk `elem` reserved = Rvd > | otherwise = Clr > rev_tk = reverse tk > denull ( Rvd tk : tkl ) = error " optimise to trap some reserved earlier and reduce length of reserved set" > denull ( Scan_Err mess : _ ) = [ Scan_Err mess ] > denull ( tk : tkl ) = tk : denull tkl > denull [] = [] > denull x = error ( show x ) > scan' ( a : x ) current > | a `elem` binder = Clr current : ( Bdr ( mk_bdr a ) : scan' x "" ) > | a `elem` ifx_binder = Clr current : ( IfxBdr [a] : scan' x "" ) > | a `elem` ifx_op = Clr current : ( IfxOp [a] : scan' x "" ) > | single a = Clr current : ( Clr [a] : scan' x "" ) > | a `elem` a_symbol = Clr current : symbol_scan x [a] > | a `elem` whitespace = Clr current : scan' x "" > | alphanum' a = scan' x ( a : current ) > | a == '%' = Clr current : scan' ( discard x ) "" > | a == '^' = super_scan x ( '^' : current ) > | otherwise = [ Scan_Err "\nInvalid character\n" ] > scan' [] current = [ Clr current ] > symbol_scan :: String -> String -> [Token] > symbol_scan ( a : x ) current > | a `elem` a_symbol = symbol_scan x ( a : current ) > | a == '^' = super_scan x ( a : current ) > | otherwise = Clr current : scan' ( a : x ) "" > symbol_scan [] current = [ Clr current ] > super_scan _ "^" = [ Scan_Err "\nAttempt to superscript invalid string\n"] > super_scan ( a : x ) current > | alphanum a || a `elem` a_symbol > = Clr ( a : current ) : scan' x "" > | otherwise = [ Scan_Err "\nInvalid superscript character\n" ] > super_scan [] _ = [ Scan_Err "\nEmpty superscript\n" ] > alphanum' ch = alphanum ch || ch == '_' > alphanum ch > = ( ch >= 'A' && ch <= 'Z' ) || > ( ch >= 'a' && ch <= 'z' ) || > ( ch >= '0' && ch <= '9' ) > single ch > = ch `elem` singlechrs || ch > '\DEL' > discard :: String -> String > discard ( ch : x ) > | ch `elem` endofline = x > | otherwise = discard x > discard [] = "" > mk_bdr :: Char -> Binder_conn > {- > mk_bdr x | fromEnum x == 208 = Pi > mk_bdr x | fromEnum x == 211 = Sigma > mk_bdr x | fromEnum x == 236 = Lambda > mk_bdr x | fromEnum x == 177 = Forall > mk_bdr x | fromEnum x == 178 = Exists > mk_bdr x | fromEnum x == 229 = Choose > -} > mk_bdr '\208' = Pi > mk_bdr '\211' = Sigma > mk_bdr '\236' = Lambda > mk_bdr '\177' = Forall > mk_bdr '\178' = Exists > mk_bdr '\229' = Choose > mk_bdr '\196' = Delta > mk_bdr oth = error ( "Mk_bdr: " ++ [oth]) token display function used in debugging messages > disp_tk :: Token -> String > disp_tk ( Rvd str ) = str > disp_tk ( Clr str ) = str