module Lexer where -- Copyright 1994 by Peter Thiemann import Char -- 1.3 ------------------------------------------------------------------------------ --NOW the lexer ------------------------------------------------------------------------------ data Token = Ident String | Symbol String | String String instance Show Token where showsPrec n (Ident s) = showChar '[' . showString s . showString "] " showsPrec n (Symbol s) = showChar '<' . showString s . showString "> " showsPrec n (String s) = showChar '"' . showString s . showString "\" " showList [] = id showList (x:xs) = shows x . showList xs isIdChar c = isAlpha c || isDigit c || c == '_' theSymbols = "!@#$%^&*+./<=>?\\|:" isSymbolChar c = c `elem` theSymbols lexer :: String -> [Token] lexer "" = [] lexer ('"':cs) = String (stchars): lexer srest where (stchars, srest) = lexString cs lexer ('\'':cs) = String (oneChar): lexer srest where (oneChar, srest) = lexChar cs lexer (c:cs) | isSpace c = lexer cs | isAlpha c = Ident (c:idchars): lexer irest | isSymbolChar c = Symbol(c:sychars): lexer srest | otherwise = Symbol([c]): lexer cs where (idchars, irest) = span isIdChar cs (sychars, srest) = span isSymbolChar cs -- preprocessor for EBNF style comments uncomment :: String -> String uncomment "" = "" uncomment ('#':cs) = uncomment (dropWhile (/= '\n') cs) uncomment ('"':cs) = '"':uncommentString cs uncomment ('\'':cs) = '\'':uncommentChar cs uncomment (c:cs) = c:uncomment cs uncommentString "" = "" uncommentString ('\\':c:cs) = '\\':c:uncommentString cs uncommentString ('"':cs) = '"':uncomment cs uncommentString (c:cs) = c:uncommentString cs uncommentChar "" = "" uncommentChar ('\\':c:cs) = '\\':c:uncommentChar cs uncommentChar ('\'':cs) = '"':uncomment cs uncommentChar (c:cs) = c:uncommentChar cs -- generic lexers lexChar ('\\':c:'\'':cs) = ([c], cs) lexChar (c:'\'':cs) = ([c], cs) lexChar cs = ([], cs) lexString ('\\':c:cs) = (c:stchars, srest) where (stchars, srest) = lexString cs lexString ('"':cs) = ("", cs) lexString ("") = ("","") lexString (c:cs) = (c:stchars, srest) where (stchars, srest) = lexString cs isIdent (Ident _ ) = True isIdent _ = False getIdent (Ident s) = s isString (String _) = True isString _ = False getString (String s) = s