---------------------------------------------------------------- -- the Henk Parser -- Copyright 2000, Jan-Willem Roorda and Daan Leijen ---------------------------------------------------------------- module HenkParser where import Text.ParserCombinators.Parsec import qualified Text.ParserCombinators.Parsec.Token as P import Text.ParserCombinators.Parsec.Language import HenkAS ---------------------------------------------------------------- -- the Henk Parser -- -- anonymous variables are any identifiers starting with "_" -- -- unknown types (those that need to be inferred) can explicitly -- be given using "?" -- -- instead of grammar: "var : aexpr" as in the henk paper, -- we use "var : expr" instead. This means that variable -- sequences as in \, |~|, \/ and /\ expressions need to -- be comma seperated. Pattern variables are also comma -- seperated. The case arrow (->) now needs to be (=>) in -- order to distinguish the end of the pattern from function -- arrows. ---------------------------------------------------------------- program = do{ whiteSpace ; ts <- semiSep tdecl ; vs <- semiSep vdecl ; eof ; return $ Program ts vs } ---------------------------------------------------------------- -- Type declarations ---------------------------------------------------------------- tdecl = do{ reserved "data" ; t <- bindVar ; symbol "=" ; ts <- braces (semiSep1 tvar) ; return $ Data t ts } ---------------------------------------------------------------- -- Value declarations ---------------------------------------------------------------- vdecl :: Parser ValueDecl vdecl = do{ reserved "let" ; b <- bind ; return $ Let b } <|> do{ reserved "letrec" ; bs <- braces (semiSep1 bind) ; return $ LetRec bs } bind = do{ t <- tvar ; symbol "=" ; e <- expr ; return $ Bind t e } ---------------------------------------------------------------- -- Expressions ---------------------------------------------------------------- expr :: Parser Expr expr = choice [ letExpr , forallExpr -- forall before lambda! \/ vs. \ , lambdaExpr , piExpr , caseExpr , functionExpr , bigLamdaExpr ] "expression" letExpr = do{ vd <- vdecl ; reserved "in" ; e <- expr ; return (In vd e) } lambdaExpr = do{ symbol "\\" ; ts <- commaSep1 bindVar ; symbol "." ; e <- expr ; return $ (foldr Lam e ts) } piExpr = do{ symbol "|~|" ; ts <- commaSep1 bindVar ; symbol "." ; e <- expr ; return (foldr Pi e ts) } ---------------------------------------------------------------- -- Case expressions ---------------------------------------------------------------- caseExpr = do{ reserved "case" ; e <- expr ; reserved "of" ; as <- braces (semiSep1 alt) ; es <- option [] (do{ reserved "at" ; braces (semiSep expr) }) ; return (Case e as es) } alt = do{ pat <- pattern ; symbol "=>" ; e <- expr ; return (pat e) } pattern = do{ p <- atomPattern ; vs <- commaSep boundVar ; return (\e -> Alt p (foldr Lam e vs)) } atomPattern = do{ v <- boundVar ; return (PatVar v) } <|> do{ l <- literal ; return (PatLit l) } "pattern" ---------------------------------------------------------------- -- Syntactic sugar: ->, \/, /\ ---------------------------------------------------------------- functionExpr = chainr1 appExpr arrow where arrow = do{ symbol "->" ; return ((\x y -> Pi (TVar anonymous x) y)) } "" bigLamdaExpr = do{ symbol "/\\" ; ts <- commaSep1 bindVar ; symbol "." ; e <- expr ; return (foldr Lam e ts) } forallExpr = do{ try (symbol "\\/") -- use "try" to try "\" (lambda) too. ; ts <- commaSep1 bindVar ; symbol "." ; e <- expr ; return (foldr Pi e ts) } ---------------------------------------------------------------- -- Simple expressions ---------------------------------------------------------------- appExpr = do{ es <- many1 atomExpr ; return (foldl1 App es) } atomExpr = parens expr <|> do{ v <- boundVar; return (Var v) } <|> do{ l <- literal; return (Lit l)} <|> do{ symbol "*"; return Star } <|> do{ symbol "[]"; return Box } <|> do{ symbol "?"; return Unknown } "simple expression" ---------------------------------------------------------------- -- Variables & Literals ---------------------------------------------------------------- variable = identifier anonymousVar = lexeme $ do{ c <- char '_' ; cs <- many (identLetter henkDef) ; return (c:cs) } bindVar = do{ i <- variable <|> anonymousVar ; do{ e <- varType ; return (TVar i e) } <|> return (TVar i Star) } "variable" boundVar = do{ i <- variable ; do{ e <- varType ; return (TVar i e) } <|> return (TVar i Unknown) } "variable" tvar = do{ v <- variable ; t <- varType ; return (TVar v t) } "typed variable" varType = do{ symbol ":" ; expr } "variable type" literal = do{ i <- natural ; return (LitInt i) } "literal" ---------------------------------------------------------------- -- Tokens ---------------------------------------------------------------- henk = P.makeTokenParser henkDef lexeme = P.lexeme henk parens = P.parens henk braces = P.braces henk semiSep = P.semiSep henk semiSep1 = P.semiSep1 henk commaSep = P.commaSep henk commaSep1 = P.commaSep1 henk whiteSpace = P.whiteSpace henk symbol = P.symbol henk identifier = P.identifier henk reserved = P.reserved henk natural = P.natural henk henkDef = haskellStyle { identStart = letter , identLetter = alphaNum <|> oneOf "_'" , opStart = opLetter henkDef , opLetter = oneOf ":=\\->/|~.*[]" , reservedOpNames = ["::","=","\\","->","=>","/\\","\\/" ,"|~|",".",":","*","[]"] , reservedNames = [ "case", "data", "letrec", "type" , "import", "in", "let", "of", "at" ] }