{-# OPTIONS_GHC -fno-implicit-prelude #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Read.Lex -- Copyright : (c) The University of Glasgow 2002 -- License : BSD-style (see the file libraries/base/LICENSE) -- -- Maintainer : libraries@haskell.org -- Stability : provisional -- Portability : non-portable (uses Text.ParserCombinators.ReadP) -- -- The cut-down Haskell lexer, used by Text.Read -- ----------------------------------------------------------------------------- module Text.Read.Lex -- lexing types ( Lexeme(..) -- :: *; Show, Eq -- lexer , lex -- :: ReadP Lexeme Skips leading spaces , hsLex -- :: ReadP String , lexChar -- :: ReadP Char Reads just one char, with H98 escapes , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a , readOctP -- :: Num a => ReadP a , readDecP -- :: Num a => ReadP a , readHexP -- :: Num a => ReadP a ) where import Text.ParserCombinators.ReadP #ifdef __GLASGOW_HASKELL__ import GHC.Base import GHC.Num( Num(..), Integer ) import GHC.Show( Show(..) ) #ifndef __HADDOCK__ import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum ) #endif import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, toInteger, (^), (^^), infinity, notANumber ) import GHC.List import GHC.Enum( maxBound ) #else import Prelude hiding ( lex ) import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum ) import Data.Ratio( Ratio, (%) ) #endif #ifdef __HUGS__ import Hugs.Prelude( Ratio(..) ) #endif import Data.Maybe import Control.Monad -- ----------------------------------------------------------------------------- -- Lexing types -- ^ Haskell lexemes. data Lexeme = Char Char -- ^ Character literal | String String -- ^ String literal, with escapes interpreted | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@ | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@ | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@ | Int Integer -- ^ Integer literal | Rat Rational -- ^ Floating point literal | EOF deriving (Eq, Show) -- ----------------------------------------------------------------------------- -- Lexing lex :: ReadP Lexeme lex = skipSpaces >> lexToken hsLex :: ReadP String -- ^ Haskell lexer: returns the lexed string, rather than the lexeme hsLex = do skipSpaces (s,_) <- gather lexToken return s lexToken :: ReadP Lexeme lexToken = lexEOF +++ lexLitChar +++ lexString +++ lexPunc +++ lexSymbol +++ lexId +++ lexNumber -- ---------------------------------------------------------------------- -- End of file lexEOF :: ReadP Lexeme lexEOF = do s <- look guard (null s) return EOF -- --------------------------------------------------------------------------- -- Single character lexemes lexPunc :: ReadP Lexeme lexPunc = do c <- satisfy isPuncChar return (Punc [c]) where isPuncChar c = c `elem` ",;()[]{}`" -- ---------------------------------------------------------------------- -- Symbols lexSymbol :: ReadP Lexeme lexSymbol = do s <- munch1 isSymbolChar if s `elem` reserved_ops then return (Punc s) -- Reserved-ops count as punctuation else return (Symbol s) where isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~" reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"] -- ---------------------------------------------------------------------- -- identifiers lexId :: ReadP Lexeme lexId = lex_nan <++ lex_id where -- NaN and Infinity look like identifiers, so -- we parse them first. lex_nan = (string "NaN" >> return (Rat notANumber)) +++ (string "Infinity" >> return (Rat infinity)) lex_id = do c <- satisfy isIdsChar s <- munch isIdfChar return (Ident (c:s)) -- Identifiers can start with a '_' isIdsChar c = isAlpha c || c == '_' isIdfChar c = isAlphaNum c || c `elem` "_'" #ifndef __GLASGOW_HASKELL__ infinity, notANumber :: Rational infinity = 1 :% 0 notANumber = 0 :% 0 #endif -- --------------------------------------------------------------------------- -- Lexing character literals lexLitChar :: ReadP Lexeme lexLitChar = do char '\'' (c,esc) <- lexCharE guard (esc || c /= '\'') -- Eliminate '' possibility char '\'' return (Char c) lexChar :: ReadP Char lexChar = do { (c,_) <- lexCharE; return c } lexCharE :: ReadP (Char, Bool) -- "escaped or not"? lexCharE = do c <- get if c == '\\' then do c <- lexEsc; return (c, True) else do return (c, False) where lexEsc = lexEscChar +++ lexNumeric +++ lexCntrlChar +++ lexAscii lexEscChar = do c <- get case c of 'a' -> return '\a' 'b' -> return '\b' 'f' -> return '\f' 'n' -> return '\n' 'r' -> return '\r' 't' -> return '\t' 'v' -> return '\v' '\\' -> return '\\' '\"' -> return '\"' '\'' -> return '\'' _ -> pfail lexNumeric = do base <- lexBaseChar <++ return 10 n <- lexInteger base guard (n <= toInteger (ord maxBound)) return (chr (fromInteger n)) lexCntrlChar = do char '^' c <- get case c of '@' -> return '\^@' 'A' -> return '\^A' 'B' -> return '\^B' 'C' -> return '\^C' 'D' -> return '\^D' 'E' -> return '\^E' 'F' -> return '\^F' 'G' -> return '\^G' 'H' -> return '\^H' 'I' -> return '\^I' 'J' -> return '\^J' 'K' -> return '\^K' 'L' -> return '\^L' 'M' -> return '\^M' 'N' -> return '\^N' 'O' -> return '\^O' 'P' -> return '\^P' 'Q' -> return '\^Q' 'R' -> return '\^R' 'S' -> return '\^S' 'T' -> return '\^T' 'U' -> return '\^U' 'V' -> return '\^V' 'W' -> return '\^W' 'X' -> return '\^X' 'Y' -> return '\^Y' 'Z' -> return '\^Z' '[' -> return '\^[' '\\' -> return '\^\' ']' -> return '\^]' '^' -> return '\^^' '_' -> return '\^_' _ -> pfail lexAscii = do choice [ (string "SOH" >> return '\SOH') <++ (string "SO" >> return '\SO') -- \SO and \SOH need maximal-munch treatment -- See the Haskell report Sect 2.6 , string "NUL" >> return '\NUL' , string "STX" >> return '\STX' , string "ETX" >> return '\ETX' , string "EOT" >> return '\EOT' , string "ENQ" >> return '\ENQ' , string "ACK" >> return '\ACK' , string "BEL" >> return '\BEL' , string "BS" >> return '\BS' , string "HT" >> return '\HT' , string "LF" >> return '\LF' , string "VT" >> return '\VT' , string "FF" >> return '\FF' , string "CR" >> return '\CR' , string "SI" >> return '\SI' , string "DLE" >> return '\DLE' , string "DC1" >> return '\DC1' , string "DC2" >> return '\DC2' , string "DC3" >> return '\DC3' , string "DC4" >> return '\DC4' , string "NAK" >> return '\NAK' , string "SYN" >> return '\SYN' , string "ETB" >> return '\ETB' , string "CAN" >> return '\CAN' , string "EM" >> return '\EM' , string "SUB" >> return '\SUB' , string "ESC" >> return '\ESC' , string "FS" >> return '\FS' , string "GS" >> return '\GS' , string "RS" >> return '\RS' , string "US" >> return '\US' , string "SP" >> return '\SP' , string "DEL" >> return '\DEL' ] -- --------------------------------------------------------------------------- -- string literal lexString :: ReadP Lexeme lexString = do char '"' body id where body f = do (c,esc) <- lexStrItem if c /= '"' || esc then body (f.(c:)) else let s = f "" in return (String s) lexStrItem = (lexEmpty >> lexStrItem) +++ lexCharE lexEmpty = do char '\\' c <- get case c of '&' -> do return () _ | isSpace c -> do skipSpaces; char '\\'; return () _ -> do pfail -- --------------------------------------------------------------------------- -- Lexing numbers type Base = Int type Digits = [Int] lexNumber :: ReadP Lexeme lexNumber = lexHexOct <++ -- First try for hex or octal 0x, 0o etc -- If that fails, try for a decimal number lexDecNumber -- Start with ordinary digits lexHexOct :: ReadP Lexeme lexHexOct = do char '0' base <- lexBaseChar digits <- lexDigits base return (Int (val (fromIntegral base) 0 digits)) lexBaseChar :: ReadP Int -- Lex a single character indicating the base; fail if not there lexBaseChar = do { c <- get; case c of 'o' -> return 8 'O' -> return 8 'x' -> return 16 'X' -> return 16 _ -> pfail } lexDecNumber :: ReadP Lexeme lexDecNumber = do xs <- lexDigits 10 mFrac <- lexFrac <++ return Nothing mExp <- lexExp <++ return Nothing return (value xs mFrac mExp) where value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp valueFracExp :: Integer -> Maybe Digits -> Maybe Integer -> Lexeme valueFracExp a Nothing Nothing = Int a -- 43 valueFracExp a Nothing (Just exp) | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7 | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7 valueFracExp a (Just fs) mExp = case mExp of Nothing -> Rat rat -- 4.3 Just exp -> Rat (valExp rat exp) -- 4.3e-4 where rat :: Rational rat = fromInteger a + frac 10 0 1 fs valExp :: Rational -> Integer -> Rational valExp rat exp = rat * (10 ^^ exp) lexFrac :: ReadP (Maybe Digits) -- Read the fractional part; fail if it doesn't -- start ".d" where d is a digit lexFrac = do char '.' frac <- lexDigits 10 return (Just frac) lexExp :: ReadP (Maybe Integer) lexExp = do char 'e' +++ char 'E' exp <- signedExp +++ lexInteger 10 return (Just exp) where signedExp = do c <- char '-' +++ char '+' n <- lexInteger 10 return (if c == '-' then -n else n) lexDigits :: Int -> ReadP Digits -- Lex a non-empty sequence of digits in specified base lexDigits base = do s <- look xs <- scan s id guard (not (null xs)) return xs where scan (c:cs) f = case valDig base c of Just n -> do get; scan cs (f.(n:)) Nothing -> do return (f []) scan [] f = do return (f []) lexInteger :: Base -> ReadP Integer lexInteger base = do xs <- lexDigits base return (val (fromIntegral base) 0 xs) val :: Num a => a -> a -> Digits -> a -- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were val base y [] = y val base y (x:xs) = y' `seq` val base y' xs where y' = y * base + fromIntegral x frac :: Integral a => a -> a -> a -> Digits -> Ratio a frac base a b [] = a % b frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs where a' = a * base + fromIntegral x b' = b * base valDig :: Num a => a -> Char -> Maybe Int valDig 8 c | '0' <= c && c <= '7' = Just (ord c - ord '0') | otherwise = Nothing valDig 10 c = valDecDig c valDig 16 c | '0' <= c && c <= '9' = Just (ord c - ord '0') | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10) | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10) | otherwise = Nothing valDecDig c | '0' <= c && c <= '9' = Just (ord c - ord '0') | otherwise = Nothing -- ---------------------------------------------------------------------- -- other numeric lexing functions readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a readIntP base isDigit valDigit = do s <- munch1 isDigit return (val base 0 (map valDigit s)) readIntP' :: Num a => a -> ReadP a readIntP' base = readIntP base isDigit valDigit where isDigit c = maybe False (const True) (valDig base c) valDigit c = maybe 0 id (valDig base c) readOctP, readDecP, readHexP :: Num a => ReadP a readOctP = readIntP' 8 readDecP = readIntP' 10 readHexP = readIntP' 16