Haskell写的Parser
干货第二波,Haskell实现的Parser, 支持运算语句和备注等,输出可以作为Interpreter的输入
Parser combinator 选用的是 ReadP.
module Parser.Impl where
import SubsAst
import Text.ParserCombinators.ReadP as P
import Data.Char
import Control.Applicative
data ParseError = ParseError String
deriving (Show, Eq)
-- allowed chars which should be allowed in pString
allowedChars :: ReadP Char
allowedChars = do
_ <- char '\\'
c <- satisfy (`elem` "\'nt\\")
case c of
'n' -> return '\n'
't' -> return '\t'
x -> return x
-- keywords which should not be used as a var name no when assign or get var
reserved :: [String]
reserved = ["if", "for", "of", "true","false","undefined"]
-- helper function for read chars of variable name
checkChar :: Char -> Char -> Bool
checkChar sym x = x == sym
-- return Var
pIdent :: ReadP Expr
pIdent = token $ do
fist <- satisfy isLetter
send <- munch (\x -> isLetter x || isDigit x || checkChar '_' x)
let idt = fist : send
if idt `notElem` reserved then return $ Var idt
else fail "keyword can not be used as ident"
-- doing get Ident for assign
pIdget :: ReadP Ident
pIdget = do
fist <- satisfy isLetter
send <- munch (\x -> isLetter x || isDigit x || checkChar '_' x)
let idt = fist : send
if idt `notElem` reserved then return idt
else fail "keyword can not be used as ident"
-- for readin number
pNumber :: ReadP Expr
pNumber = do
sym <- option ' ' (char '-')
number <- munch1 isDigit
if length number <= 8 then
return (Number (read (sym : number) :: Int))
else fail "invalid number"
-- for readin string
pString :: ReadP Expr
pString = do
_ <- char '\''
s <- P.many (allowedChars
<|> satisfy (\x -> isPrint x && notElem x "\t\'\\'")
<|> (do
_ <- char '\\'
_ <- char '\n'
return '\0'))
_ <- char '\''
return (String s)
-- comment
pComment :: ReadP String
pComment = between (string "//") (char '\n') (munch ( /= '\n'))
-- token
-- we keep the skipMany1 pComment for skip comments before token but
-- it might cause time out sometimes
token :: ReadP a -> ReadP a
token p = do
skipSpaces <|> skipMany1 pComment
a <- p
skipSpaces <|> skipMany1 pComment
return a
-- symbol is using for read in a symbol or keyword such as "for" and "if"
symbol :: String -> ReadP String
symbol s = token $ string s
-- read true
pTrue :: ReadP Expr
pTrue = do
_ <- token $ string "true"
return TrueConst
-- read false
pFalse :: ReadP Expr
pFalse = do
_ <- token $ string "false"
return FalseConst
-- read Undefined
pUndefined :: ReadP Expr
pUndefined = do
_ <- token $ string "undefined"
return Undefined
-- stands for the Eq of our grammar tree
pAssign :: ReadP Expr
pAssign = do
idt <- token pIdget
_ <- symbol "="
val <- token pExpr0
return (Assign idt val)
-- array
pArray :: ReadP Expr
pArray = (do
_ <- symbol "["
exps <- pExprs
_ <- symbol "]"
return (Array exps))
<++ (do
_ <- symbol "["
_ <- symbol "]"
return (Array [Undefined]))
-- stands for the Expr of root of grammar tree
pExpr :: ReadP Expr
pExpr = (do
a <- token pExpr1
b <- token pouterComma
return (Comma a b))
<++ token pAssign
<++ token pExpr1
-- stands for the Expr0 of grammar tree,with precidence of 0
pExpr0 :: ReadP Expr
pExpr0 = (do
_ <- symbol "="
token pExpr0)
<++ token pExpr1
-- stands for the Expr1 of grammar tree,with precidence of 1
pExpr1 :: ReadP Expr
pExpr1 = (do
exp2 <- token pExpr2
_ <- symbol "==="
exp1 <- token pExpr2
pOpExpr1 (Call "===" [exp2,exp1])) <++
(do
exp2 <- token pExpr2
_ <- symbol "<"
exp1 <- token pExpr2
pOpExpr1 (Call "<" [exp2,exp1])) <++
token pAssign <++
token pExpr2
-- to help pExpr1 deal with left associativity
pOpExpr1 :: Expr -> ReadP Expr
pOpExpr1 a = (do
_ <- symbol "==="
exp2 <- token pExpr1
return (Call "===" [a,exp2])) <++
(do
_ <- symbol "<"
exp2 <- token pExpr1
return (Call "<" [a,exp2])) <++
return a
-- stands for the Expr2 of grammar tree,with precidence of 2
pExpr2 :: ReadP Expr
pExpr2 = (do
a <- token pExpr3
_ <- symbol "+"
b <- token pExpr3
pOpExpr2 (Call "+" [a,b])) <++
(do
a <- token pExpr3
_ <- symbol "-"
b <- token pExpr3
pOpExpr2 (Call "-" [a,b])) <++
token pExpr3
-- to help pExpr2 deal with left associativity
pOpExpr2 :: Expr -> ReadP Expr
pOpExpr2 a = (do
_ <- symbol "+"
exp2 <- token pExpr2
return (Call "+" [a,exp2])) <++
(do
_ <- symbol "-"
exp2 <- token pExpr2
return (Call "-" [a,exp2])) <++
return a
-- stands for the Expr3 of grammar tree,with precidence of 3
pExpr3 :: ReadP Expr
pExpr3 = (do
a <- token pExpr4
_ <- symbol "*"
b <- token pExpr4
pOpExpr3 (Call "*" [a,b])) <++
(do
a <- token pExpr4
_ <- symbol "%"
b <- token pExpr4
pOpExpr3 (Call "%" [a,b])) <++
token pExpr4
-- to help pExpr3 deal with left associativity
pOpExpr3 :: Expr -> ReadP Expr
pOpExpr3 a = (do
_ <- symbol "*"
exp2 <- token pExpr3
return (Call "*" [a,exp2])) <++
(do
_ <- symbol "%"
exp2 <- token pExpr3
return (Call "%" [a,exp2])) <++
return a
-- stands for the Expr4 of grammar tree,with precidence of 4
pExpr4 :: ReadP Expr
pExpr4 = (do
_ <- symbol "("
e <- token pExpr
_ <- symbol ")"
return e)
<++ pArray
<++(do
_ <- symbol "["
e <- token pComprFor
_ <- symbol "]"
return e)
<++ (do
idt <- token pIdget
_ <- symbol "("
e <- token pExprs
_ <- symbol ")"
return $ Call idt e)
<++ pNumber
<++ pIdent
<++ pString
<++ pTrue
<++ pFalse
<++ pUndefined
-- stands for the OuterComma of the grammar tree
pouterComma :: ReadP Expr
pouterComma = do
_ <- symbol ","
token pExpr
-- helpfunction for pcommaExprs and pExprs
commaHelper :: ReadP [Expr]
commaHelper = do
exp1 <- token pExpr1
com <- token pcommaExprs
return (exp1:com)
-- stands for the commaExprs of the grammar tree
pcommaExprs :: ReadP [Expr]
pcommaExprs = (do
_ <- symbol ","
commaHelper)
<++ (do
_ <- symbol ","
exp1 <- token pExpr1
return [exp1])
-- stands for the Exprs of the grammar tree
pExprs :: ReadP [Expr]
pExprs = (do
exp1 <- token pExpr1
return [exp1])
<++ commaHelper
-- stands for ArrayFor of the grammar tree
pArrayFor :: ReadP ArrayCompr
pArrayFor = do
_ <- symbol "for"
_ <- symbol "("
id' <- token pIdget
_ <- symbol "of"
exp1 <- token pExpr1
_ <- symbol ")"
ac <- token pArrayCompr
return (ACFor id' exp1 ac)
-- stands for the ArrayIf of the grammar tree
pArrayIf :: ReadP ArrayCompr
pArrayIf = do
_ <- symbol "if"
_ <- symbol "("
exp1 <- token pExpr1
_ <- symbol ")"
ac <- token pArrayCompr
return (ACIf exp1 ac)
-- organize the ACIf ,ACFor and ACBody
pArrayCompr :: ReadP ArrayCompr
pArrayCompr = token pArrayIf
<++ token pArrayFor
<++ (do
ex <- token pExpr1
return (ACBody ex))
-- stands for the ArrayCompr of grammar tree
pComprFor :: ReadP Expr
pComprFor = do
ar <- pArrayFor
return (Compr ar)
-- out put the parse result
parseString :: String -> Either ParseError Expr
parseString str = if null (readP_to_S pExpr str) then
Left(ParseError "Invalid expression") else
(do
let legalstr = [x | x <- readP_to_S pExpr str,snd x == ""]
if null legalstr then Left( ParseError "Invalid expression") else
(do
let stri = fst (head legalstr)
case stri of
String s -> Right (String [x | x <- s, x `notElem` "\NUL"])
a -> Right a))

浙公网安备 33010602011771号