aboutsummaryrefslogtreecommitdiffstats
path: root/src/Verismith/Verilog/Parser.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Verismith/Verilog/Parser.hs')
-rw-r--r--src/Verismith/Verilog/Parser.hs511
1 files changed, 511 insertions, 0 deletions
diff --git a/src/Verismith/Verilog/Parser.hs b/src/Verismith/Verilog/Parser.hs
new file mode 100644
index 0000000..a6eaf24
--- /dev/null
+++ b/src/Verismith/Verilog/Parser.hs
@@ -0,0 +1,511 @@
+{-|
+Module : Verismith.Verilog.Parser
+Description : Minimal Verilog parser to reconstruct the AST.
+Copyright : (c) 2019, Yann Herklotz
+License : GPL-3
+Maintainer : yann [at] yannherklotz [dot] com
+Stability : experimental
+Portability : POSIX
+
+Minimal Verilog parser to reconstruct the AST. This parser does not support the
+whole Verilog syntax, as the AST does not support it either.
+-}
+
+module Verismith.Verilog.Parser
+ ( -- * Parser
+ parseVerilog
+ , parseVerilogFile
+ , parseSourceInfoFile
+ -- ** Internal parsers
+ , parseEvent
+ , parseStatement
+ , parseModItem
+ , parseModDecl
+ , Parser
+ )
+where
+
+import Control.Lens
+import Control.Monad (void)
+import Data.Bifunctor (bimap)
+import Data.Bits
+import Data.Functor (($>))
+import Data.Functor.Identity (Identity)
+import Data.List (isInfixOf, isPrefixOf, null)
+import Data.List.NonEmpty (NonEmpty (..))
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.IO as T
+import Text.Parsec hiding (satisfy)
+import Text.Parsec.Expr
+import Verismith.Internal
+import Verismith.Verilog.AST
+import Verismith.Verilog.BitVec
+import Verismith.Verilog.Internal
+import Verismith.Verilog.Lex
+import Verismith.Verilog.Preprocess
+import Verismith.Verilog.Token
+
+type Parser = Parsec [Token] ()
+
+type ParseOperator = Operator [Token] () Identity
+
+data Decimal = Decimal Int Integer
+
+instance Num Decimal where
+ (Decimal sa na) + (Decimal sb nb) = Decimal (max sa sb) (na + nb)
+ (Decimal sa na) - (Decimal sb nb) = Decimal (max sa sb) (na - nb)
+ (Decimal sa na) * (Decimal sb nb) = Decimal (max sa sb) (na * nb)
+ negate (Decimal s n) = Decimal s $ negate n
+ abs (Decimal s n) = Decimal s $ abs n
+ signum (Decimal s n) = Decimal s $ signum n
+ fromInteger = Decimal 32 . fromInteger
+
+-- | This parser succeeds whenever the given predicate returns true when called
+-- with parsed `Token`. Same as 'Text.Parsec.Char.satisfy'.
+satisfy :: (Token -> Bool) -> Parser TokenName
+satisfy f = tokenPrim show nextPos tokeq
+ where
+ tokeq :: Token -> Maybe TokenName
+ tokeq t@(Token t' _ _) = if f t then Just t' else Nothing
+
+satisfy' :: (Token -> Maybe a) -> Parser a
+satisfy' = tokenPrim show nextPos
+
+nextPos :: SourcePos -> Token -> [Token] -> SourcePos
+nextPos pos _ (Token _ _ (Position _ l c) : _) =
+ setSourceColumn (setSourceLine pos l) c
+nextPos pos _ [] = pos
+
+-- | Parses given `TokenName`.
+tok :: TokenName -> Parser TokenName
+tok t = satisfy (\(Token t' _ _) -> t' == t) <?> show t
+
+-- | Parse without returning the `TokenName`.
+tok' :: TokenName -> Parser ()
+tok' p = void $ tok p
+
+parens :: Parser a -> Parser a
+parens = between (tok SymParenL) (tok SymParenR)
+
+brackets :: Parser a -> Parser a
+brackets = between (tok SymBrackL) (tok SymBrackR)
+
+braces :: Parser a -> Parser a
+braces = between (tok SymBraceL) (tok SymBraceR)
+
+sBinOp :: BinaryOperator -> Expr -> Expr -> Expr
+sBinOp = sOp BinOp where sOp f b a = f a b
+
+parseExpr' :: Parser Expr
+parseExpr' = buildExpressionParser parseTable parseTerm <?> "expr"
+
+decToExpr :: Decimal -> Expr
+decToExpr (Decimal s n) = Number $ bitVec s n
+
+-- | Parse a Number depending on if it is in a hex or decimal form. Octal and
+-- binary are not supported yet.
+parseNum :: Parser Expr
+parseNum = decToExpr <$> number
+
+parseVar :: Parser Expr
+parseVar = Id <$> identifier
+
+parseVecSelect :: Parser Expr
+parseVecSelect = do
+ i <- identifier
+ expr <- brackets parseExpr
+ return $ VecSelect i expr
+
+parseRangeSelect :: Parser Expr
+parseRangeSelect = do
+ i <- identifier
+ range <- parseRange
+ return $ RangeSelect i range
+
+systemFunc :: Parser String
+systemFunc = satisfy' matchId
+ where
+ matchId (Token IdSystem s _) = Just s
+ matchId _ = Nothing
+
+parseFun :: Parser Expr
+parseFun = do
+ f <- systemFunc
+ expr <- parens parseExpr
+ return $ Appl (Identifier $ T.pack f) expr
+
+parserNonEmpty :: [a] -> Parser (NonEmpty a)
+parserNonEmpty (a : b) = return $ a :| b
+parserNonEmpty [] = fail "Concatenation cannot be empty."
+
+parseTerm :: Parser Expr
+parseTerm =
+ parens parseExpr
+ <|> (Concat <$> (braces (commaSep parseExpr) >>= parserNonEmpty))
+ <|> parseFun
+ <|> parseNum
+ <|> try parseVecSelect
+ <|> try parseRangeSelect
+ <|> parseVar
+ <?> "simple expr"
+
+-- | Parses the ternary conditional operator. It will behave in a right
+-- associative way.
+parseCond :: Expr -> Parser Expr
+parseCond e = do
+ tok' SymQuestion
+ expr <- parseExpr
+ tok' SymColon
+ Cond e expr <$> parseExpr
+
+parseExpr :: Parser Expr
+parseExpr = do
+ e <- parseExpr'
+ option e . try $ parseCond e
+
+parseConstExpr :: Parser ConstExpr
+parseConstExpr = fmap exprToConst parseExpr
+
+-- | Table of binary and unary operators that encode the right precedence for
+-- each.
+parseTable :: [[ParseOperator Expr]]
+parseTable =
+ [ [prefix SymBang (UnOp UnLNot), prefix SymTildy (UnOp UnNot)]
+ , [ prefix SymAmp (UnOp UnAnd)
+ , prefix SymBar (UnOp UnOr)
+ , prefix SymTildyAmp (UnOp UnNand)
+ , prefix SymTildyBar (UnOp UnNor)
+ , prefix SymHat (UnOp UnXor)
+ , prefix SymTildyHat (UnOp UnNxor)
+ , prefix SymHatTildy (UnOp UnNxorInv)
+ ]
+ , [prefix SymPlus (UnOp UnPlus), prefix SymDash (UnOp UnMinus)]
+ , [binary SymAsterAster (sBinOp BinPower) AssocRight]
+ , [ binary SymAster (sBinOp BinTimes) AssocLeft
+ , binary SymSlash (sBinOp BinDiv) AssocLeft
+ , binary SymPercent (sBinOp BinMod) AssocLeft
+ ]
+ , [ binary SymPlus (sBinOp BinPlus) AssocLeft
+ , binary SymDash (sBinOp BinPlus) AssocLeft
+ ]
+ , [ binary SymLtLt (sBinOp BinLSL) AssocLeft
+ , binary SymGtGt (sBinOp BinLSR) AssocLeft
+ ]
+ , [ binary SymLtLtLt (sBinOp BinASL) AssocLeft
+ , binary SymGtGtGt (sBinOp BinASR) AssocLeft
+ ]
+ , [ binary SymLt (sBinOp BinLT) AssocNone
+ , binary SymGt (sBinOp BinGT) AssocNone
+ , binary SymLtEq (sBinOp BinLEq) AssocNone
+ , binary SymGtEq (sBinOp BinLEq) AssocNone
+ ]
+ , [ binary SymEqEq (sBinOp BinEq) AssocNone
+ , binary SymBangEq (sBinOp BinNEq) AssocNone
+ ]
+ , [ binary SymEqEqEq (sBinOp BinEq) AssocNone
+ , binary SymBangEqEq (sBinOp BinNEq) AssocNone
+ ]
+ , [binary SymAmp (sBinOp BinAnd) AssocLeft]
+ , [ binary SymHat (sBinOp BinXor) AssocLeft
+ , binary SymHatTildy (sBinOp BinXNor) AssocLeft
+ , binary SymTildyHat (sBinOp BinXNorInv) AssocLeft
+ ]
+ , [binary SymBar (sBinOp BinOr) AssocLeft]
+ , [binary SymAmpAmp (sBinOp BinLAnd) AssocLeft]
+ , [binary SymBarBar (sBinOp BinLOr) AssocLeft]
+ ]
+
+binary :: TokenName -> (a -> a -> a) -> Assoc -> ParseOperator a
+binary name fun = Infix ((tok name <?> "binary") >> return fun)
+
+prefix :: TokenName -> (a -> a) -> ParseOperator a
+prefix name fun = Prefix ((tok name <?> "prefix") >> return fun)
+
+commaSep :: Parser a -> Parser [a]
+commaSep = flip sepBy $ tok SymComma
+
+parseContAssign :: Parser ContAssign
+parseContAssign = do
+ var <- tok KWAssign *> identifier
+ expr <- tok SymEq *> parseExpr
+ tok' SymSemi
+ return $ ContAssign var expr
+
+numLit :: Parser String
+numLit = satisfy' matchId
+ where
+ matchId (Token LitNumber s _) = Just s
+ matchId _ = Nothing
+
+number :: Parser Decimal
+number = number' <$> numLit
+ where
+ number' :: String -> Decimal
+ number' a | all (`elem` ['0' .. '9']) a = fromInteger $ read a
+ | head a == '\'' = fromInteger $ f a
+ | "'" `isInfixOf` a = Decimal (read w) (f b)
+ | otherwise = error $ "Invalid number format: " ++ a
+ where
+ w = takeWhile (/= '\'') a
+ b = dropWhile (/= '\'') a
+ f a'
+ | "'d" `isPrefixOf` a' = read $ drop 2 a'
+ | "'h" `isPrefixOf` a' = read $ "0x" ++ drop 2 a'
+ | "'b" `isPrefixOf` a' = foldl
+ (\n b' -> shiftL n 1 .|. (if b' == '1' then 1 else 0))
+ 0
+ (drop 2 a')
+ | otherwise = error $ "Invalid number format: " ++ a'
+
+-- toInteger' :: Decimal -> Integer
+-- toInteger' (Decimal _ n) = n
+
+toInt' :: Decimal -> Int
+toInt' (Decimal _ n) = fromInteger n
+
+-- | Parse a range and return the total size. As it is inclusive, 1 has to be
+-- added to the difference.
+parseRange :: Parser Range
+parseRange = do
+ rangeH <- tok SymBrackL *> parseConstExpr
+ rangeL <- tok SymColon *> parseConstExpr
+ tok' SymBrackR
+ return $ Range rangeH rangeL
+
+strId :: Parser String
+strId = satisfy' matchId
+ where
+ matchId (Token IdSimple s _) = Just s
+ matchId (Token IdEscaped s _) = Just s
+ matchId _ = Nothing
+
+identifier :: Parser Identifier
+identifier = Identifier . T.pack <$> strId
+
+parseNetDecl :: Maybe PortDir -> Parser ModItem
+parseNetDecl pd = do
+ t <- option Wire type_
+ sign <- option False (tok KWSigned $> True)
+ range <- option 1 parseRange
+ name <- identifier
+ i <- option Nothing (fmap Just (tok' SymEq *> parseConstExpr))
+ tok' SymSemi
+ return $ Decl pd (Port t sign range name) i
+ where type_ = tok KWWire $> Wire <|> tok KWReg $> Reg
+
+parsePortDir :: Parser PortDir
+parsePortDir =
+ tok KWOutput
+ $> PortOut
+ <|> tok KWInput
+ $> PortIn
+ <|> tok KWInout
+ $> PortInOut
+
+parseDecl :: Parser ModItem
+parseDecl = (Just <$> parsePortDir >>= parseNetDecl) <|> parseNetDecl Nothing
+
+parseConditional :: Parser Statement
+parseConditional = do
+ expr <- tok' KWIf *> parens parseExpr
+ true <- maybeEmptyStatement
+ false <- option Nothing (tok' KWElse *> maybeEmptyStatement)
+ return $ CondStmnt expr true false
+
+parseLVal :: Parser LVal
+parseLVal = fmap RegConcat (braces $ commaSep parseExpr) <|> ident
+ where
+ ident = do
+ i <- identifier
+ (try (ex i) <|> try (sz i) <|> return (RegId i))
+ ex i = do
+ e <- tok' SymBrackL *> parseExpr
+ tok' SymBrackR
+ return $ RegExpr i e
+ sz i = RegSize i <$> parseRange
+
+parseDelay :: Parser Delay
+parseDelay = Delay . toInt' <$> (tok' SymPound *> number)
+
+parseAssign :: TokenName -> Parser Assign
+parseAssign t = do
+ lval <- parseLVal
+ tok' t
+ delay <- option Nothing (fmap Just parseDelay)
+ expr <- parseExpr
+ return $ Assign lval delay expr
+
+parseLoop :: Parser Statement
+parseLoop = do
+ a <- tok' KWFor *> tok' SymParenL *> parseAssign SymEq
+ expr <- tok' SymSemi *> parseExpr
+ incr <- tok' SymSemi *> parseAssign SymEq
+ tok' SymParenR
+ statement <- parseStatement
+ return $ ForLoop a expr incr statement
+
+eventList :: TokenName -> Parser [Event]
+eventList t = do
+ l <- sepBy parseEvent' (tok t)
+ if null l then fail "Could not parse list" else return l
+
+parseEvent :: Parser Event
+parseEvent =
+ tok' SymAtAster
+ $> EAll
+ <|> try (tok' SymAt *> tok' SymParenLAsterParenR $> EAll)
+ <|> try
+ ( tok' SymAt
+ *> tok' SymParenL
+ *> tok' SymAster
+ *> tok' SymParenR
+ $> EAll
+ )
+ <|> try (tok' SymAt *> parens parseEvent')
+ <|> try (tok' SymAt *> parens (foldr1 EOr <$> eventList KWOr))
+ <|> try (tok' SymAt *> parens (foldr1 EComb <$> eventList SymComma))
+
+parseEvent' :: Parser Event
+parseEvent' =
+ try (tok' KWPosedge *> fmap EPosEdge identifier)
+ <|> try (tok' KWNegedge *> fmap ENegEdge identifier)
+ <|> try (fmap EId identifier)
+ <|> try (fmap EExpr parseExpr)
+
+parseEventCtrl :: Parser Statement
+parseEventCtrl = do
+ event <- parseEvent
+ statement <- option Nothing maybeEmptyStatement
+ return $ EventCtrl event statement
+
+parseDelayCtrl :: Parser Statement
+parseDelayCtrl = do
+ delay <- parseDelay
+ statement <- option Nothing maybeEmptyStatement
+ return $ TimeCtrl delay statement
+
+parseBlocking :: Parser Statement
+parseBlocking = do
+ a <- parseAssign SymEq
+ tok' SymSemi
+ return $ BlockAssign a
+
+parseNonBlocking :: Parser Statement
+parseNonBlocking = do
+ a <- parseAssign SymLtEq
+ tok' SymSemi
+ return $ NonBlockAssign a
+
+parseSeq :: Parser Statement
+parseSeq = do
+ seq' <- tok' KWBegin *> many parseStatement
+ tok' KWEnd
+ return $ SeqBlock seq'
+
+parseStatement :: Parser Statement
+parseStatement =
+ parseSeq
+ <|> parseConditional
+ <|> parseLoop
+ <|> parseEventCtrl
+ <|> parseDelayCtrl
+ <|> try parseBlocking
+ <|> parseNonBlocking
+
+maybeEmptyStatement :: Parser (Maybe Statement)
+maybeEmptyStatement =
+ (tok' SymSemi >> return Nothing) <|> (Just <$> parseStatement)
+
+parseAlways :: Parser ModItem
+parseAlways = tok' KWAlways *> (Always <$> parseStatement)
+
+parseInitial :: Parser ModItem
+parseInitial = tok' KWInitial *> (Initial <$> parseStatement)
+
+namedModConn :: Parser ModConn
+namedModConn = do
+ target <- tok' SymDot *> identifier
+ expr <- parens parseExpr
+ return $ ModConnNamed target expr
+
+parseModConn :: Parser ModConn
+parseModConn = try (fmap ModConn parseExpr) <|> namedModConn
+
+parseModInst :: Parser ModItem
+parseModInst = do
+ m <- identifier
+ name <- identifier
+ modconns <- parens (commaSep parseModConn)
+ tok' SymSemi
+ return $ ModInst m name modconns
+
+parseModItem :: Parser ModItem
+parseModItem =
+ try (ModCA <$> parseContAssign)
+ <|> try parseDecl
+ <|> parseAlways
+ <|> parseInitial
+ <|> parseModInst
+
+parseModList :: Parser [Identifier]
+parseModList = list <|> return [] where list = parens $ commaSep identifier
+
+filterDecl :: PortDir -> ModItem -> Bool
+filterDecl p (Decl (Just p') _ _) = p == p'
+filterDecl _ _ = False
+
+modPorts :: PortDir -> [ModItem] -> [Port]
+modPorts p mis = filter (filterDecl p) mis ^.. traverse . declPort
+
+parseParam :: Parser Parameter
+parseParam = do
+ i <- tok' KWParameter *> identifier
+ expr <- tok' SymEq *> parseConstExpr
+ return $ Parameter i expr
+
+parseParams :: Parser [Parameter]
+parseParams = tok' SymPound *> parens (commaSep parseParam)
+
+parseModDecl :: Parser ModDecl
+parseModDecl = do
+ name <- tok KWModule *> identifier
+ paramList <- option [] $ try parseParams
+ _ <- fmap defaultPort <$> parseModList
+ tok' SymSemi
+ modItem <- option [] . try $ many1 parseModItem
+ tok' KWEndmodule
+ return $ ModDecl name
+ (modPorts PortOut modItem)
+ (modPorts PortIn modItem)
+ modItem
+ paramList
+
+-- | Parses a 'String' into 'Verilog' by skipping any beginning whitespace
+-- and then parsing multiple Verilog source.
+parseVerilogSrc :: Parser Verilog
+parseVerilogSrc = Verilog <$> many parseModDecl
+
+-- | Parse a 'String' containing verilog code. The parser currently only supports
+-- the subset of Verilog that is being generated randomly.
+parseVerilog
+ :: Text -- ^ Name of parsed object.
+ -> Text -- ^ Content to be parsed.
+ -> Either Text Verilog -- ^ Returns 'String' with error
+ -- message if parse fails.
+parseVerilog s =
+ bimap showT id
+ . parse parseVerilogSrc (T.unpack s)
+ . alexScanTokens
+ . preprocess [] (T.unpack s)
+ . T.unpack
+
+parseVerilogFile :: Text -> IO Verilog
+parseVerilogFile file = do
+ src <- T.readFile $ T.unpack file
+ case parseVerilog file src of
+ Left s -> error $ T.unpack s
+ Right r -> return r
+
+parseSourceInfoFile :: Text -> Text -> IO SourceInfo
+parseSourceInfoFile top = fmap (SourceInfo top) . parseVerilogFile