1-- | Parser combinators for the POSIX @Makefile@ specifications.
2module Mach.Parser where
3
4import Control.Exception (throwIO)
5import Control.Monad (unless, void, when)
6import Data.List (elemIndices)
7import Mach.Error (MakeErr (..))
8import qualified Mach.Types as T
9import Mach.Util (isSpecial)
10import System.IO (hGetContents, stdin)
11import Text.ParserCombinators.Parsec
12 ( Parser,
13 alphaNum,
14 anyChar,
15 between,
16 char,
17 eof,
18 lookAhead,
19 many,
20 many1,
21 manyTill,
22 newline,
23 noneOf,
24 oneOf,
25 optionMaybe,
26 parse,
27 parseFromFile,
28 sepBy,
29 sepBy1,
30 skipMany,
31 string,
32 try,
33 unexpected,
34 (<|>),
35 )
36
37-- Bind a given character to the given result.
38bind :: String -> a -> Parser a
39bind str val = val <$ string str
40
41-- | Parse one or more newlines.
42newlines :: Parser ()
43newlines = void $ many1 newline
44
45-- | Parse a single blank character.
46blank :: Parser Char
47blank = char ' '
48
49-- | Parse one or more <blank> characters.
50blanks :: Parser ()
51blanks = void $ many1 blank
52
53-- Parse zero or more <blank> characters.
54maybeBlanks :: Parser ()
55maybeBlanks = void $ many (char ' ')
56
57-- | Parse a character from the portable filename character set.
58--
59-- See https://pubs.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap03.html#tag_03_276
60fnChar :: Parser Char
61fnChar = alphaNum <|> oneOf "._-"
62
63-- | Parse a macro name, according to portable macro names should
64-- consist exclusively of characters from the portabel filename
65-- character set.
66macroName :: Parser String
67macroName = many1 fnChar
68
69-- | Parse a target name character. As per POSIX, target names should
70-- only consist of slashes, hyphens, periods, underscores, digits and
71-- alphabetics.
72targetChar :: Parser Char
73targetChar = alphaNum <|> oneOf "/-._"
74
75-- | Parse an assignment operator, also refered to as a macro flavor
76-- in the POSIX standard. The implementation provided here should
77-- be aligned with the 'Show' instance of 'T.Flavor'.
78assignOp :: Parser T.Flavor
79assignOp =
80 bind "=" T.Delayed
81 <|> bind "::=" T.Immediate
82 <|> bind ":::=" T.StrictDelay
83 <|> bind "!=" T.System
84 <|> bind "?=" T.Cond
85 <|> bind "+=" T.Append
86
87-- | Parse an assignment, i.e. a macro definition.
88assign :: Parser T.Assign
89assign = do
90 mident <- tokens' [' ', '=']
91 flavor <- blanks >> assignOp <* blanks
92 T.Assign mident flavor <$> tokens
93
94-- | Parse a macro expansion.
95macroExpand :: Parser T.Token
96macroExpand = char '$' >> (singleChar <|> macroExpand')
97 where
98 singleChar :: Parser T.Token
99 singleChar = T.Exp <$> ((\x -> T.Lit [x]) <$> literal "{}()")
100
101-- | Parse a macro expansion enclosed in parentheses.
102macroExpand' :: Parser T.Token
103macroExpand' =
104 let inner = try subExpand <|> simpleExpand
105 in ( between (char '(') (char ')') inner
106 <|> between (char '{') (char '}') inner
107 )
108 where
109 -- Parse a macro expansion of the form $(string1).
110 simpleExpand :: Parser T.Token
111 simpleExpand =
112 T.Exp
113 <$> ( macroExpand
114 <|> macroExpand
115 <|> tokenLit (literal "})")
116 )
117
118 -- Parse a macro expansion of the form $(string1:subst1=[subst2]).
119 subExpand :: Parser T.Token
120 subExpand = do
121 -- TODO: Support nested macro expansion in string1
122 string1 <- T.Exp <$> (tokenLit $ literal ":})")
123 _ <- char ':'
124 subst1 <- many1 $ noneOf "="
125 _ <- char '='
126 subst2 <- many $ noneOf "})"
127 pure $ T.ExpSub string1 subst1 subst2
128
129-- | Parse a valid character for a literal.
130-- Characters not valid in the current context can be passed as well.
131literal :: [Char] -> Parser Char
132literal notValid = noneOf $ notValid ++ "#\n\\$"
133
134-- | Parse a single token, i.e. an escaped newline, escaped @$@ character, macro expansion, or literal.
135token :: Parser T.Token
136token = tokenLit $ literal []
137
138-- | Parse a token but use a custom parser for parsing of literal tokens.
139tokenLit :: Parser Char -> Parser T.Token
140tokenLit lit =
141 try macroExpand
142 <|> escDollar
143 <|> escNewline
144 <|> litToken
145 where
146 escDollar :: Parser T.Token
147 escDollar = bind "$$" (T.Lit "$")
148
149 escNewline :: Parser T.Token
150 escNewline = T.Lit " " <$ (string "\\\n" >> maybeBlanks)
151
152 -- TODO: In noneOf, check that \ is followed by a newline.
153 litToken :: Parser T.Token
154 litToken = T.Lit <$> many1 lit
155
156-- | Parse a sequence of zero or more 'T.Token'.
157tokens :: Parser T.Token
158tokens = T.Seq <$> many token
159
160-- | Like 'tokens' but allows passing characters not valid in the current context.
161tokens' :: [Char] -> Parser T.Token
162tokens' notValid = T.Seq <$> many (tokenLit $ literal notValid)
163
164-- | Parse multiple lines of commands prefixed by a tab character.
165commands :: Parser [T.Token]
166commands = many (char '\t' >> (tokens <* newline))
167
168-- | Inference rule.
169infRule :: Parser T.InfRule
170infRule = do
171 target <- char '.' >> (:) '.' <$> many1 targetChar
172
173 let periods = length $ elemIndices '.' target
174 when (isSpecial target || (periods /= 1 && periods /= 2)) $
175 unexpected "invalid inference rule target name"
176
177 cmds <-
178 char ':'
179 >> ( (const [] <$> (many1 blank >> char ';'))
180 <|> (many blank >> newline >> commands)
181 )
182
183 pure $ T.InfRule target cmds
184
185-- | Target rule which defines how targets are build.
186targetRule :: Parser T.TgtRule
187targetRule = do
188 targets <- sepBy1 (tokenLit targetChar) blank
189 _ <- char ':' >> (blank <|> lookAhead newline)
190 prereqs <- sepBy (tokenLit targetChar) blank
191 command <- optionMaybe (char ';' >> tokens)
192 _ <- newline
193
194 cmds <- commands
195 pure $ T.TgtRule targets prereqs (maybe cmds (: cmds) command)
196
197include :: Parser [T.Token]
198include = do
199 _ <- optionMaybe (char '-')
200 _ <- string "include" >> blanks
201 paths <- sepBy1 (tokenLit $ literal " ") blank
202 _ <- maybeBlanks
203 pure paths
204
205skipNoCode :: Parser ()
206skipNoCode = do
207 _ <- many blank
208 _ <- many newline
209 _ <- skipMany (try comment >> many blank >> many newline)
210 pure ()
211 where
212 comment :: Parser String
213 comment = char '#' >> manyTill anyChar newline
214
215lexeme :: Parser a -> Parser a
216lexeme p = p <* skipNoCode
217
218-- | Parse a POSIX @Makefile@.
219mkFile :: Parser T.MkFile
220mkFile =
221 skipNoCode
222 >> many
223 ( try (T.MkAssign <$> lexeme assign)
224 <|> try (T.MkInfRule <$> lexeme infRule)
225 <|> try (T.MkTgtRule <$> lexeme targetRule)
226 <|> try (T.MkInclude <$> lexeme include)
227 )
228 -- Ensure that we parse the whole Makefile
229 <* lexeme eof
230
231------------------------------------------------------------------------
232
233-- | Parse assignments and targets specified on the command-line.
234cmdLine :: String -> IO (T.MkFile, [FilePath])
235cmdLine str =
236 case parse cmdLine' "command-line" str of
237 Left err -> throwIO $ ParserErr err
238 Right mk -> pure mk
239 where
240 assign' :: Parser T.Assign
241 assign' = do
242 mident <- tokens' [' ', '=']
243 flavor <- assignOp
244 unless (flavor `elem` [T.Delayed, T.Immediate, T.StrictDelay]) $
245 unexpected "invalid assignment flavor"
246
247 T.Assign mident flavor <$> tokens' " "
248
249 cmdLine' :: Parser (T.MkFile, [FilePath])
250 cmdLine' = do
251 assigns <- many (try assign' <* many blank)
252 targets <- sepBy (many1 targetChar) blank
253 pure (map T.MkAssign assigns, targets)
254
255parseMkFile :: FilePath -> IO T.MkFile
256parseMkFile path = do
257 res <-
258 if path == "-"
259 then parse mkFile path <$> hGetContents stdin
260 else parseFromFile mkFile path
261 case res of
262 Left err -> throwIO $ ParserErr err
263 Right mk -> pure mk