mach

A work-in-progress implementation of make(1)

git clone https://git.8pit.net/mach.git

  1-- | Parser combinators for the POSIX @Makefile@ specifications.
  2module Mach.Parser where
  3
  4import Control.Exception (throwIO)
  5import Control.Monad (unless, void, when)
  6import Data.List (elemIndices)
  7import Mach.Error (MakeErr (..))
  8import qualified Mach.Types as T
  9import Mach.Util (isSpecial)
 10import System.IO (hGetContents, stdin)
 11import Text.ParserCombinators.Parsec
 12  ( Parser,
 13    alphaNum,
 14    anyChar,
 15    between,
 16    char,
 17    eof,
 18    lookAhead,
 19    many,
 20    many1,
 21    manyTill,
 22    newline,
 23    noneOf,
 24    oneOf,
 25    optionMaybe,
 26    parse,
 27    parseFromFile,
 28    sepBy,
 29    sepBy1,
 30    skipMany,
 31    string,
 32    try,
 33    unexpected,
 34    (<|>),
 35  )
 36
 37-- Bind a given character to the given result.
 38bind :: String -> a -> Parser a
 39bind str val = val <$ string str
 40
 41-- | Parse one or more newlines.
 42newlines :: Parser ()
 43newlines = void $ many1 newline
 44
 45-- | Parse a single blank character.
 46blank :: Parser Char
 47blank = char ' '
 48
 49-- | Parse one or more <blank> characters.
 50blanks :: Parser ()
 51blanks = void $ many1 blank
 52
 53-- Parse zero or more <blank> characters.
 54maybeBlanks :: Parser ()
 55maybeBlanks = void $ many (char ' ')
 56
 57-- | Parse a character from the portable filename character set.
 58--
 59-- See https://pubs.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap03.html#tag_03_276
 60fnChar :: Parser Char
 61fnChar = alphaNum <|> oneOf "._-"
 62
 63-- | Parse a macro name, according to portable macro names should
 64-- consist exclusively of characters from the portabel filename
 65-- character set.
 66macroName :: Parser String
 67macroName = many1 fnChar
 68
 69-- | Parse a target name character. As per POSIX, target names should
 70-- only consist of slashes, hyphens, periods, underscores, digits and
 71-- alphabetics.
 72targetChar :: Parser Char
 73targetChar = alphaNum <|> oneOf "/-._"
 74
 75-- | Parse an assignment operator, also refered to as a macro flavor
 76-- in the POSIX standard. The implementation provided here should
 77-- be aligned with the 'Show' instance of 'T.Flavor'.
 78assignOp :: Parser T.Flavor
 79assignOp =
 80  bind "=" T.Delayed
 81    <|> bind "::=" T.Immediate
 82    <|> bind ":::=" T.StrictDelay
 83    <|> bind "!=" T.System
 84    <|> bind "?=" T.Cond
 85    <|> bind "+=" T.Append
 86
 87-- | Parse an assignment, i.e. a macro definition.
 88assign :: Parser T.Assign
 89assign = do
 90  mident <- tokens' [' ', '=']
 91  flavor <- blanks >> assignOp <* blanks
 92  T.Assign mident flavor <$> tokens
 93
 94-- | Parse a macro expansion.
 95macroExpand :: Parser T.Token
 96macroExpand = char '$' >> (singleChar <|> macroExpand')
 97  where
 98    singleChar :: Parser T.Token
 99    singleChar = T.Exp <$> ((\x -> T.Lit [x]) <$> literal "{}()")
100
101-- | Parse a macro expansion enclosed in parentheses.
102macroExpand' :: Parser T.Token
103macroExpand' =
104  let inner = try subExpand <|> simpleExpand
105   in ( between (char '(') (char ')') inner
106          <|> between (char '{') (char '}') inner
107      )
108  where
109    -- Parse a macro expansion of the form $(string1).
110    simpleExpand :: Parser T.Token
111    simpleExpand =
112      T.Exp
113        <$> ( macroExpand
114                <|> macroExpand
115                <|> tokenLit (literal "})")
116            )
117
118    -- Parse a macro expansion of the form $(string1:subst1=[subst2]).
119    subExpand :: Parser T.Token
120    subExpand = do
121      -- TODO: Support nested macro expansion in string1
122      string1 <- T.Exp <$> (tokenLit $ literal ":})")
123      _ <- char ':'
124      subst1 <- many1 $ noneOf "="
125      _ <- char '='
126      subst2 <- many $ noneOf "})"
127      pure $ T.ExpSub string1 subst1 subst2
128
129-- | Parse a valid character for a literal.
130-- Characters not valid in the current context can be passed as well.
131literal :: [Char] -> Parser Char
132literal notValid = noneOf $ notValid ++ "#\n\\$"
133
134-- | Parse a single token, i.e. an escaped newline, escaped @$@ character, macro expansion, or literal.
135token :: Parser T.Token
136token = tokenLit $ literal []
137
138-- | Parse a token but use a custom parser for parsing of literal tokens.
139tokenLit :: Parser Char -> Parser T.Token
140tokenLit lit =
141  try macroExpand
142    <|> escDollar
143    <|> escNewline
144    <|> litToken
145  where
146    escDollar :: Parser T.Token
147    escDollar = bind "$$" (T.Lit "$")
148
149    escNewline :: Parser T.Token
150    escNewline = T.Lit " " <$ (string "\\\n" >> maybeBlanks)
151
152    -- TODO: In noneOf, check that \ is followed by a newline.
153    litToken :: Parser T.Token
154    litToken = T.Lit <$> many1 lit
155
156-- | Parse a sequence of zero or more 'T.Token'.
157tokens :: Parser T.Token
158tokens = T.Seq <$> many token
159
160-- | Like 'tokens' but allows passing characters not valid in the current context.
161tokens' :: [Char] -> Parser T.Token
162tokens' notValid = T.Seq <$> many (tokenLit $ literal notValid)
163
164-- | Parse multiple lines of commands prefixed by a tab character.
165commands :: Parser [T.Token]
166commands = many (char '\t' >> (tokens <* newline))
167
168-- | Inference rule.
169infRule :: Parser T.InfRule
170infRule = do
171  target <- char '.' >> (:) '.' <$> many1 targetChar
172
173  let periods = length $ elemIndices '.' target
174  when (isSpecial target || (periods /= 1 && periods /= 2)) $
175    unexpected "invalid inference rule target name"
176
177  cmds <-
178    char ':'
179      >> ( (const [] <$> (many1 blank >> char ';'))
180             <|> (many blank >> newline >> commands)
181         )
182
183  pure $ T.InfRule target cmds
184
185-- | Target rule which defines how targets are build.
186targetRule :: Parser T.TgtRule
187targetRule = do
188  targets <- sepBy1 (tokenLit targetChar) blank
189  _ <- char ':' >> (blank <|> lookAhead newline)
190  prereqs <- sepBy (tokenLit targetChar) blank
191  command <- optionMaybe (char ';' >> tokens)
192  _ <- newline
193
194  cmds <- commands
195  pure $ T.TgtRule targets prereqs (maybe cmds (: cmds) command)
196
197include :: Parser [T.Token]
198include = do
199  _ <- optionMaybe (char '-')
200  _ <- string "include" >> blanks
201  paths <- sepBy1 (tokenLit $ literal " ") blank
202  _ <- maybeBlanks
203  pure paths
204
205skipNoCode :: Parser ()
206skipNoCode = do
207  _ <- many blank
208  _ <- many newline
209  _ <- skipMany (try comment >> many blank >> many newline)
210  pure ()
211  where
212    comment :: Parser String
213    comment = char '#' >> manyTill anyChar newline
214
215lexeme :: Parser a -> Parser a
216lexeme p = p <* skipNoCode
217
218-- | Parse a POSIX @Makefile@.
219mkFile :: Parser T.MkFile
220mkFile =
221  skipNoCode
222    >> many
223      ( try (T.MkAssign <$> lexeme assign)
224          <|> try (T.MkInfRule <$> lexeme infRule)
225          <|> try (T.MkTgtRule <$> lexeme targetRule)
226          <|> try (T.MkInclude <$> lexeme include)
227      )
228      -- Ensure that we parse the whole Makefile
229      <* lexeme eof
230
231------------------------------------------------------------------------
232
233-- | Parse assignments and targets specified on the command-line.
234cmdLine :: String -> IO (T.MkFile, [FilePath])
235cmdLine str =
236  case parse cmdLine' "command-line" str of
237    Left err -> throwIO $ ParserErr err
238    Right mk -> pure mk
239  where
240    assign' :: Parser T.Assign
241    assign' = do
242      mident <- tokens' [' ', '=']
243      flavor <- assignOp
244      unless (flavor `elem` [T.Delayed, T.Immediate, T.StrictDelay]) $
245        unexpected "invalid assignment flavor"
246
247      T.Assign mident flavor <$> tokens' " "
248
249    cmdLine' :: Parser (T.MkFile, [FilePath])
250    cmdLine' = do
251      assigns <- many (try assign' <* many blank)
252      targets <- sepBy (many1 targetChar) blank
253      pure (map T.MkAssign assigns, targets)
254
255parseMkFile :: FilePath -> IO T.MkFile
256parseMkFile path = do
257  res <-
258    if path == "-"
259      then parse mkFile path <$> hGetContents stdin
260      else parseFromFile mkFile path
261  case res of
262    Left err -> throwIO $ ParserErr err
263    Right mk -> pure mk