1/*
2 * Copyright © 2016-2019 Sören Tempel
3 *
4 * This program is free software: you can redistribute it and/or
5 * modify it under the terms of the GNU Affero General Public
6 * License as published by the Free Software Foundation, either
7 * version 3 of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Affero General Public License for more details.
13 *
14 * You should have received a copy of the GNU Affero General Public
15 * License along with this program. If not, see
16 * <http://www.gnu.org/licenses/>.
17 */
18
19#include <assert.h>
20#include <stdio.h>
21#include <stdlib.h>
22
23#include <sys/types.h>
24
25#include "parser.h"
26#include "scanner.h"
27#include "token.h"
28#include "turing.h"
29#include "util.h"
30
31/**
32 * Macro which should be used if the next expected token is a semicolon token.
33 * If it isn't the proper error code is returned.
34 */
35#define EXPSEM(T) \
36 do { \
37 if (T->type != TOK_SEMICOLON) \
38 return PAR_SEMICOLON; \
39 } while (0)
40
41/**
42 * Returns the next token and advances the parser position.
43 *
44 * @param par Parser to extract next token from.
45 * @pre A previous call of this function should not have returned TOK_EOF.
46 * @returns Next token.
47 */
48static token *
49next(parser *par)
50{
51 token *tok;
52
53 if (par->prevtok != NULL)
54 freetoken(par->prevtok);
55
56 tok = par->peektok;
57 if (tok != NULL)
58 par->peektok = NULL;
59 else
60 tok = nexttoken(par->scr);
61
62 par->prevtok = tok;
63 return tok;
64}
65
66/**
67 * Returns the next token for the given parser without advancing the position.
68 *
69 * @pre A previous call of this ::next should not have returned TOK_EOF.
70 * @param par Parser to extract next token from.
71 * @returns Next token.
72 */
73static token *
74peek(parser *par)
75{
76 token *tok;
77
78 tok = par->peektok;
79 if (tok != NULL)
80 return tok;
81
82 par->peektok = nexttoken(par->scr);
83 return par->peektok;
84}
85
86/**
87 * Allocates memory for a new parser, initializes it and returns a
88 * pointer to it.
89 *
90 * @param str Input which should be parsed.
91 * @param len Length of the input.
92 * @returns Pointer to the newly created parser.
93 */
94parser *
95newparser(char *str, size_t len)
96{
97 parser *par;
98
99 par = emalloc(sizeof(parser));
100 par->scr = scanstr(str, len);
101 par->peektok = par->prevtok = par->tok = NULL;
102 return par;
103}
104
105/**
106 * Formats a parerr as a string and includes line and column information
107 * where the error (presumably) ocurred. The string is directly written
108 * to the given stream.
109 *
110 * @pre Parser must have encountered an error.
111 * @param par Parser which returned the parerr.
112 * @param err Parser error returned by ::parsetm.
113 * @param fn Name of the file the parser was trying to parse.
114 * @param stream Stream to write error message to.
115 * @return Number of characters written to the stream.
116 */
117int
118strparerr(parser *par, parerr err, char *fn, FILE *stream)
119{
120 int r;
121 size_t pos;
122 token *tok;
123 char *msg, *line, *marker;
124
125 assert(err != PAR_OK);
126 tok = par->tok;
127 assert(tok);
128 msg = "Unkown error.";
129
130 /* Check for scanner error. */
131 if (tok->type == TOK_ERROR) {
132 switch (tok->value) {
133 case ERR_OVERFLOW:
134 msg = "Numeric state name exceeds INT_MAX.";
135 goto ret;
136 case ERR_UNDERFLOW:
137 msg = "Numeric state names can't be negative.";
138 goto ret;
139 case ERR_UNKOWN:
140 msg = "Lexer encountered an unknown character.";
141 goto ret;
142 case ERR_UNEXPECTED:
143 msg = "A terminal string was expected but the "
144 "lexer encountered a character which is "
145 "not part of the expected string. Perhaps "
146 "you misspelled 'start:' or 'accept:'.";
147 goto ret;
148 }
149 }
150
151 /* Wasn't a lexer error so the err parameter is actually relevant. */
152 switch (err) {
153 case PAR_SEMICOLON:
154 msg = "Missing semicolon, maybe the previous transition "
155 "is missing a semicolon or a metadata information "
156 "was not properly terminated with a semicolon.";
157 break;
158 case PAR_STATEDEFTWICE:
159 msg = "This state was already defined previously. You can't "
160 "define states twice please move all transitions from "
161 "this state definition to the previous definition.";
162 break;
163 case PAR_TRANSDEFTWICE:
164 msg = "Only deterministic turing machines are supported. "
165 "Meaning you can't have more than one transition "
166 "for the same input symbol.";
167 break;
168 case PAR_STARTKEY:
169 msg = "An initial state wasn't defined. Please define it "
170 "using the 'start:' keyword.";
171 return fprintf(stream, "%s: %s\n", fn, msg);
172 case PAR_INITALSTATE:
173 msg = "The initial state value cannot be left empty.";
174 break;
175 case PAR_ACCEPTKEY:
176 msg = "Accepting states where not defined. Please define "
177 "one or more accepting states using the "
178 "'accept:' keyword.";
179 return fprintf(stream, "%s: %s\n", fn, msg);
180 case PAR_NONSTATEACCEPT:
181 msg = "Your accepting state list contains a token which is "
182 "not a state name or is empty.";
183 break;
184 case PAR_STATEDEF:
185 msg = "Expected a state definition but didn't find a valid "
186 "state name. Valid state names must match the "
187 "following regex: 'q[0-9]+'.";
188 break;
189 case PAR_LBRACKET:
190 msg = "The parser expected an opening curly bracket as a "
191 "part of this state definition.";
192 break;
193 case PAR_RBRACKET:
194 msg = "The parser expected a closing curly bracket as a "
195 "part of this state definition.";
196 break;
197 case PAR_RSYMBOL:
198 msg = "Your transition definition is missing a symbol "
199 "which triggers this transition. This symbol "
200 "can only be an alphanumeric character or the "
201 "special blank character.";
202 break;
203 case PAR_DIRECTION:
204 msg = "Expected direction to move head to, this symbol is "
205 "not a valid head direction symbol.";
206 break;
207 case PAR_WSYMBOL:
208 msg = "Your transition definition is missing a symbol "
209 "which is written to the tape when this transition "
210 "is performed. This symbol can only be an "
211 "alphanumeric character or the special blank character.";
212 break;
213 case PAR_NEXTSTATESYM:
214 msg = "The next state symbol ('=>') was expected but "
215 "not found.";
216 break;
217 case PAR_NEXTSTATE:
218 msg = "Your transition is missing a state to transit to "
219 "when performing this transition.";
220 break;
221 case PAR_OK:
222 /* Never reached */
223 break;
224 }
225
226ret:
227 if (!(line = linenum(par->scr->input, tok->line))) {
228 msg = "Current token contains an invalid line number. "
229 "This is a bug, please consider reporting it.";
230 return fprintf(stream, "%s\n", msg);
231 }
232
233 if (tok->type == TOK_EOF)
234 pos = endofline(line);
235 else
236 pos = tok->column - 1;
237
238 marker = mark(pos, line);
239 r = fprintf(stream, "%s:%d:%d: %s\n %s\n %s\n", fn, tok->line,
240 tok->column, msg, line, marker);
241
242 free(line);
243 free(marker);
244 return r;
245}
246
247/**
248 * Frees all resources for a given parser.
249 *
250 * @param par Pointer to the parser which should be freed.
251 */
252void
253freeparser(parser *par)
254{
255 assert(par);
256
257 /* Tokens are freed in the next method. */
258
259 freescanner(par->scr);
260 free(par);
261}
262
263/**
264 * Parses the metadata information of a tmsim input file.
265 *
266 * The following EBNF rules describes valid input:
267 *
268 * \code
269 * metadata = "start:", statename, ";",
270 * "accept:", statenames, ";";
271 * \endcode
272 *
273 * @param par Parser for which metadata should be parsed.
274 * @param dest Pointer to a Turing machine, if the metadata was
275 * parsed successfully the accept array and the start field
276 * of the dtm struct are set accordingly.
277 * @return Error code or PAR_OK if no error was encountered.
278 */
279static parerr
280parsemeta(parser *par, dtm *dest)
281{
282 par->tok = next(par);
283 if (par->tok->type != TOK_START)
284 return PAR_STARTKEY;
285
286 par->tok = next(par);
287 if (par->tok->type != TOK_STATE)
288 return PAR_INITALSTATE;
289 dest->start = par->tok->value;
290
291 par->tok = next(par);
292 EXPSEM(par->tok);
293
294 par->tok = next(par);
295 if (par->tok->type != TOK_ACCEPT)
296 return PAR_ACCEPTKEY;
297
298 do {
299 par->tok = next(par);
300 if (par->tok->type != TOK_STATE)
301 return PAR_NONSTATEACCEPT;
302 addaccept(dest, par->tok->value);
303
304 par->tok = next(par);
305 } while (par->tok->type == TOK_COMMA &&
306 par->tok->type != TOK_SEMICOLON);
307
308 EXPSEM(par->tok);
309
310 return PAR_OK;
311}
312
313/**
314 * Parses a state transition of a tmsim input file.
315 *
316 * The following EBNF rules describes valid input:
317 *
318 * \code
319 * transition = symbol, direction, symbol, "=>", statename;
320 * \endcode
321 *
322 * @param par Parser for which a transition should be parsed.
323 * @param dest Pointer to a transition, if the transition was parsed
324 * successfully, the struct fields are initialized accordingly.
325 * @return Error code or PAR_OK if no error was encountered.
326 */
327static parerr
328parsetrans(parser *par, tmtrans *dest)
329{
330 par->tok = next(par);
331 if (par->tok->type != TOK_SYMBOL)
332 return PAR_RSYMBOL;
333 dest->rsym = (char)par->tok->value;
334
335 par->tok = next(par);
336 switch (par->tok->type) {
337 case TOK_SMALLER:
338 dest->headdir = LEFT;
339 break;
340 case TOK_GREATER:
341 dest->headdir = RIGHT;
342 break;
343 case TOK_PIPE:
344 dest->headdir = STAY;
345 break;
346 default:
347 return PAR_DIRECTION;
348 }
349
350 par->tok = next(par);
351 if (par->tok->type != TOK_SYMBOL)
352 return PAR_WSYMBOL;
353 dest->wsym = (char)par->tok->value;
354
355 par->tok = next(par);
356 if (par->tok->type != TOK_NEXT)
357 return PAR_NEXTSTATESYM;
358
359 par->tok = next(par);
360 if (par->tok->type != TOK_STATE)
361 return PAR_NEXTSTATE;
362 dest->nextstate = par->tok->value;
363
364 return PAR_OK;
365}
366
367/**
368 * Parses a state definition of a tmsim input file.
369 *
370 * The following EBNF rules describes valid input:
371 *
372 * \code
373 * state = statename, "{", [ transitions ], "}";
374 * \endcode
375 *
376 * @param par Parser for which a state definition should be parsed.
377 * @param dest Pointer to a Turing state, if the state definition was parsed
378 * successfully, the struct fields are initialized accordingly.
379 * @return Error code or PAR_OK if no error was encountered.
380 */
381static parerr
382parsestate(parser *par, tmstate *dest)
383{
384 tmtrans *trans;
385 parerr ret;
386
387 par->tok = next(par);
388 if (par->tok->type != TOK_STATE)
389 return PAR_STATEDEF;
390 dest->name = par->tok->value;
391
392 par->tok = next(par);
393 if (par->tok->type != TOK_LBRACKET)
394 return PAR_LBRACKET;
395
396 while (peek(par)->type != TOK_RBRACKET) {
397 trans = emalloc(sizeof(tmtrans));
398 if ((ret = parsetrans(par, trans)) != PAR_OK) {
399 free(trans);
400 return ret;
401 }
402
403 if (addtrans(dest, trans)) {
404 free(trans);
405 return PAR_TRANSDEFTWICE;
406 }
407
408 par->tok = next(par);
409 EXPSEM(par->tok);
410 }
411
412 par->tok = next(par);
413 if (par->tok->type != TOK_RBRACKET)
414 return PAR_RBRACKET; /* Never reached. */
415
416 return PAR_OK;
417}
418
419/**
420 * Parses a series of state definitions of a tmsim input file.
421 *
422 * The following EBNF rules describes valid input:
423 *
424 * \code
425 * states = { state };
426 * \endcode
427 *
428 * @param par Parser for which a state definition should be parsed.
429 * @param dest Pointer to a Turing machine, if the states were parsed
430 * successfully, the struct fields are initialized accordingly.
431 * @return Error code or PAR_OK if no error was encountered.
432 */
433static parerr
434parsestates(parser *par, dtm *dest)
435{
436 parerr ret;
437 tmstate *state;
438
439 while (peek(par)->type != TOK_EOF) {
440 state = newtmstate();
441 if ((ret = parsestate(par, state)) != PAR_OK) {
442 free(state);
443 return ret;
444 }
445
446 if (addstate(dest, state)) {
447 free(state);
448 return PAR_STATEDEFTWICE;
449 }
450 }
451
452 /* skip and free EOF */
453 par->tok = next(par);
454 freetoken(par->tok);
455
456 return PAR_OK;
457}
458
459/**
460 * Parses a tmsim input file.
461 *
462 * The following EBNF rules describes valid input:
463 *
464 * \code
465 * turingmachine = metadata, states;
466 * \endcode
467 *
468 * @param par Parser for which a state definition should be parsed.
469 * @param dest Pointer to a Turing machine, if the states were parsed
470 * successfully, the struct fields are initialized accordingly.
471 * @return Error code or PAR_OK if no error was encountered.
472 */
473parerr
474parsetm(parser *par, dtm *dest)
475{
476 parerr ret;
477
478 if ((ret = parsemeta(par, dest)) != PAR_OK)
479 return ret;
480 if ((ret = parsestates(par, dest)) != PAR_OK)
481 return ret;
482
483 return PAR_OK;
484}