1/*2 * Copyright © 2016-2019 Sören Tempel3 *4 * This program is free software: you can redistribute it and/or5 * modify it under the terms of the GNU Affero General Public6 * License as published by the Free Software Foundation, either7 * version 3 of the License, or (at your option) any later version.8 *9 * This program is distributed in the hope that it will be useful,10 * but WITHOUT ANY WARRANTY; without even the implied warranty of11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU12 * Affero General Public License for more details.13 *14 * You should have received a copy of the GNU Affero General Public15 * License along with this program. If not, see16 * <http://www.gnu.org/licenses/>.17 */1819#include <assert.h>20#include <stdio.h>21#include <stdlib.h>2223#include <sys/types.h>2425#include "parser.h"26#include "scanner.h"27#include "token.h"28#include "turing.h"29#include "util.h"3031/**32 * Macro which should be used if the next expected token is a semicolon token.33 * If it isn't the proper error code is returned.34 */35#define EXPSEM(T) \36 do { \37 if (T->type != TOK_SEMICOLON) \38 return PAR_SEMICOLON; \39 } while (0)4041/**42 * Returns the next token and advances the parser position.43 *44 * @param par Parser to extract next token from.45 * @pre A previous call of this function should not have returned TOK_EOF.46 * @returns Next token.47 */48static token *49next(parser *par)50{51 token *tok;5253 if (par->prevtok != NULL)54 freetoken(par->prevtok);5556 tok = par->peektok;57 if (tok != NULL)58 par->peektok = NULL;59 else60 tok = nexttoken(par->scr);6162 par->prevtok = tok;63 return tok;64}6566/**67 * Returns the next token for the given parser without advancing the position.68 *69 * @pre A previous call of this ::next should not have returned TOK_EOF.70 * @param par Parser to extract next token from.71 * @returns Next token.72 */73static token *74peek(parser *par)75{76 token *tok;7778 tok = par->peektok;79 if (tok != NULL)80 return tok;8182 par->peektok = nexttoken(par->scr);83 return par->peektok;84}8586/**87 * Allocates memory for a new parser, initializes it and returns a88 * pointer to it.89 *90 * @param str Input which should be parsed.91 * @param len Length of the input.92 * @returns Pointer to the newly created parser.93 */94parser *95newparser(char *str, size_t len)96{97 parser *par;9899 par = emalloc(sizeof(parser));100 par->scr = scanstr(str, len);101 par->peektok = par->prevtok = par->tok = NULL;102 return par;103}104105/**106 * Formats a parerr as a string and includes line and column information107 * where the error (presumably) ocurred. The string is directly written108 * to the given stream.109 *110 * @pre Parser must have encountered an error.111 * @param par Parser which returned the parerr.112 * @param err Parser error returned by ::parsetm.113 * @param fn Name of the file the parser was trying to parse.114 * @param stream Stream to write error message to.115 * @return Number of characters written to the stream.116 */117int118strparerr(parser *par, parerr err, char *fn, FILE *stream)119{120 int r;121 size_t pos;122 token *tok;123 char *msg, *line, *marker;124125 assert(err != PAR_OK);126 tok = par->tok;127 assert(tok);128 msg = "Unkown error.";129130 /* Check for scanner error. */131 if (tok->type == TOK_ERROR) {132 switch (tok->value) {133 case ERR_OVERFLOW:134 msg = "Numeric state name exceeds INT_MAX.";135 goto ret;136 case ERR_UNDERFLOW:137 msg = "Numeric state names can't be negative.";138 goto ret;139 case ERR_UNKOWN:140 msg = "Lexer encountered an unknown character.";141 goto ret;142 case ERR_UNEXPECTED:143 msg = "A terminal string was expected but the "144 "lexer encountered a character which is "145 "not part of the expected string. Perhaps "146 "you misspelled 'start:' or 'accept:'.";147 goto ret;148 }149 }150151 /* Wasn't a lexer error so the err parameter is actually relevant. */152 switch (err) {153 case PAR_SEMICOLON:154 msg = "Missing semicolon, maybe the previous transition "155 "is missing a semicolon or a metadata information "156 "was not properly terminated with a semicolon.";157 break;158 case PAR_STATEDEFTWICE:159 msg = "This state was already defined previously. You can't "160 "define states twice please move all transitions from "161 "this state definition to the previous definition.";162 break;163 case PAR_TRANSDEFTWICE:164 msg = "Only deterministic turing machines are supported. "165 "Meaning you can't have more than one transition "166 "for the same input symbol.";167 break;168 case PAR_STARTKEY:169 msg = "An initial state wasn't defined. Please define it "170 "using the 'start:' keyword.";171 return fprintf(stream, "%s: %s\n", fn, msg);172 case PAR_INITALSTATE:173 msg = "The initial state value cannot be left empty.";174 break;175 case PAR_ACCEPTKEY:176 msg = "Accepting states where not defined. Please define "177 "one or more accepting states using the "178 "'accept:' keyword.";179 return fprintf(stream, "%s: %s\n", fn, msg);180 case PAR_NONSTATEACCEPT:181 msg = "Your accepting state list contains a token which is "182 "not a state name or is empty.";183 break;184 case PAR_STATEDEF:185 msg = "Expected a state definition but didn't find a valid "186 "state name. Valid state names must match the "187 "following regex: 'q[0-9]+'.";188 break;189 case PAR_LBRACKET:190 msg = "The parser expected an opening curly bracket as a "191 "part of this state definition.";192 break;193 case PAR_RBRACKET:194 msg = "The parser expected a closing curly bracket as a "195 "part of this state definition.";196 break;197 case PAR_RSYMBOL:198 msg = "Your transition definition is missing a symbol "199 "which triggers this transition. This symbol "200 "can only be an alphanumeric character or the "201 "special blank character.";202 break;203 case PAR_DIRECTION:204 msg = "Expected direction to move head to, this symbol is "205 "not a valid head direction symbol.";206 break;207 case PAR_WSYMBOL:208 msg = "Your transition definition is missing a symbol "209 "which is written to the tape when this transition "210 "is performed. This symbol can only be an "211 "alphanumeric character or the special blank character.";212 break;213 case PAR_NEXTSTATESYM:214 msg = "The next state symbol ('=>') was expected but "215 "not found.";216 break;217 case PAR_NEXTSTATE:218 msg = "Your transition is missing a state to transit to "219 "when performing this transition.";220 break;221 case PAR_OK:222 /* Never reached */223 break;224 }225226ret:227 if (!(line = linenum(par->scr->input, tok->line))) {228 msg = "Current token contains an invalid line number. "229 "This is a bug, please consider reporting it.";230 return fprintf(stream, "%s\n", msg);231 }232233 if (tok->type == TOK_EOF)234 pos = endofline(line);235 else236 pos = tok->column - 1;237238 marker = mark(pos, line);239 r = fprintf(stream, "%s:%d:%d: %s\n %s\n %s\n", fn, tok->line,240 tok->column, msg, line, marker);241242 free(line);243 free(marker);244 return r;245}246247/**248 * Frees all resources for a given parser.249 *250 * @param par Pointer to the parser which should be freed.251 */252void253freeparser(parser *par)254{255 assert(par);256257 /* Tokens are freed in the next method. */258259 freescanner(par->scr);260 free(par);261}262263/**264 * Parses the metadata information of a tmsim input file.265 *266 * The following EBNF rules describes valid input:267 *268 * \code269 * metadata = "start:", statename, ";",270 * "accept:", statenames, ";";271 * \endcode272 *273 * @param par Parser for which metadata should be parsed.274 * @param dest Pointer to a Turing machine, if the metadata was275 * parsed successfully the accept array and the start field276 * of the dtm struct are set accordingly.277 * @return Error code or PAR_OK if no error was encountered.278 */279static parerr280parsemeta(parser *par, dtm *dest)281{282 par->tok = next(par);283 if (par->tok->type != TOK_START)284 return PAR_STARTKEY;285286 par->tok = next(par);287 if (par->tok->type != TOK_STATE)288 return PAR_INITALSTATE;289 dest->start = par->tok->value;290291 par->tok = next(par);292 EXPSEM(par->tok);293294 par->tok = next(par);295 if (par->tok->type != TOK_ACCEPT)296 return PAR_ACCEPTKEY;297298 do {299 par->tok = next(par);300 if (par->tok->type != TOK_STATE)301 return PAR_NONSTATEACCEPT;302 addaccept(dest, par->tok->value);303304 par->tok = next(par);305 } while (par->tok->type == TOK_COMMA &&306 par->tok->type != TOK_SEMICOLON);307308 EXPSEM(par->tok);309310 return PAR_OK;311}312313/**314 * Parses a state transition of a tmsim input file.315 *316 * The following EBNF rules describes valid input:317 *318 * \code319 * transition = symbol, direction, symbol, "=>", statename;320 * \endcode321 *322 * @param par Parser for which a transition should be parsed.323 * @param dest Pointer to a transition, if the transition was parsed324 * successfully, the struct fields are initialized accordingly.325 * @return Error code or PAR_OK if no error was encountered.326 */327static parerr328parsetrans(parser *par, tmtrans *dest)329{330 par->tok = next(par);331 if (par->tok->type != TOK_SYMBOL)332 return PAR_RSYMBOL;333 dest->rsym = (char)par->tok->value;334335 par->tok = next(par);336 switch (par->tok->type) {337 case TOK_SMALLER:338 dest->headdir = LEFT;339 break;340 case TOK_GREATER:341 dest->headdir = RIGHT;342 break;343 case TOK_PIPE:344 dest->headdir = STAY;345 break;346 default:347 return PAR_DIRECTION;348 }349350 par->tok = next(par);351 if (par->tok->type != TOK_SYMBOL)352 return PAR_WSYMBOL;353 dest->wsym = (char)par->tok->value;354355 par->tok = next(par);356 if (par->tok->type != TOK_NEXT)357 return PAR_NEXTSTATESYM;358359 par->tok = next(par);360 if (par->tok->type != TOK_STATE)361 return PAR_NEXTSTATE;362 dest->nextstate = par->tok->value;363364 return PAR_OK;365}366367/**368 * Parses a state definition of a tmsim input file.369 *370 * The following EBNF rules describes valid input:371 *372 * \code373 * state = statename, "{", [ transitions ], "}";374 * \endcode375 *376 * @param par Parser for which a state definition should be parsed.377 * @param dest Pointer to a Turing state, if the state definition was parsed378 * successfully, the struct fields are initialized accordingly.379 * @return Error code or PAR_OK if no error was encountered.380 */381static parerr382parsestate(parser *par, tmstate *dest)383{384 tmtrans *trans;385 parerr ret;386387 par->tok = next(par);388 if (par->tok->type != TOK_STATE)389 return PAR_STATEDEF;390 dest->name = par->tok->value;391392 par->tok = next(par);393 if (par->tok->type != TOK_LBRACKET)394 return PAR_LBRACKET;395396 while (peek(par)->type != TOK_RBRACKET) {397 trans = emalloc(sizeof(tmtrans));398 if ((ret = parsetrans(par, trans)) != PAR_OK) {399 free(trans);400 return ret;401 }402403 if (addtrans(dest, trans)) {404 free(trans);405 return PAR_TRANSDEFTWICE;406 }407408 par->tok = next(par);409 EXPSEM(par->tok);410 }411412 par->tok = next(par);413 if (par->tok->type != TOK_RBRACKET)414 return PAR_RBRACKET; /* Never reached. */415416 return PAR_OK;417}418419/**420 * Parses a series of state definitions of a tmsim input file.421 *422 * The following EBNF rules describes valid input:423 *424 * \code425 * states = { state };426 * \endcode427 *428 * @param par Parser for which a state definition should be parsed.429 * @param dest Pointer to a Turing machine, if the states were parsed430 * successfully, the struct fields are initialized accordingly.431 * @return Error code or PAR_OK if no error was encountered.432 */433static parerr434parsestates(parser *par, dtm *dest)435{436 parerr ret;437 tmstate *state;438439 while (peek(par)->type != TOK_EOF) {440 state = newtmstate();441 if ((ret = parsestate(par, state)) != PAR_OK) {442 free(state);443 return ret;444 }445446 if (addstate(dest, state)) {447 free(state);448 return PAR_STATEDEFTWICE;449 }450 }451452 /* skip and free EOF */453 par->tok = next(par);454 freetoken(par->tok);455456 return PAR_OK;457}458459/**460 * Parses a tmsim input file.461 *462 * The following EBNF rules describes valid input:463 *464 * \code465 * turingmachine = metadata, states;466 * \endcode467 *468 * @param par Parser for which a state definition should be parsed.469 * @param dest Pointer to a Turing machine, if the states were parsed470 * successfully, the struct fields are initialized accordingly.471 * @return Error code or PAR_OK if no error was encountered.472 */473parerr474parsetm(parser *par, dtm *dest)475{476 parerr ret;477478 if ((ret = parsemeta(par, dest)) != PAR_OK)479 return ret;480 if ((ret = parsestates(par, dest)) != PAR_OK)481 return ret;482483 return PAR_OK;484}