tmsim

A fast turing machine simulator with graphviz export functionality

git clone https://git.8pit.net/tmsim.git

  1/*
  2 * Copyright © 2016-2019 Sören Tempel
  3 *
  4 * This program is free software: you can redistribute it and/or
  5 * modify it under the terms of the GNU Affero General Public
  6 * License as published by the Free Software Foundation, either
  7 * version 3 of the License, or (at your option) any later version.
  8 *
  9 * This program is distributed in the hope that it will be useful,
 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 12 * Affero General Public License for more details.
 13 *
 14 * You should have received a copy of the GNU Affero General Public
 15 * License along with this program. If not, see
 16 * <http://www.gnu.org/licenses/>.
 17 */
 18
 19#include <assert.h>
 20#include <stdio.h>
 21#include <stdlib.h>
 22
 23#include <sys/types.h>
 24
 25#include "parser.h"
 26#include "scanner.h"
 27#include "token.h"
 28#include "turing.h"
 29#include "util.h"
 30
 31/**
 32 * Macro which should be used if the next expected token is a semicolon token.
 33 * If it isn't the proper error code is returned.
 34 */
 35#define EXPSEM(T) \
 36	do { \
 37		if (T->type != TOK_SEMICOLON) \
 38			return PAR_SEMICOLON; \
 39	} while (0)
 40
 41/**
 42 * Returns the next token and advances the parser position.
 43 *
 44 * @param par Parser to extract next token from.
 45 * @pre A previous call of this function should not have returned TOK_EOF.
 46 * @returns Next token.
 47 */
 48static token *
 49next(parser *par)
 50{
 51	token *tok;
 52
 53	if (par->prevtok != NULL)
 54		freetoken(par->prevtok);
 55
 56	tok = par->peektok;
 57	if (tok != NULL)
 58		par->peektok = NULL;
 59	else
 60		tok = nexttoken(par->scr);
 61
 62	par->prevtok = tok;
 63	return tok;
 64}
 65
 66/**
 67 * Returns the next token for the given parser without advancing the position.
 68 *
 69 * @pre A previous call of this ::next should not have returned TOK_EOF.
 70 * @param par Parser to extract next token from.
 71 * @returns Next token.
 72 */
 73static token *
 74peek(parser *par)
 75{
 76	token *tok;
 77
 78	tok = par->peektok;
 79	if (tok != NULL)
 80		return tok;
 81
 82	par->peektok = nexttoken(par->scr);
 83	return par->peektok;
 84}
 85
 86/**
 87 * Allocates memory for a new parser, initializes it and returns a
 88 * pointer to it.
 89 *
 90 * @param str Input which should be parsed.
 91 * @param len Length of the input.
 92 * @returns Pointer to the newly created parser.
 93 */
 94parser *
 95newparser(char *str, size_t len)
 96{
 97	parser *par;
 98
 99	par = emalloc(sizeof(parser));
100	par->scr = scanstr(str, len);
101	par->peektok = par->prevtok = par->tok = NULL;
102	return par;
103}
104
105/**
106 * Formats a parerr as a string and includes line and column information
107 * where the error (presumably) ocurred. The string is directly written
108 * to the given stream.
109 *
110 * @pre Parser must have encountered an error.
111 * @param par Parser which returned the parerr.
112 * @param err Parser error returned by ::parsetm.
113 * @param fn Name of the file the parser was trying to parse.
114 * @param stream Stream to write error message to.
115 * @return Number of characters written to the stream.
116 */
117int
118strparerr(parser *par, parerr err, char *fn, FILE *stream)
119{
120	int r;
121	size_t pos;
122	token *tok;
123	char *msg, *line, *marker;
124
125	assert(err != PAR_OK);
126	tok = par->tok;
127	assert(tok);
128	msg = "Unkown error.";
129
130	/* Check for scanner error. */
131	if (tok->type == TOK_ERROR) {
132		switch (tok->value) {
133		case ERR_OVERFLOW:
134			msg = "Numeric state name exceeds INT_MAX.";
135			goto ret;
136		case ERR_UNDERFLOW:
137			msg = "Numeric state names can't be negative.";
138			goto ret;
139		case ERR_UNKOWN:
140			msg = "Lexer encountered an unknown character.";
141			goto ret;
142		case ERR_UNEXPECTED:
143			msg = "A terminal string was expected but the "
144			      "lexer encountered a character which is "
145			      "not part of the expected string. Perhaps "
146			      "you misspelled 'start:' or 'accept:'.";
147			goto ret;
148		}
149	}
150
151	/* Wasn't a lexer error so the err parameter is actually relevant. */
152	switch (err) {
153	case PAR_SEMICOLON:
154		msg = "Missing semicolon, maybe the previous transition "
155		      "is missing a semicolon or a metadata information "
156		      "was not properly terminated with a semicolon.";
157		break;
158	case PAR_STATEDEFTWICE:
159		msg = "This state was already defined previously. You can't "
160		      "define states twice please move all transitions from "
161		      "this state definition to the previous definition.";
162		break;
163	case PAR_TRANSDEFTWICE:
164		msg = "Only deterministic turing machines are supported. "
165		      "Meaning you can't have more than one transition "
166		      "for the same input symbol.";
167		break;
168	case PAR_STARTKEY:
169		msg = "An initial state wasn't defined. Please define it "
170		      "using the 'start:' keyword.";
171		return fprintf(stream, "%s: %s\n", fn, msg);
172	case PAR_INITALSTATE:
173		msg = "The initial state value cannot be left empty.";
174		break;
175	case PAR_ACCEPTKEY:
176		msg = "Accepting states where not defined. Please define "
177		      "one or more accepting states using the "
178		      "'accept:' keyword.";
179		return fprintf(stream, "%s: %s\n", fn, msg);
180	case PAR_NONSTATEACCEPT:
181		msg = "Your accepting state list contains a token which is "
182		      "not a state name or is empty.";
183		break;
184	case PAR_STATEDEF:
185		msg = "Expected a state definition but didn't find a valid "
186		      "state name. Valid state names must match the "
187		      "following regex: 'q[0-9]+'.";
188		break;
189	case PAR_LBRACKET:
190		msg = "The parser expected an opening curly bracket as a "
191		      "part of this state definition.";
192		break;
193	case PAR_RBRACKET:
194		msg = "The parser expected a closing curly bracket as a "
195		      "part of this state definition.";
196		break;
197	case PAR_RSYMBOL:
198		msg = "Your transition definition is missing a symbol "
199		      "which triggers this transition. This symbol "
200		      "can only be an alphanumeric character or the "
201		      "special blank character.";
202		break;
203	case PAR_DIRECTION:
204		msg = "Expected direction to move head to, this symbol is "
205		      "not a valid head direction symbol.";
206		break;
207	case PAR_WSYMBOL:
208		msg = "Your transition definition is missing a symbol "
209		      "which is written to the tape when this transition "
210		      "is performed. This symbol can only be an "
211		      "alphanumeric character or the special blank character.";
212		break;
213	case PAR_NEXTSTATESYM:
214		msg = "The next state symbol ('=>') was expected but "
215		      "not found.";
216		break;
217	case PAR_NEXTSTATE:
218		msg = "Your transition is missing a state to transit to "
219		      "when performing this transition.";
220		break;
221	case PAR_OK:
222		/* Never reached */
223		break;
224	}
225
226ret:
227	if (!(line = linenum(par->scr->input, tok->line))) {
228		msg = "Current token contains an invalid line number. "
229		      "This is a bug, please consider reporting it.";
230		return fprintf(stream, "%s\n", msg);
231	}
232
233	if (tok->type == TOK_EOF)
234		pos = endofline(line);
235	else
236		pos = tok->column - 1;
237
238	marker = mark(pos, line);
239	r = fprintf(stream, "%s:%d:%d: %s\n %s\n %s\n", fn, tok->line,
240	            tok->column, msg, line, marker);
241
242	free(line);
243	free(marker);
244	return r;
245}
246
247/**
248 * Frees all resources for a given parser.
249 *
250 * @param par Pointer to the parser which should be freed.
251 */
252void
253freeparser(parser *par)
254{
255	assert(par);
256
257	/* Tokens are freed in the next method. */
258
259	freescanner(par->scr);
260	free(par);
261}
262
263/**
264 * Parses the metadata information of a tmsim input file.
265 *
266 * The following EBNF rules describes valid input:
267 *
268 * \code
269 * metadata = "start:", statename, ";",
270 * 	"accept:", statenames, ";";
271 * \endcode
272 *
273 * @param par Parser for which metadata should be parsed.
274 * @param dest Pointer to a Turing machine, if the metadata was
275 * 	parsed successfully the accept array and the start field
276 * 	of the dtm struct are set accordingly.
277 * @return Error code or PAR_OK if no error was encountered.
278 */
279static parerr
280parsemeta(parser *par, dtm *dest)
281{
282	par->tok = next(par);
283	if (par->tok->type != TOK_START)
284		return PAR_STARTKEY;
285
286	par->tok = next(par);
287	if (par->tok->type != TOK_STATE)
288		return PAR_INITALSTATE;
289	dest->start = par->tok->value;
290
291	par->tok = next(par);
292	EXPSEM(par->tok);
293
294	par->tok = next(par);
295	if (par->tok->type != TOK_ACCEPT)
296		return PAR_ACCEPTKEY;
297
298	do {
299		par->tok = next(par);
300		if (par->tok->type != TOK_STATE)
301			return PAR_NONSTATEACCEPT;
302		addaccept(dest, par->tok->value);
303
304		par->tok = next(par);
305	} while (par->tok->type == TOK_COMMA &&
306	         par->tok->type != TOK_SEMICOLON);
307
308	EXPSEM(par->tok);
309
310	return PAR_OK;
311}
312
313/**
314 * Parses a state transition of a tmsim input file.
315 *
316 * The following EBNF rules describes valid input:
317 *
318 * \code
319 * transition = symbol, direction, symbol, "=>", statename;
320 * \endcode
321 *
322 * @param par Parser for which a transition should be parsed.
323 * @param dest Pointer to a transition, if the transition was parsed
324 * 	successfully, the struct fields are initialized accordingly.
325 * @return Error code or PAR_OK if no error was encountered.
326 */
327static parerr
328parsetrans(parser *par, tmtrans *dest)
329{
330	par->tok = next(par);
331	if (par->tok->type != TOK_SYMBOL)
332		return PAR_RSYMBOL;
333	dest->rsym = (char)par->tok->value;
334
335	par->tok = next(par);
336	switch (par->tok->type) {
337	case TOK_SMALLER:
338		dest->headdir = LEFT;
339		break;
340	case TOK_GREATER:
341		dest->headdir = RIGHT;
342		break;
343	case TOK_PIPE:
344		dest->headdir = STAY;
345		break;
346	default:
347		return PAR_DIRECTION;
348	}
349
350	par->tok = next(par);
351	if (par->tok->type != TOK_SYMBOL)
352		return PAR_WSYMBOL;
353	dest->wsym = (char)par->tok->value;
354
355	par->tok = next(par);
356	if (par->tok->type != TOK_NEXT)
357		return PAR_NEXTSTATESYM;
358
359	par->tok = next(par);
360	if (par->tok->type != TOK_STATE)
361		return PAR_NEXTSTATE;
362	dest->nextstate = par->tok->value;
363
364	return PAR_OK;
365}
366
367/**
368 * Parses a state definition of a tmsim input file.
369 *
370 * The following EBNF rules describes valid input:
371 *
372 * \code
373 * state = statename, "{", [ transitions ], "}";
374 * \endcode
375 *
376 * @param par Parser for which a state definition should be parsed.
377 * @param dest Pointer to a Turing state, if the state definition was parsed
378 * 	successfully, the struct fields are initialized accordingly.
379 * @return Error code or PAR_OK if no error was encountered.
380 */
381static parerr
382parsestate(parser *par, tmstate *dest)
383{
384	tmtrans *trans;
385	parerr ret;
386
387	par->tok = next(par);
388	if (par->tok->type != TOK_STATE)
389		return PAR_STATEDEF;
390	dest->name = par->tok->value;
391
392	par->tok = next(par);
393	if (par->tok->type != TOK_LBRACKET)
394		return PAR_LBRACKET;
395
396	while (peek(par)->type != TOK_RBRACKET) {
397		trans = emalloc(sizeof(tmtrans));
398		if ((ret = parsetrans(par, trans)) != PAR_OK) {
399			free(trans);
400			return ret;
401		}
402
403		if (addtrans(dest, trans)) {
404			free(trans);
405			return PAR_TRANSDEFTWICE;
406		}
407
408		par->tok = next(par);
409		EXPSEM(par->tok);
410	}
411
412	par->tok = next(par);
413	if (par->tok->type != TOK_RBRACKET)
414		return PAR_RBRACKET; /* Never reached. */
415
416	return PAR_OK;
417}
418
419/**
420 * Parses a series of state definitions of a tmsim input file.
421 *
422 * The following EBNF rules describes valid input:
423 *
424 * \code
425 * states = { state };
426 * \endcode
427 *
428 * @param par Parser for which a state definition should be parsed.
429 * @param dest Pointer to a Turing machine, if the states were parsed
430 * 	successfully, the struct fields are initialized accordingly.
431 * @return Error code or PAR_OK if no error was encountered.
432 */
433static parerr
434parsestates(parser *par, dtm *dest)
435{
436	parerr ret;
437	tmstate *state;
438
439	while (peek(par)->type != TOK_EOF) {
440		state = newtmstate();
441		if ((ret = parsestate(par, state)) != PAR_OK) {
442			free(state);
443			return ret;
444		}
445
446		if (addstate(dest, state)) {
447			free(state);
448			return PAR_STATEDEFTWICE;
449		}
450	}
451
452	/* skip and free EOF */
453	par->tok = next(par);
454	freetoken(par->tok);
455
456	return PAR_OK;
457}
458
459/**
460 * Parses a tmsim input file.
461 *
462 * The following EBNF rules describes valid input:
463 *
464 * \code
465 * turingmachine = metadata, states;
466 * \endcode
467 *
468 * @param par Parser for which a state definition should be parsed.
469 * @param dest Pointer to a Turing machine, if the states were parsed
470 * 	successfully, the struct fields are initialized accordingly.
471 * @return Error code or PAR_OK if no error was encountered.
472 */
473parerr
474parsetm(parser *par, dtm *dest)
475{
476	parerr ret;
477
478	if ((ret = parsemeta(par, dest)) != PAR_OK)
479		return ret;
480	if ((ret = parsestates(par, dest)) != PAR_OK)
481		return ret;
482
483	return PAR_OK;
484}