tmsim

A fast turing machine simulator with graphviz export functionality

git clone https://git.8pit.net/tmsim.git

  1/*
  2 * Copyright © 2016-2019 Sören Tempel
  3 *
  4 * This program is free software: you can redistribute it and/or
  5 * modify it under the terms of the GNU Affero General Public
  6 * License as published by the Free Software Foundation, either
  7 * version 3 of the License, or (at your option) any later version.
  8 *
  9 * This program is distributed in the hope that it will be useful,
 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 12 * Affero General Public License for more details.
 13 *
 14 * You should have received a copy of the GNU Affero General Public
 15 * License along with this program. If not, see
 16 * <http://www.gnu.org/licenses/>.
 17 */
 18
 19#include <assert.h>
 20#include <ctype.h>
 21#include <errno.h>
 22#include <limits.h>
 23#include <pthread.h>
 24#include <stdio.h>
 25#include <stdlib.h>
 26#include <string.h>
 27
 28#include <sys/types.h>
 29
 30#include "queue.h"
 31#include "scanner.h"
 32#include "token.h"
 33#include "turing.h"
 34#include "util.h"
 35
 36static void lexspace(scanner *);
 37static void lexcomment(scanner *);
 38static void lexstate(scanner *);
 39static void lexterm(scanner *);
 40
 41/**
 42 * Reads the next character from the input string and increments current
 43 * position of the scanner in the input string.
 44 *
 45 * @param scr Scanner from which character should be read.
 46 * @returns The next character or -1 if there is none.
 47 */
 48static int
 49nextch(scanner *scr)
 50{
 51	if (scr->pos >= scr->inlen)
 52		return -1;
 53
 54	return scr->input[scr->pos++];
 55}
 56
 57/**
 58 * Reads the next character from the input string but doesn't increment
 59 * the current position of the scanner.
 60 *
 61 * @param scr Scanner from which character should be read.
 62 * @returns The next character or -1 if there is none.
 63 */
 64static int
 65peekch(scanner *scr)
 66{
 67	int nxt;
 68
 69	nxt = nextch(scr);
 70	if (nxt != -1)
 71		scr->pos--;
 72	return nxt;
 73}
 74
 75/**
 76 * Ignores all characters in the input string after the current start position.
 77 *
 78 * @param scr Scanner for which characters should be ignored.
 79 */
 80static void
 81ignore(scanner *scr)
 82{
 83	scr->start = scr->pos;
 84}
 85
 86/**
 87 * Whether or not the current character is a valid turing machine input
 88 * symbol. Which is the case if it is either an alphanumeric character
 89 * or the special blank character.
 90 *
 91 * @param c Character which should be examined.
 92 * @returns Non-zero integer if it is, zero if it isn't.
 93 */
 94static int
 95issymbol(int c)
 96{
 97	return isalnum(c) || c == BLANKCHAR;
 98}
 99
100/**
101 * Emits a new token and enqueues it.
102 *
103 * @param scr Scanner which found the token that should be emitted.
104 * @param tkt Type of the token.
105 * @param value Value of the token.
106 */
107static void
108emit(scanner *scr, toktype tkt, int value)
109{
110	token *tok;
111
112	tok = emalloc(sizeof(token));
113	tok->type = tkt;
114	tok->line = scr->line;
115	tok->column = scr->column;
116	tok->value = value;
117
118	enqueue(scr->tqueue, tok);
119	scr->start = scr->pos;
120}
121
122/**
123 * Tail recursive function parsing the entire input string.
124 * Usually passed to pthread_create(3).
125 *
126 * @param scr Pointer to the associated scanner.
127 */
128static void
129lexany(scanner *scr)
130{
131	int nxt;
132
133	/* Make this function a cancel point. */
134	pthread_testcancel();
135
136	scr->column++;
137	if ((nxt = nextch(scr)) == -1) {
138		scr->column = 0;
139		if (scr->line > 1)
140			scr->line--;
141
142		emit(scr, TOK_EOF, TOKNOP);
143		LEXRET(scr, NULL);
144	}
145
146	switch (nxt) {
147	case '\n':
148		scr->column = 0;
149		scr->line++;
150		ignore(scr);
151		LEXRET(scr, lexany);
152	case '#':
153		LEXRET(scr, lexcomment);
154	case ',':
155		emit(scr, TOK_COMMA, TOKAUTO(scr));
156		LEXRET(scr, lexany);
157	case ';':
158		emit(scr, TOK_SEMICOLON, TOKAUTO(scr));
159		LEXRET(scr, lexany);
160	case '{':
161		emit(scr, TOK_LBRACKET, TOKAUTO(scr));
162		LEXRET(scr, lexany);
163	case '}':
164		emit(scr, TOK_RBRACKET, TOKAUTO(scr));
165		LEXRET(scr, lexany);
166	case '<':
167		emit(scr, TOK_SMALLER, TOKAUTO(scr));
168		LEXRET(scr, lexany);
169	case '>':
170		emit(scr, TOK_GREATER, TOKAUTO(scr));
171		LEXRET(scr, lexany);
172	case '|':
173		emit(scr, TOK_PIPE, TOKAUTO(scr));
174		LEXRET(scr, lexany);
175	case 'q':
176		if (isdigit(peekch(scr)))
177			LEXRET(scr, lexstate);
178		break;
179	case '=':
180		if (nextch(scr) == '>')
181			emit(scr, TOK_NEXT, TOKNOP);
182		else
183			emit(scr, TOK_ERROR, ERR_UNKOWN);
184
185		scr->column++;
186		LEXRET(scr, lexany);
187	}
188
189	if (issymbol(nxt) && !issymbol(peekch(scr))) {
190		emit(scr, TOK_SYMBOL, TOKAUTO(scr));
191		LEXRET(scr, lexany);
192	} else if (isspace(nxt)) {
193		LEXRET(scr, lexspace);
194	}
195
196	LEXRET(scr, lexterm);
197}
198
199/**
200 * Skips/Ignores all white-space characters. Also
201 * calls lexany as soon as it encounters the first
202 * non-white-space character.
203 *
204 * @param scr Pointer to the associated scanner.
205 */
206static void
207lexspace(scanner *scr)
208{
209	while (isspace(peekch(scr))) {
210		nextch(scr);
211		scr->column++;
212	}
213
214	ignore(scr);
215	LEXRET(scr, lexany);
216}
217
218/**
219 * Skips/Ignores a comment. A comment begins with the
220 * ASCII symbol '#' and ends with a newline.
221 *
222 * @param scr Pointer to the associated scanner.
223 */
224static void
225lexcomment(scanner *scr)
226{
227	while (peekch(scr) != '\n')
228		if (nextch(scr) == -1)
229			break;
230
231	/* We don't need to increment scr->column here because
232	 * comments are currently not exposed to the parser
233	 * and since they end with a newline they don't effect
234	 * other tokens either (unlike white-spaces). */
235
236	ignore(scr);
237	LEXRET(scr, lexany);
238}
239
240/**
241 * Lexes a state name. Also calls lexany as soon as
242 * it encounters a non-digit character. Besides an
243 * error token is emitted if converting the token
244 * name to an int would cause an integer over/underflow.
245 *
246 * @pre The previous char must be the ASCII character 'q'.
247 * @param scr Pointer to the associated scanner.
248 */
249static void
250lexstate(scanner *scr)
251{
252	char *input, buf[STATELEN];
253	unsigned int col;
254	size_t len;
255	long value;
256
257	input = &scr->input[scr->pos];
258
259	col = scr->column;
260	while (isdigit(peekch(scr)))
261		nextch(scr);
262	scr->column = col;
263
264	len = scr->pos - scr->start - 1;
265	if (len > STATELEN - 1) {
266		emit(scr, TOK_ERROR, ERR_OVERFLOW);
267		goto ret;
268	}
269
270	memcpy(buf, input, len);
271	buf[len] = '\0';
272
273	value = strtol(buf, NULL, 10);
274	if (value < 0)
275		emit(scr, TOK_ERROR, ERR_UNDERFLOW);
276	else if (value > INT_MAX)
277		emit(scr, TOK_ERROR, ERR_OVERFLOW);
278	else
279		emit(scr, TOK_STATE, (int)value);
280
281ret:
282	scr->column += (unsigned int)len; /* Initial 'q' and digits. */
283	LEXRET(scr, lexany);
284}
285
286/**
287 * Attempts to lex the terminal symbols `start:` and `accept:`. It calls
288 * ::lexany as soon as it reaches the end of the given terminal symbol
289 * or if a character is encountered which is not part of the any known
290 * terminal symbol.
291 *
292 * @param scr Pointer to the associated scanner.
293 */
294static void
295lexterm(scanner *scr)
296{
297	char *ter;
298	toktype tkt;
299	size_t pos, len;
300
301	switch (scr->input[scr->start]) {
302	case 's':
303		tkt = TOK_START;
304		ter = "start:";
305		break;
306	case 'a':
307		tkt = TOK_ACCEPT;
308		ter = "accept:";
309		break;
310	default:
311		emit(scr, TOK_ERROR, ERR_UNKOWN);
312		LEXRET(scr, lexany);
313	}
314
315	len = strlen(ter);
316	if (!xstrncmp(ter, &scr->input[scr->start], len, &pos)) {
317		emit(scr, tkt, TOKNOP);
318	} else {
319		/* pos should always be < strlen({start:,accept:}) */
320		assert(pos <= UINT_MAX - 1);
321
322		scr->column += (unsigned int)pos;
323		emit(scr, TOK_ERROR, ERR_UNEXPECTED);
324	}
325
326	scr->pos += --len;
327	scr->start = scr->pos;
328
329	scr->column += (unsigned int)len;
330	LEXRET(scr, lexany);
331}
332
333/**
334 * Function invoked by a seperate LWP for parsing the input string. This
335 * function simply calls the current ::statefn until the called
336 * ::statefn function sets the current state to `NULL`.
337 *
338 * @param pscr Void pointer to the scanner used for lexing.
339 * @returns A null pointer.
340 */
341static void *
342tokloop(void *pscr)
343{
344	scanner *scr;
345
346	scr = (scanner *)pscr;
347	while (scr->state != NULL)
348		(*scr->state)(scr); /* fn must set scr->state. */
349
350	return NULL;
351}
352
353/**
354 * Creates a new scanner for the given input and starts lexing the
355 * it in a seperated thread.
356 *
357 * @param input Input which should be scanned.
358 * @param len Length of the input.
359 * @returns Scanner for the given input.
360 */
361scanner *
362scanstr(char *input, size_t len)
363{
364	scanner *scr;
365
366	scr = emalloc(sizeof(scanner));
367	scr->tqueue = newqueue();
368	scr->state = lexany;
369	scr->pos = scr->start = scr->column = 0;
370	scr->inlen = len;
371	scr->input = input;
372	scr->line = 1;
373
374	if ((errno = pthread_create(&scr->thread, NULL, tokloop, (void *)scr)))
375		die("pthread_create failed");
376
377	return scr;
378}
379
380/**
381 * Frees the allocated memory for the given scanner.
382 *
383 * @param scr Scanner for which allocated memory should be freed.
384 */
385void
386freescanner(scanner *scr)
387{
388	assert(scr);
389
390	if (!pthread_cancel(scr->thread)) {
391		if ((errno = pthread_join(scr->thread, NULL)))
392			die("pthread_join failed");
393	}
394
395	freequeue(scr->tqueue);
396	free(scr);
397}
398
399/**
400 * Returns the least recent token scanned by the given scanner. If no
401 * token has been scanned so for this function blocks until a token has
402 * been emitted by the scanner. If the last token (TOK_EOF) was already
403 * returned by this function then it causes a deadlock.
404 *
405 * @pre A previous call of this function didn't return TOK_EOF.
406 * @param scr Scanner to extract token from.
407 */
408token *
409nexttoken(scanner *scr)
410{
411	return dequeue(scr->tqueue);
412}