/*
 * Copyright 1996,97 Thierry Bousch
 * Licensed under the Gnu Public License, Version 2
 *
 * $Id: lex.c,v 1.2 1997/04/16 18:18:06 bousch Exp $
 *
 * The lexical analyser of "samuel"
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "saml.h"
#include "saml-util.h"
#include "samuel.h"
#include "tokens.h"

/* Number of slots in yyin[]: standard input plus the nested sourced files */
#define MAX_YY_FILES	10
/* The stream currently being read, i.e. the entry of yyin[] at yylevel */
#define YYIN		yyin[yylevel]

/*
 * yylevel is the index of the current stream in yyin[]; init_lexer() sets
 * it to 0 and installs stdin there. stdin_is_a_tty holds the result of
 * isatty(0), recorded once by init_lexer().
 */
static int yylevel, stdin_is_a_tty;
/* Stack of input streams; yylex() pushes one each time a file is sourced */
static FILE* yyin[MAX_YY_FILES];
/* Scratch buffer in which the text of the current token is accumulated */
static gr_string *yybuf;

/*
 * Put the lexer in its initial state: input comes from standard input at
 * nesting level 0, the token buffer is freshly allocated, and we record
 * whether standard input is a terminal.
 */
void init_lexer (void)
{
	/* Level 0 of the input stack is always standard input */
	yylevel = 0;
	YYIN = stdin;
	/* Buffer shared by all the token readers */
	yybuf = new_gr_string(0);
	/* Remembered once, so the per-character code needn't ask again */
	stdin_is_a_tty = isatty(STDIN_FILENO);
}

#ifndef USE_READLINE

/*
 * Without readline, characters are read from, and pushed back onto,
 * the current stdio stream directly.
 */
#define GETCHAR()	fgetc(YYIN)
#define UNGETC(c)	ungetc(c,YYIN)

#else	/* USE_READLINE */

/* Declared by hand: this file does not include any readline header */
extern char* readline (const char *prompt);
extern void add_history (const char *line);
/*
 * r_buffer is the line most recently returned by readline(), or NULL when
 * a new line must be fetched; r_buffer_ptr points at the next unread
 * character inside it. Both are managed by GETCHAR() and UNGETC() below.
 */
static char *r_buffer, *r_buffer_ptr;

/*
 * Read one character of input. Sourced files (yylevel != 0) and
 * non-terminal standard input are read with fgetc(); an interactive
 * standard input is read line by line through readline(), and the
 * newline that readline() strips is handed back when the line is
 * exhausted. Returns EOF when readline() reports end of input.
 */
static int GETCHAR (void)
{
	unsigned char ch;

	/* Only the interactive top-level stream goes through readline */
	if (yylevel != 0 || !stdin_is_a_tty)
		return fgetc(YYIN);

	if (r_buffer == NULL) {
		/* No pending line: ask readline for a new one */
		r_buffer = readline("");
		r_buffer_ptr = r_buffer;
		if (!r_buffer)
			return EOF;
		/* Empty lines are not worth remembering */
		if (*r_buffer != '\0')
			add_history(r_buffer);
	}

	/* Consume one byte, as an unsigned value like fgetc() would */
	ch = (unsigned char) *r_buffer_ptr;
	r_buffer_ptr++;
	if (ch != 0)
		return ch;

	/* Terminator reached: drop the line and supply the missing newline */
	free(r_buffer);
	r_buffer = NULL;
	return '\n';
}

/*
 * Push back the character most recently obtained from GETCHAR().
 * When the top-level input is being served from the readline buffer,
 * the character is stored back into that buffer; otherwise it is
 * returned to the current stdio stream with ungetc().
 */
static void UNGETC (int c)
{
	if (!yylevel && r_buffer_ptr) {
		/*
		 * A newline cannot be put back: GETCHAR() released the
		 * line buffer just before returning it. This is harmless
		 * because whitespace is not significant, except for a
		 * newline right after a semicolon, and in that case
		 * UNGETC() is not called. EOF is never pushed back either.
		 */
		if (c == EOF || c == '\n')
			return;
		--r_buffer_ptr;
		*r_buffer_ptr = c;
		return;
	}
	/* Sourced file, or readline has never been used */
	ungetc(c, YYIN);
}

#endif	/* USE_READLINE */

/*
 * Read an unsigned decimal integer whose first character is c.
 * The digits are stored, as a freshly allocated string, in yylval.string.
 * Always returns the INTEGER token.
 */
static int yylex_number (int c)
{
	/* Start from an empty buffer and collect the run of digits */
	yybuf->len = 0;
	while ('0' <= c && c <= '9') {
		yybuf = grs_append1(yybuf, c);
		c = GETCHAR();
	}
	/* The character that ended the run belongs to the next token */
	UNGETC(c);
	/* Terminate the buffer so that it can be copied as a C string */
	yybuf = grs_append1(yybuf, '\0');
	if ((yylval.string = strdup(yybuf->s)) == NULL)
		panic("yylex_number: out of memory");
	return INTEGER;
}

/*
 * Read an identifier whose first character is c.
 *
 * An identifier is an optional run of ASCII letters, digits and
 * underscores, followed by any number of bracketed groups "[...]".
 * Brackets may nest inside a group; everything up to the matching
 * closing bracket is part of the identifier. If c is '[', the
 * identifier consists of bracketed groups only.
 *
 * The text is stored, as a freshly allocated string, in yylval.string.
 * Always returns the IDENT token; if the input ends inside a group, a
 * diagnostic is printed and the truncated identifier is returned.
 */
static int yylex_literal (int c)
{
	int balance = 0;	/* opening minus closing brackets seen so far */

	yybuf->len = 0;
	if (c != '[') {
		/* Alphanumeric prefix; bytes with the high bit set end it */
		do {
			yybuf = grs_append1(yybuf, c);
			c = GETCHAR();
		} while ((c & 0x80) == 0 && (isalnum(c) || c == '_'));
	}
	/*
	 * Each iteration copies one complete bracketed group. "balance"
	 * is zero on entry, and is zero again on exit unless the input
	 * ended before the group was closed.
	 */
	while (c == '[') {
		do {
			yybuf = grs_append1(yybuf, c);
			balance += (c == '[') - (c == ']');
			c = GETCHAR();
		} while (balance != 0 && c != EOF);
		/* Still unbalanced: we stopped because of end of file */
		if (balance != 0)
			fprintf(stderr, "yylex: unmatched bracket\n");
	}
	/* c is the first character after the identifier: give it back */
	UNGETC(c);
	yybuf = grs_append1(yybuf, '\0');
	yylval.string = strdup(yybuf->s);
	if (!yylval.string) panic("yylex_literal: out of memory");
	return IDENT;
}

/*
 * Read a quoted string; the opening double quote has already been
 * consumed by the caller. The string extends to the next double quote.
 * Two consecutive double quotes stand for one literal double quote, as
 * in "This is ""quoted"" text." (a convention borrowed from Tom Duff's
 * rc shell).
 *
 * On success the contents are stored, as a freshly allocated string, in
 * yylval.string and QSTRING is returned. If the input ends before the
 * closing quote, a diagnostic is printed and 0 (end of input) is
 * returned.
 */
static int yylex_qstring (void)
{
	int c;

	yybuf->len = 0;
	for (;;) {
		c = GETCHAR();
		if (c == EOF)
			break;
		if (c == '"') {
			/* Either the closing quote or a doubled one */
			c = GETCHAR();
			if (c != '"') {
				/* It was the closing quote */
				UNGETC(c);
				yybuf = grs_append1(yybuf, '\0');
				yylval.string = strdup(yybuf->s);
				if (yylval.string == NULL)
					panic("yylex_qstring: out of memory");
				return QSTRING;
			}
			/* Doubled quote: keep a single '"' */
		}
		yybuf = grs_append1(yybuf, c);
	}
	/* Input exhausted inside the string: report it as end of input */
	fprintf(stderr, "yylex: unmatched quote\n");
	return 0;
}

int yylex (void)
{
	int c, d;
	FILE *inc;
top:
	if ((c = GETCHAR()) == EOF) {
		fclose(YYIN);
		/* If topmost stream, return EOF */
		if (!yylevel)
			return 0;
		/* Otherwise, revert to the previous data stream */
		--yylevel;
		goto top;
	}
	/*
	 * Non-ASCII characters cannot appear at the beginning of a token.
	 * This must be the first test, otherwise conditions like isspace(c)
	 * or isalpha(c) would depend on the locale, which would be
	 * a very bad thing.
	 */
	if (c & 0x80)
		return c;
	/* The NUL character is considered as whitespace */
	if (c == '\0' || isspace(c))
		goto top;
	if (c == '#') {
		/* Skip comment to end of line */
		while ((c = GETCHAR()) != EOF && c != '\n')
		    ;
		goto top;
	}
	if (isdigit(c))
		return yylex_number(c);

	if (isalpha(c) || c == '_' || c == '[')
		return yylex_literal(c);

	if (c == '"')
		return yylex_qstring();

	if (c == '-' || c == '=' || c == '>') {
		if ((d = GETCHAR()) == '>')
			switch(c) {
			  case '-': return TOK_ARROW;		/* -> */
			  case '=': return TOK_DBL_ARROW;	/* => */
			  case '>': return TOK_SHIFT;		/* >> */
			}
		UNGETC(d);
	}
	if (c == '<') {
	    if ((d = GETCHAR()) == '"') {
		/* Source file */
		if (yylex_qstring() == 0)
			return 0;  /* unmatched quote */
		if (yylevel == MAX_YY_FILES-1) {
			fprintf(stderr, "yylex: too many nested files "
			  "(maximum is %d)\n", MAX_YY_FILES);
			goto top;
		}
		if ((inc = fopen(yybuf->s, "r")) == NULL) {
			perror("yylex: fopen");
			fprintf(stderr, "yylex: cannot source file \"%s\"\n",
			  yybuf->s);
			goto top;
		}
		yyin[++yylevel] = inc;
		goto top;
	    }
	    UNGETC(d);
	}
	if (c == '~') {
		if ((d = GETCHAR()) == '~')
			return TOK_MODULO;
		UNGETC(d);
	}
	if (c == '?') {
		if ((d = GETCHAR()) == '?')
			return TOK_PRINT_VARS;
		UNGETC(d);
	}
	/*
	 * Return a special token when the semicolon is immediately followed
	 * by a newline. We don't do it in quiet mode: it would be useless
	 * and cause unnecessary lookahead.
	 */
	if (!quiet && c == ';') {
		if ((d = GETCHAR()) == '\n')
			return TOK_SC_NL;
		UNGETC(d);
	}

	/* Nothing special, return the character itself */
	return c;
}

