/*-------------------------------------------------------------------------
ParserGen -- Generation of the Recursive Descent Parser
Compiler Generator Coco/R,
Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
ported to C++ by Csaba Balazs, University of Szeged
extended by M. Loeberbauer & A. Woess, Univ. of Linz
with improvements by Pat Terry, Rhodes University

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

As an exception, it is allowed to write an extension of Coco/R that is
used as a plugin in non-free software.

If not otherwise stated, any source code generated by Coco/R (other than
Coco/R itself) does not fall under the GNU General Public License.
-------------------------------------------------------------------------*/

#include <ctype.h>
#include "ArrayList.h"
#include "ParserGen.h"
#include "Parser.h"
#include "BitArray.h"
#include "Scanner.h"

namespace Coco {

void ParserGen::Indent (int n) {
	for (int i = 1; i <= n; i++) fwprintf(gen, L"\t");
}

// AW: use a switch if more than 5 alternatives and none starts with a resolver
bool ParserGen::UseSwitch (Node *p) {
	if (p->typ != Node::alt) return false;
	int nAlts = 0;
	while (p != NULL) {
	  ++nAlts;
	  // must not optimize with switch-statement, if alt uses a resolver expression
	  if (p->sub->typ == Node::rslv) return false;
	  p = p->down;
	}
	return nAlts > 5;
}

void ParserGen::CopyFramePart(const wchar_t* stop) {
	wchar_t startCh = stop[0];
	int endOfStopString = coco_string_length(stop)-1;
	wchar_t ch = 0;
	fwscanf(fram, L"%lc", &ch);
	while (!feof(fram))
		if (ch == startCh) {
			int i = 0;
			do {
				if (i == endOfStopString) return; // stop[0..i] found
				fwscanf(fram, L"%lc", &ch); i++;
			} while (ch == stop[i]);
			// stop[0..i-1] found; continue with last read character
			wchar_t *subStop = coco_string_create(stop, 0, i);
			fwprintf(gen, L"%ls", subStop);
			coco_string_delete(subStop);
		} else {
			fwprintf(gen, L"%lc", ch);
			fwscanf(fram, L"%lc", &ch);
		}
	errors->Exception(L" -- incomplete or corrupt parser frame file");
}

int ParserGen::GenNamespaceOpen(const wchar_t *nsName) {
	if (nsName == NULL || coco_string_length(nsName) == 0) {
		return 0;
	}
	const int len = coco_string_length(nsName);
	int startPos = 0;
	int nrOfNs = 0;
	do {
		int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR);
		if (curLen == -1) { curLen = len - startPos; }
		wchar_t *curNs = coco_string_create(nsName, startPos, curLen);
		fwprintf(gen, L"namespace %ls {\n", curNs);
		coco_string_delete(curNs);
		startPos = startPos + curLen + 1;
		++nrOfNs;
	} while (startPos < len);
	return nrOfNs;
}

void ParserGen::GenNamespaceClose(int nrOfNs) {
	for (int i = 0; i < nrOfNs; ++i) {
		fwprintf(gen, L"} // namespace\n");
	}
}

void ParserGen::CopySourcePart (Position *pos, int indent) {
	// Copy text described by pos from atg to gen
	int ch, i;
	if (pos != NULL) {
		buffer->SetPos(pos->beg); ch = buffer->Read();
		if (tab->emitLines && pos->line) {
			fwprintf(gen, L"\n#line %d \"%ls\"\n", pos->line, tab->srcName);
		}
		Indent(indent);
		while (buffer->GetPos() <= pos->end) {
			while (ch == CR || ch == LF) {  // eol is either CR or CRLF or LF
				fwprintf(gen, L"\n"); Indent(indent);
				if (ch == CR) { ch = buffer->Read(); } // skip CR
				if (ch == LF) { ch = buffer->Read(); } // skip LF
				for (i = 1; i <= pos->col && (ch == ' ' || ch == '\t'); i++) {
					// skip blanks at beginning of line
					ch = buffer->Read();
				}
				if (i <= pos->col) pos->col = i - 1; // heading TABs => not enough blanks
				if (buffer->GetPos() > pos->end) goto done;
			}
			fwprintf(gen, L"%lc", ch);
			ch = buffer->Read();
		}
		done:
		if (indent > 0) fwprintf(gen, L"\n");
	}
}

void ParserGen::GenErrorMsg (int errTyp, Symbol *sym) {
	errorNr++;
	const int formatLen = 1000;
	wchar_t format[formatLen];
	coco_swprintf(format, formatLen, L"\t\t\tcase %d: s = coco_string_create(L\"", errorNr);
	coco_string_merge(err, format);
	if (errTyp == tErr) {
		if (sym->name[0] == L'"') {
			coco_swprintf(format, formatLen, L"%ls expected", tab->Escape(sym->name));
			coco_string_merge(err, format);
		} else {
			coco_swprintf(format, formatLen, L"%ls expected", sym->name);
			coco_string_merge(err, format);
		}
	} else if (errTyp == altErr) {
		coco_swprintf(format, formatLen, L"invalid %ls", sym->name);
		coco_string_merge(err, format);
	} else if (errTyp == syncErr) {
		coco_swprintf(format, formatLen, L"this symbol not expected in %ls", sym->name);
		coco_string_merge(err, format);
	}
	coco_swprintf(format, formatLen, L"\"); break;\n");
	coco_string_merge(err, format);
}

int ParserGen::NewCondSet (BitArray *s) {
	for (int i = 1; i < symSet->Count; i++) // skip symSet[0] (reserved for union of SYNC sets)
		if (Sets::Equals(s, (BitArray*)(*symSet)[i])) return i;
	symSet->Add(s->Clone());
	return symSet->Count - 1;
}

void ParserGen::GenCond (BitArray *s, Node *p) {
	if (p->typ == Node::rslv) CopySourcePart(p->pos, 0);
	else {
		int n = Sets::Elements(s);
		if (n == 0) fwprintf(gen, L"false"); // happens if an ANY set matches no symbol
		else if (n <= maxTerm) {
			Symbol *sym;
			for (int i=0; i<tab->terminals->Count; i++) {
				sym = (Symbol*)((*(tab->terminals))[i]);
				if ((*s)[sym->n]) {
					fwprintf(gen, L"la->kind == %d", sym->n);
					--n;
					if (n > 0) fwprintf(gen, L" || ");
				}
			}
		} else
			fwprintf(gen, L"StartOf(%d)", NewCondSet(s));
	}
}

void ParserGen::PutCaseLabels (BitArray *s) {
	Symbol *sym;
	for (int i=0; i<tab->terminals->Count; i++) {
		sym = (Symbol*)((*(tab->terminals))[i]);
		if ((*s)[sym->n]) fwprintf(gen, L"case %d: ", sym->n);
	}
}

void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) {
	Node *p2;
	BitArray *s1, *s2;
	while (p != NULL) {
		if (p->typ == Node::nt) {
			Indent(indent);
			fwprintf(gen, L"%ls(", p->sym->name);
			CopySourcePart(p->pos, 0);
			fwprintf(gen, L");\n");
		} else if (p->typ == Node::t) {
			Indent(indent);
			// assert: if isChecked[p->sym->n] is true, then isChecked contains only p->sym->n
			if ((*isChecked)[p->sym->n]) fwprintf(gen, L"Get();\n");
			else fwprintf(gen, L"Expect(%d);\n", p->sym->n);
		} if (p->typ == Node::wt) {
			Indent(indent);
			s1 = tab->Expected(p->next, curSy);
			s1->Or(tab->allSyncSets);
			fwprintf(gen, L"ExpectWeak(%d, %d);\n", p->sym->n, NewCondSet(s1));
		} if (p->typ == Node::any) {
			Indent(indent);
			int acc = Sets::Elements(p->set);
			if (tab->terminals->Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) {
				// either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here
				fwprintf(gen, L"Get();\n");
			} else {
				GenErrorMsg(altErr, curSy);
				if (acc > 0) {
					fwprintf(gen, L"if ("); GenCond(p->set, p); fwprintf(gen, L") Get(); else SynErr(%d);\n", errorNr);
				} else fwprintf(gen, L"SynErr(%d); // ANY node that matches no symbol\n", errorNr);
			}
		} if (p->typ == Node::eps) {	// nothing
		} if (p->typ == Node::rslv) {	// nothing
		} if (p->typ == Node::sem) {
			CopySourcePart(p->pos, indent);
		} if (p->typ == Node::sync) {
			Indent(indent);
			GenErrorMsg(syncErr, curSy);
			s1 = p->set->Clone();
			fwprintf(gen, L"while (!("); GenCond(s1, p); fwprintf(gen, L")) {");
			fwprintf(gen, L"SynErr(%d); Get();", errorNr); fwprintf(gen, L"}\n");
		} if (p->typ == Node::alt) {
			s1 = tab->First(p);
			bool equal = Sets::Equals(s1, isChecked);
			bool useSwitch = UseSwitch(p);
			if (useSwitch) { Indent(indent); fwprintf(gen, L"switch (la->kind) {\n"); }
			p2 = p;
			while (p2 != NULL) {
				s1 = tab->Expected(p2->sub, curSy);
				Indent(indent);
				if (useSwitch) {
					PutCaseLabels(s1); fwprintf(gen, L"{\n");
				} else if (p2 == p) {
					fwprintf(gen, L"if ("); GenCond(s1, p2->sub); fwprintf(gen, L") {\n");
				} else if (p2->down == NULL && equal) { fwprintf(gen, L"} else {\n");
				} else {
					fwprintf(gen, L"} else if (");  GenCond(s1, p2->sub); fwprintf(gen, L") {\n");
				}
				GenCode(p2->sub, indent + 1, s1);
				if (useSwitch) {
					Indent(indent); fwprintf(gen, L"\tbreak;\n");
					Indent(indent); fwprintf(gen, L"}\n");
				}
				p2 = p2->down;
			}
			Indent(indent);
			if (equal) {
				fwprintf(gen, L"}\n");
			} else {
				GenErrorMsg(altErr, curSy);
				if (useSwitch) {
					fwprintf(gen, L"default: SynErr(%d); break;\n", errorNr);
					Indent(indent); fwprintf(gen, L"}\n");
				} else {
					fwprintf(gen, L"} "); fwprintf(gen, L"else SynErr(%d);\n", errorNr);
				}
			}
		} if (p->typ == Node::iter) {
			Indent(indent);
			p2 = p->sub;
			fwprintf(gen, L"while (");
			if (p2->typ == Node::wt) {
				s1 = tab->Expected(p2->next, curSy);
				s2 = tab->Expected(p->next, curSy);
				fwprintf(gen, L"WeakSeparator(%d,%d,%d) ", p2->sym->n, NewCondSet(s1), NewCondSet(s2));
				s1 = new BitArray(tab->terminals->Count);  // for inner structure
				if (p2->up || p2->next == NULL) p2 = NULL; else p2 = p2->next;
			} else {
				s1 = tab->First(p2);
				GenCond(s1, p2);
			}
			fwprintf(gen, L") {\n");
			GenCode(p2, indent + 1, s1);
			Indent(indent); fwprintf(gen, L"}\n");
		} if (p->typ == Node::opt) {
			s1 = tab->First(p->sub);
			Indent(indent);
			fwprintf(gen, L"if ("); GenCond(s1, p->sub); fwprintf(gen, L") {\n");
			GenCode(p->sub, indent + 1, s1);
			Indent(indent); fwprintf(gen, L"}\n");
		}
		if (p->typ != Node::eps && p->typ != Node::sem && p->typ != Node::sync)
			isChecked->SetAll(false);  // = new BitArray(Symbol.terminals.Count);
		if (p->up) break;
		p = p->next;
	}
}


void ParserGen::GenTokensHeader() {
	Symbol *sym;
	int i;

	fwprintf(gen, L"\tenum {\n");

	// tokens
	for (i=0; i<tab->terminals->Count; i++) {
		sym = (Symbol*)((*(tab->terminals))[i]);
		if (!isalpha(sym->name[0])) {
			continue;
		}
		fwprintf(gen , L"\t\t_%ls=%d,\n", sym->name, sym->n);
	}

	// pragmas
	for (i=0; i<tab->pragmas->Count; i++) {
		sym = (Symbol*)((*(tab->pragmas))[i]);
		fwprintf(gen , L"\t\t_%ls=%d,\n", sym->name, sym->n);
	}

	fwprintf(gen, L"\t};\n");
}

void ParserGen::GenCodePragmas() {
	Symbol *sym;
	for (int i=0; i<tab->pragmas->Count; i++) {
		sym = (Symbol*)((*(tab->pragmas))[i]);
		fwprintf(gen, L"\t\tif (la->kind == %d) {\n", sym->n);
		CopySourcePart(sym->semPos, 4);
		fwprintf(gen, L"\t\t}\n");
	}
}

void ParserGen::GenProductionsHeader() {
	Symbol *sym;
	for (int i=0; i<tab->nonterminals->Count; i++) {
		sym = (Symbol*)((*(tab->nonterminals))[i]);
		curSy = sym;
		fwprintf(gen, L"\tvoid %ls(", sym->name);
		CopySourcePart(sym->attrPos, 0);
		fwprintf(gen, L");\n");
	}
}

void ParserGen::GenProductions() {
	Symbol *sym;
	for (int i=0; i<tab->nonterminals->Count; i++) {
		sym = (Symbol*)((*(tab->nonterminals))[i]);
		curSy = sym;
		fwprintf(gen, L"void Parser::%ls(", sym->name);
		CopySourcePart(sym->attrPos, 0);
		fwprintf(gen, L") {\n");
		CopySourcePart(sym->semPos, 2);
		GenCode(sym->graph, 2, new BitArray(tab->terminals->Count));
		fwprintf(gen, L"}\n"); fwprintf(gen, L"\n");
	}
}

void ParserGen::InitSets() {
	fwprintf(gen, L"\tstatic bool set[%d][%d] = {\n", symSet->Count, tab->terminals->Count+1);

	for (int i = 0; i < symSet->Count; i++) {
		BitArray *s = (BitArray*)(*symSet)[i];
		fwprintf(gen, L"\t\t{");
		int j = 0;
		Symbol *sym;
		for (int k=0; k<tab->terminals->Count; k++) {
			sym = (Symbol*)((*(tab->terminals))[k]);
			if ((*s)[sym->n]) fwprintf(gen, L"T,"); else fwprintf(gen, L"x,");
			++j;
			if (j%4 == 0) fwprintf(gen, L" ");
		}
		if (i == symSet->Count-1) fwprintf(gen, L"x}\n"); else fwprintf(gen, L"x},\n");
	}
	fwprintf(gen, L"\t};\n\n");
}

void ParserGen::OpenGen(const wchar_t* genName, bool backUp) { /* pdt */
	wchar_t* fn = coco_string_create_append(tab->outDir, genName); /* pdt */
	char *chFn = coco_string_create_char(fn);
	FILE* tmp;
	if (backUp && ((tmp = fopen(chFn, "r")) != NULL)) {
		fclose(tmp);
		wchar_t* oldName = coco_string_create_append(fn, L".old");
		char *chOldName = coco_string_create_char(oldName);
		remove(chOldName); rename(chFn, chOldName); // copy with overwrite
		coco_string_delete(chOldName);
		coco_string_delete(oldName);
	}
	if ((gen = fopen(chFn, "w")) == NULL) {
		errors->Exception(L"-- Cannot generate parser file\n");
	}
	coco_string_delete(fn);
	coco_string_delete(chFn);
}

void ParserGen::WriteParser () {
	int oldPos = buffer->GetPos();  // Pos is modified by CopySourcePart
	symSet->Add(tab->allSyncSets);
	wchar_t *fr = coco_string_create_append(tab->srcDir, L"Parser.frame");
	char *chFr = coco_string_create_char(fr);

	FILE* tmp;
	if ((tmp = fopen(chFr, "r")) == NULL) {
		if (coco_string_length(tab->frameDir) != 0) {
			delete [] fr;
			fr = coco_string_create(tab->frameDir);
			coco_string_merge(fr, L"/");
			coco_string_merge(fr, L"Parser.frame");
		}
		coco_string_delete(chFr);
		chFr = coco_string_create_char(fr);
		if ((tmp = fopen(chFr, "r")) == NULL) {
			errors->Exception(L"-- Cannot find Parser.frame\n");
		} else {
			fclose(tmp);
		}
	} else {
		fclose(tmp);
	}

	if ((fram = fopen(chFr, "r")) == NULL) {
		errors->Exception(L"-- Cannot open Parser.frame.\n");
	}
	coco_string_delete(chFr);
	coco_string_delete(fr);

	OpenGen(L"Parser.h", true); /* pdt */

	Symbol *sym;
	for (int i=0; i<tab->terminals->Count; i++) {
		sym = (Symbol*)((*(tab->terminals))[i]);
		GenErrorMsg(tErr, sym);
	}

	CopyFramePart(L"-->begin");
	wchar_t *subSrcName = coco_string_create_lower(tab->srcName);
	if (!coco_string_endswith(subSrcName, L"coco.atg")) {
		fclose(gen); OpenGen(L"Parser.h", false); /* pdt */
	}
	coco_string_delete(subSrcName);

	CopyFramePart(L"-->headerdef");

	if (usingPos != NULL) {CopySourcePart(usingPos, 0); fwprintf(gen, L"\n");}
	CopyFramePart(L"-->namespace_open");
	int nrOfNs = GenNamespaceOpen(tab->nsName);

	CopyFramePart(L"-->constantsheader");
	GenTokensHeader();  /* ML 2002/09/07 write the token kinds */
	fwprintf(gen, L"\tint maxT;\n");
	CopyFramePart(L"-->declarations"); CopySourcePart(tab->semDeclPos, 0);
	CopyFramePart(L"-->productionsheader"); GenProductionsHeader();
	CopyFramePart(L"-->namespace_close");
	GenNamespaceClose(nrOfNs);

	CopyFramePart(L"-->implementation");
	fclose(gen);

	// Source
	OpenGen(L"Parser.cpp", true); /* pdt */
	CopyFramePart(L"-->begin");
	subSrcName = coco_string_create_lower(tab->srcName);
	if (!coco_string_endswith(subSrcName, L"coco.atg")) {
		fclose(gen); OpenGen(L"Parser.cpp", false); /* pdt */
	}
	coco_string_delete(subSrcName);
	CopyFramePart(L"-->namespace_open");
	nrOfNs = GenNamespaceOpen(tab->nsName);

	CopyFramePart(L"-->pragmas"); GenCodePragmas();
	CopyFramePart(L"-->productions"); GenProductions();
	CopyFramePart(L"-->parseRoot"); fwprintf(gen, L"\t%ls();\n", tab->gramSy->name);
	CopyFramePart(L"-->constants");
	fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals->Count-1);
	CopyFramePart(L"-->initialization"); InitSets();
	CopyFramePart(L"-->errors"); fwprintf(gen, L"%ls", err);
	CopyFramePart(L"-->namespace_close");
	GenNamespaceClose(nrOfNs);
	CopyFramePart(L"$$$");
	fclose(gen);
	buffer->SetPos(oldPos);
}


void ParserGen::WriteStatistics () {
	fwprintf(trace, L"\n");
	fwprintf(trace, L"%d terminals\n", tab->terminals->Count);
	fwprintf(trace, L"%d symbols\n", tab->terminals->Count + tab->pragmas->Count +
	                               tab->nonterminals->Count);
	fwprintf(trace, L"%d nodes\n", tab->nodes->Count);
	fwprintf(trace, L"%d sets\n", symSet->Count);
}


ParserGen::ParserGen (Parser *parser) {
	maxTerm = 3;
	CR = '\r';
	LF = '\n';
	tErr = 0;
	altErr = 1;
	syncErr = 2;
	tab = parser->tab;
	errors = parser->errors;
	trace = parser->trace;
	buffer = parser->scanner->buffer;
	errorNr = -1;
	usingPos = NULL;

	symSet = new ArrayList();
	err = NULL;
}

}; // namespace
