/* 
** Modular Logfile Analyzer
** Copyright 2000 Jan Kneschke <jan@kneschke.de>
**
** Homepage: http://www.kneschke.de/projekte/modlogan
**

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version, and provided that the above
    copyright and permission notice is included with all distributed
    copies of this or derived software.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA

**
** $Id: mstate.c,v 1.46 2002/01/04 13:56:51 ostborn Exp $
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>

#include <libxml/parser.h>
#include <libxml/xmlerror.h>

#include "config.h"
#include "mlist.h"
#include "mstate.h"
#include "mdatatypes.h"
#include "mlocale.h"

/* Compile-time switches for extra diagnostics in mstate_insert_value():
 * M_DEBUG_SAX_ENTRY dumps the event and the parser stack to stderr,
 * M_DEBUG_SAX_DISPATCH reports events that are passed on to a nested
 * element ("not my job"). A value of 0 compiles the output out. */
#define M_DEBUG_SAX_ENTRY 0
#define M_DEBUG_SAX_DISPATCH 0

/* Written by mstate_write() into the version attribute of <state>. */
#define STATE_FILE_VERSION	"0.5"

/* One row of a tag dispatch table (see state_values in mstate_read_web()):
 * associates an XML tag name with the datatype used to parse it and with
 * the place the parsed content is stored. */
typedef struct {
	char	*string;	/* tag name; a NULL entry terminates the table */
	int	type;		/* one of the M_STATE_DATATYPE_* constants */
	void	*dest;		/* address of the destination field */
} mstate_values;

mstate *mstate_init() {
	mstate *state = malloc(sizeof(mstate));
	assert(state);
	
	state->year		= 0;
	state->month		= 0;
	state->timestamp	= 0;
	
	state->ext		= NULL;
	state->ext_type		= M_STATE_TYPE_UNSET;
	
	return state;
}

/*
 * Release a state record together with its type specific extension.
 *
 * The extension is handed to the destructor that matches ext_type; an
 * unknown type is reported on stderr and the extension is left alone.
 * Passing NULL is allowed and does nothing.
 */
void mstate_free(mstate *state) {
	if (state == NULL) {
		return;
	}
	
	if (state->ext != NULL) {
		if (state->ext_type == M_STATE_TYPE_WEB) {
			mstate_free_web(state->ext);
		} else if (state->ext_type == M_STATE_TYPE_TELECOM) {
			mstate_free_telecom(state->ext);
		} else if (state->ext_type == M_STATE_TYPE_TRAFFIC) {
			mstate_free_traffic(state->ext);
		} else if (state->ext_type == M_STATE_TYPE_MAIL) {
			mstate_free_mail(state->ext);
		} else if (state->ext_type == M_STATE_TYPE_UNSET) {
			/* nothing to release */
		} else {
			fprintf(stderr, "%s.%d: unknown substate type %d\n", __FILE__, __LINE__, state->ext_type);
		}
	}
	
	free(state);
}

/*
 * Allocate the web specific part of a state record.
 *
 * The structure is zero-filled, then the visit list and every hash are
 * created empty. All hashes start with an initial size of 4.
 *
 * returns the new extension; release it with mstate_free_web()
 */
mstate_web *mstate_init_web() {
	mstate_web *web = malloc(sizeof(*web));
	assert(web);
	memset(web, 0, sizeof(*web));
	
	web->visit_list			= mlist_init();
	
	web->indexed_pages		= mhash_init(4);
	web->os_hash			= mhash_init(4);
	web->ua_hash			= mhash_init(4);
	web->req_url_hash		= mhash_init(4);
	web->req_prot_hash		= mhash_init(4);
	web->req_meth_hash		= mhash_init(4);
	web->status_hash		= mhash_init(4);
	web->host_hash			= mhash_init(4);
	web->ref_url_hash		= mhash_init(4);
	web->robots			= mhash_init(4);
	web->bookmarks			= mhash_init(4);
	web->status_internal_error	= mhash_init(4);
	web->status_missing_file	= mhash_init(4);
	web->searchstring		= mhash_init(4);
	web->searchsite			= mhash_init(4);
	web->country_hash		= mhash_init(4);
	web->extension			= mhash_init(4);
	web->visits			= mhash_init(4);
	web->views			= mhash_init(4);
	web->vhost_hash			= mhash_init(4);
	
	return web;
}

/*
 * Release a web extension created by mstate_init_web(): all hashes, the
 * visit list and finally the structure itself. NULL is accepted.
 */
void mstate_free_web(mstate_web *state) {
	size_t k;
	
	if (state == NULL) {
		return;
	}
	
	{
		/* the hashes, in the order in which they are released */
		mhash *hashes[] = {
			state->os_hash,
			state->ua_hash,
			state->req_prot_hash,
			state->req_meth_hash,
			state->status_hash,
			state->host_hash,
			state->req_url_hash,
			state->ref_url_hash,
			state->bookmarks,
			state->robots,
			state->status_internal_error,
			state->status_missing_file,
			state->searchstring,
			state->searchsite,
			state->country_hash,
			state->indexed_pages,
			state->extension,
			state->visits,
			state->views,
			state->vhost_hash
		};
		
		for (k = 0; k < sizeof(hashes) / sizeof(hashes[0]); k++) {
			mhash_free(hashes[k]);
		}
	}
	
	mlist_free(state->visit_list);
	
	free(state);
}

/*
 * Allocate the telecom specific part of a state record.
 *
 * Both number hashes are created with an initial size of 128 and the
 * call counters of all 24 hour slots and all 31 day slots are cleared.
 *
 * returns the new extension; release it with mstate_free_telecom()
 */
mstate_telecom *mstate_init_telecom() {
	mstate_telecom *telecom = malloc(sizeof(*telecom));
	int slot;
	
	assert(telecom);
	
	telecom->called_numbers = mhash_init(128);
	telecom->calling_numbers = mhash_init(128);
	
	/* per-hour counters */
	slot = 0;
	while (slot < 24) {
		telecom->hours[slot].incomming_calls = 0;
		telecom->hours[slot].outgoing_calls = 0;
		slot++;
	}
	
	/* per-day counters */
	slot = 0;
	while (slot < 31) {
		telecom->days[slot].incomming_calls = 0;
		telecom->days[slot].outgoing_calls = 0;
		slot++;
	}
	
	return telecom;
}

/*
 * Release a telecom extension: both number hashes, then the structure
 * itself. NULL is accepted.
 */
void mstate_free_telecom(mstate_telecom *state) {
	if (state != NULL) {
		mhash_free(state->called_numbers);
		mhash_free(state->calling_numbers);
		
		free(state);
	}
}

/*
 * Allocate the mail specific part of a state record.
 *
 * The structure is zero-filled, then the address/domain hashes and the
 * virus related hashes are created empty (initial size 4).
 *
 * returns the new extension; release it with mstate_free_mail()
 */
mstate_mail *mstate_init_mail() {
	mstate_mail *mail = malloc(sizeof(*mail));
	assert(mail);
	
	memset(mail, 0, sizeof(*mail));
	
	/* senders and recipients */
	mail->sender		= mhash_init(4);
	mail->receipient	= mhash_init(4);
	mail->send_domain	= mhash_init(4);
	mail->recp_domain	= mhash_init(4);
	
	/* virus statistics */
	mail->virus		= mhash_init(4);
	mail->subject		= mhash_init(4);
	mail->scanner		= mhash_init(4);
	
	return mail;
}

/*
 * Release a mail extension: every hash it owns, then the structure
 * itself. NULL is accepted.
 */
void mstate_free_mail(mstate_mail *state) {
	if (state != NULL) {
		/* senders and recipients */
		mhash_free(state->sender);
		mhash_free(state->receipient);
		mhash_free(state->recp_domain);
		mhash_free(state->send_domain);
		
		/* virus statistics */
		mhash_free(state->virus);
		mhash_free(state->subject);
		mhash_free(state->scanner);
		
		free(state);
	}
}

/*
 * Allocate the traffic specific part of a state record and create its
 * four hashes (initial size 128 each).
 *
 * returns the new extension; release it with mstate_free_traffic()
 */
mstate_traffic *mstate_init_traffic() {
	mstate_traffic *traffic = malloc(sizeof(*traffic));
	assert(traffic);
	
	traffic->incomming = mhash_init(128);
	traffic->outgoing = mhash_init(128);
	traffic->external = mhash_init(128);
	traffic->internal = mhash_init(128);
	
	return traffic;
}

/*
 * Release a traffic extension: its four hashes, then the structure
 * itself. NULL is accepted.
 */
void mstate_free_traffic(mstate_traffic *state) {
	if (state != NULL) {
		mhash_free(state->incomming);
		mhash_free(state->outgoing);
		mhash_free(state->external);
		mhash_free(state->internal);
		
		free(state);
	}
}

/*
 * Write the contents of a hash to the state file.
 *
 * Walks over all h->size buckets and passes every bucket that has a
 * list to mlist_write().
 *
 * returns 0 (always)
 */
int mhash_write(gzFile *fd, mhash *h) {
	int bucket;

	for (bucket = 0; bucket < h->size; bucket++) {
		/* buckets without a list have nothing to write */
		if (!h->data[bucket]->list) continue;
		
		mlist_write(fd, h->data[bucket]->list);
	}
	
	return 0;
}

/*
 * Write the web extension as the M_STATE_SECTION_WEB element.
 *
 * The element contains one child element per hash (plus one for the
 * visit list), followed by the per-hour (24 slots) and per-day (31 slots)
 * counters.
 *
 * conf  - not used here
 * state - the web extension to write
 * fd    - the gz stream of the state file
 *
 * returns 0 (always)
 */
int mstate_write_web (mconfig *conf, mstate_web *state, gzFile *fd) {
	int i;

/* <tag> + the contents of the hash state->member_ + </tag> */
#define WR_HASH(tag_, member_) \
	do { \
		gzprintf(fd, "<%s>\n", tag_); \
		mhash_write(fd, state->member_); \
		gzprintf(fd, "</%s>\n", tag_); \
	} while (0)
/* one counter of slot i, the element is named like the struct member */
#define WR_LONG(arr_, x_) \
	gzprintf(fd, "<%s>%ld</%s>", #x_, state->arr_[i].x_, #x_)
#define WR_DOUBLE(arr_, x_) \
	gzprintf(fd, "<%s>%.0f</%s>", #x_, state->arr_[i].x_, #x_)
	
	gzprintf(fd, "<%s>\n", M_STATE_SECTION_WEB);
	
	WR_HASH(M_STATE_WEB_REQ_URL, req_url_hash);
	WR_HASH(M_STATE_WEB_REQ_METHOD, req_meth_hash);
	WR_HASH(M_STATE_WEB_REQ_PROTOCOL, req_prot_hash);
	WR_HASH(M_STATE_WEB_REF_URL, ref_url_hash);
	WR_HASH(M_STATE_WEB_OS, os_hash);
	WR_HASH(M_STATE_WEB_USERAGENT, ua_hash);
	WR_HASH(M_STATE_WEB_HOST, host_hash);
	WR_HASH(M_STATE_WEB_STATUS, status_hash);

	/* the visits are kept in a list, not in a hash */
	gzprintf(fd, "<%s>\n", M_STATE_WEB_VISIT);
	mlist_write(fd, state->visit_list);
	gzprintf(fd, "</%s>\n", M_STATE_WEB_VISIT);
	
	WR_HASH(M_STATE_WEB_COUNTRIES, country_hash);
	WR_HASH(M_STATE_WEB_ROBOTS, robots);
	WR_HASH(M_STATE_WEB_SEARCHSITE, searchsite);
	WR_HASH(M_STATE_WEB_SEARCHSTRING, searchstring);
	WR_HASH(M_STATE_WEB_INT_ERROR, status_internal_error);
	WR_HASH(M_STATE_WEB_MISSING_FILE, status_missing_file);
	WR_HASH(M_STATE_WEB_BOOKMARKS, bookmarks);
	WR_HASH(M_STATE_WEB_INDEXED_PAGES, indexed_pages);
	WR_HASH(M_STATE_WEB_EXTENSIONS, extension);
	WR_HASH(M_STATE_WEB_VISITS, visits);
	WR_HASH(M_STATE_WEB_VIEWS, views);
	WR_HASH(M_STATE_WEB_SRVHOST, vhost_hash);

	/* per-hour counters */
	gzprintf(fd, "<%s>\n", M_STATE_WEB_HOURS);
	i = 0;
	while (i < 24) {
		gzprintf(fd, "<hour index=\"%d\">\n", i);
		
		WR_LONG(hours, hits);
		WR_LONG(hours, files);
		WR_LONG(hours, pages);
		WR_LONG(hours, visits);
		WR_LONG(hours, hosts);
		WR_DOUBLE(hours, xfersize);
		
		gzprintf(fd, "</hour>\n");
		i++;
	}
	gzprintf(fd, "</%s>\n", M_STATE_WEB_HOURS);
	
	/* per-day counters */
	gzprintf(fd, "<%s>\n", M_STATE_WEB_DAYS);
	i = 0;
	while (i < 31) {
		gzprintf(fd, "<day index=\"%d\">\n", i);
		
		WR_LONG(days, hits);
		WR_LONG(days, files);
		WR_LONG(days, pages);
		WR_LONG(days, visits);
		WR_LONG(days, hosts);
		WR_DOUBLE(days, xfersize);
		
		gzprintf(fd, "</day>\n");
		i++;
	}
	gzprintf(fd, "</%s>\n", M_STATE_WEB_DAYS);
	
	gzprintf(fd, "</%s>\n", M_STATE_SECTION_WEB);

#undef WR_DOUBLE
#undef WR_LONG
#undef WR_HASH

	return 0;
}

/*
 * Write the mail extension as the M_STATE_SECTION_MAIL element.
 *
 * The element contains one child element per hash, followed by the
 * per-hour (24 slots) and per-day (31 slots) mail and byte counters.
 *
 * conf  - not used here
 * state - the mail extension to write
 * fd    - the gz stream of the state file
 *
 * returns 0 (always)
 */
int mstate_write_mail (mconfig *conf, mstate_mail *state, gzFile *fd) {
	int i;

/* <tag> + the contents of the hash state->member_ + </tag> */
#define WR_HASH(tag_, member_) \
	do { \
		gzprintf(fd, "<%s>\n", tag_); \
		mhash_write(fd, state->member_); \
		gzprintf(fd, "</%s>\n", tag_); \
	} while (0)
/* one counter of slot i, the element is named like the struct member */
#define WR_LONG(arr_, x_) \
	gzprintf(fd, "<%s>%ld</%s>\n", #x_, state->arr_[i].x_, #x_)
	
	gzprintf(fd, "<%s>\n", M_STATE_SECTION_MAIL);

	WR_HASH(M_STATE_MAIL_SENDER, sender);
	WR_HASH(M_STATE_MAIL_RECEIPIENT, receipient);
	WR_HASH(M_STATE_MAIL_SEND_DOMAIN, send_domain);
	WR_HASH(M_STATE_MAIL_RECP_DOMAIN, recp_domain);
	WR_HASH(M_STATE_MAIL_VIRUS, virus);
	WR_HASH(M_STATE_MAIL_SCANNER, scanner);
	WR_HASH(M_STATE_MAIL_SUBJECT, subject);
	
	/* per-hour counters */
	gzprintf(fd, "<%s>\n", M_STATE_MAIL_HOURS);
	i = 0;
	while (i < 24) {
		gzprintf(fd, "<hour index=\"%d\">\n", i);
		
		WR_LONG(hours, incomming_mails);
		WR_LONG(hours, outgoing_mails);
		WR_LONG(hours, incomming_bytes);
		WR_LONG(hours, outgoing_bytes);
		
		gzprintf(fd, "</hour>\n");
		i++;
	}
	gzprintf(fd, "</%s>\n", M_STATE_MAIL_HOURS);

	/* per-day counters */
	gzprintf(fd, "<%s>\n", M_STATE_MAIL_DAYS);
	i = 0;
	while (i < 31) {
		gzprintf(fd, "<day index=\"%d\">\n", i);
		
		WR_LONG(days, incomming_mails);
		WR_LONG(days, outgoing_mails);
		WR_LONG(days, incomming_bytes);
		WR_LONG(days, outgoing_bytes);
		
		gzprintf(fd, "</day>\n");
		i++;
	}
	gzprintf(fd, "</%s>\n", M_STATE_MAIL_DAYS);
	
	gzprintf(fd, "</%s>\n", M_STATE_SECTION_MAIL);

#undef WR_LONG
#undef WR_HASH

	return 0;
}

/*
 * Placeholder for writing the telecom extension: nothing is emitted to
 * the state file and none of the arguments is looked at.
 *
 * returns 0 (always)
 */
int mstate_write_telecom (mconfig *conf, mstate_telecom *state, gzFile *fd)
{
	/* not implemented yet */
	return 0;
}

/*
 * Placeholder for writing the traffic extension: nothing is emitted to
 * the state file and none of the arguments is looked at.
 *
 * returns 0 (always)
 */
int mstate_write_traffic (mconfig *conf, mstate_traffic *state, gzFile *fd)
{
	/* not implemented yet */
	return 0;
}

/*
 * Write a state record as gzip-compressed XML below the output directory.
 *
 * conf       - configuration; conf->outputdir is the base directory ("." is
 *              used when it is not set), conf->debug_level > 1 enables
 *              diagnostics on stderr
 * state      - the state to write; NULL is rejected
 * _add_month - M_STATE_WRITE_DEFAULT writes "mla.state.xml", every other
 *              value appends ".YYYYMM" taken from state->year/state->month
 * subpath    - optional directory below the output directory (created if it
 *              does not exist yet), may be NULL
 *
 * returns 0 on success and -1 if the state is NULL, the subpath tries to
 * leave the output directory, the resulting filename does not fit into the
 * buffer, or the file cannot be opened or closed cleanly
 */
int mstate_write (mconfig *conf, mstate *state, int _add_month, char *subpath) {
	char filename[255];
	const char *outputdir;
	char *s;
	gzFile *fd;
	int len;
	
	if (state == NULL) return -1;

	/* resolve the fallback once, before outputdir is used anywhere */
	outputdir = conf->outputdir ? conf->outputdir : ".";

	if (subpath) {
		/* protect outputdir */
		if (subpath[0] == '.' && subpath[1] == '.') {
			/* someone wants to leave the outputdir */
			fprintf(stderr, "%s.%d: subpath contains 2 dots (..) at the beginning: %s\n", 
				__FILE__, __LINE__, 
				subpath);
			return -1;
		}
		for (s = subpath; *s; s++) {
			/* catch "/../" as well as a trailing "/.." */
			if (s[0] == '/' && s[1] == '.' && s[2] == '.' &&
			    (s[3] == '/' || s[3] == '\0')) {
				fprintf(stderr, "%s.%d: subpath contains '/../': %s\n", 
					__FILE__, __LINE__, 
					subpath);
				return -1;
			}
		}
		
		/* protect the filename buffer */
		len = snprintf(filename, sizeof(filename), "%s/%s/",
			       outputdir, subpath);
		if (len < 0 || (size_t)len >= sizeof(filename)) {
			fprintf(stderr, "%s.%d: outputdir (%s) + subpath (%s) are too long\n", 
				__FILE__, __LINE__, 
				outputdir, subpath);
			return -1;
		}
		
		/* best effort: the directory may exist already; if it cannot
		 * be used, opening the state file below reports the error */
		mkdir(filename, 0755);
		
		if (_add_month == M_STATE_WRITE_DEFAULT) {
			len = snprintf(filename, sizeof(filename),
				       "%s/%s/mla.state.xml", 
				       outputdir, subpath);
		} else {
			len = snprintf(filename, sizeof(filename),
				       "%s/%s/mla.state.xml.%04d%02d", 
				       outputdir, subpath,
				       state->year, state->month);
		}
	} else {
		if (_add_month == M_STATE_WRITE_DEFAULT) {
			len = snprintf(filename, sizeof(filename),
				       "%s/mla.state.xml", 
				       outputdir);
		} else {
			len = snprintf(filename, sizeof(filename),
				       "%s/mla.state.xml.%04d%02d", 
				       outputdir, 
				       state->year, state->month);
		}
	}

	/* never continue with a truncated filename */
	if (len < 0 || (size_t)len >= sizeof(filename)) {
		fprintf(stderr, "%s.%d: filename of the state file is too long (outputdir: %s)\n", 
			__FILE__, __LINE__, 
			outputdir);
		return -1;
	}
	
	if (conf->debug_level > 1) {
		fprintf(stderr, "%s.%d: STATE-Filename: %s\n", __FILE__, __LINE__, filename);
	}
	
	if ((fd = gzopen(filename, "wb")) == NULL) {
		fprintf(stderr, "%s.%d: can't open %s: %s\n", __FILE__, __LINE__, filename, strerror(errno));
		return -1;
	}

/* header */
	gzprintf(fd, "<?xml version=\"1.0\"?>\n");
	gzprintf(fd, "<!DOCTYPE state PUBLIC \"http://www.kneschke.de/projekte/modlogan/mla.state.dtd\" \"mla.state.dtd\">\n");
	gzprintf(fd, "<state version=\"%s\" package=\"%s\">\n",
		STATE_FILE_VERSION, PACKAGE);
	
/* global values */
	
	gzprintf(fd, "<global type=\"%s\">", 
		state->week ? "weekly" : "monthly");
	
	/* the cast keeps the argument in line with the %ld conversion */
	gzprintf(fd, "<timestamp>%ld</timestamp>", (long) state->timestamp);
	gzprintf(fd, "<year>%d</year>", state->year);
	gzprintf(fd, "<month>%d</month>", state->month);
	gzprintf(fd, "<week>%d</week>", state->week);
	gzprintf(fd, "</global>\n");

	/* FIXME: input, processor, output */
	if (state->ext) {
		switch(state->ext_type) {
		case M_STATE_TYPE_WEB:
			mstate_write_web(conf, state->ext, fd);
			break;
		case M_STATE_TYPE_TELECOM:
			mstate_write_telecom(conf, state->ext, fd);
			break;
		case M_STATE_TYPE_TRAFFIC:
			mstate_write_traffic(conf, state->ext, fd);
			break;
		case M_STATE_TYPE_MAIL:
			mstate_write_mail(conf, state->ext, fd);
			break;
		default:
			fprintf(stderr, "%s.%d: unknown substate type\n", __FILE__, __LINE__);
		}
	} else {
		if (conf->debug_level > 1)
			fprintf(stderr, "%s.%d: no state extension\n", __FILE__, __LINE__);
	}
	
	gzprintf(fd, "</state>\n");
	
	/* gzclose() flushes the pending output; a non-zero result means
	 * that the state file is incomplete */
	if (gzclose(fd) != 0) {
		fprintf(stderr, "%s.%d: closing %s failed\n", __FILE__, __LINE__, filename);
		return -1;
	}
	
	return 0;
}

/* Datatypes of a state file element; selects the branch taken by
 * mstate_insert_value() and tells it how to interpret its dest argument. */
#define M_STATE_DATATYPE_UNSET	0	/* used for the terminating table entry */
#define M_STATE_DATATYPE_HASH	1	/* dest is a mhash ** */
#define M_STATE_DATATYPE_LONG	2	/* dest is accessed as int * */
#define M_STATE_DATATYPE_LIST	3	/* dest is a mlist ** */
#define M_STATE_DATATYPE_WEB_ARRAY 4	/* dest is a marray_web array */

#define M_STATE_DATASUBTYPE_UNSET	0

/*
 * Feed one SAX event into the destination of a state file element.
 *
 * m       - parser stack; st_depth, st_depth_max and the st[] entries are
 *           updated while elements are opened and closed
 * tagtype - M_TAG_BEGIN, M_TAG_END or M_TAG_TEXT
 * dest    - where the result is stored, interpreted according to type:
 *             M_STATE_DATATYPE_LONG       int *
 *             M_STATE_DATATYPE_HASH       mhash **
 *             M_STATE_DATATYPE_LIST       mlist **
 *             M_STATE_DATATYPE_WEB_ARRAY  marray_web * (array, the slot is
 *                                         selected by the "index" attribute)
 * type    - one of the M_STATE_DATATYPE_* constants
 * value   - the tag name for begin/end events, the text for text events
 * attrs   - attribute name/value pairs of a begin tag; read here only for
 *           WEB_ARRAY, otherwise passed on to mdata_read()
 *
 * For HASH and LIST an event is handled on this level when st_depth equals
 * st_depth_max: a begin tag creates a new mdata record on the stack, the
 * matching end tag inserts the record into the hash/list (or frees it if
 * it has no key). Events of deeper levels are forwarded to mdata_read().
 *
 * returns 0 on success, -1 on error
 */
int mstate_insert_value(mstate_stack *m, int tagtype, void *dest, int type, const xmlChar *value, const xmlChar **attrs) {
#if M_DEBUG_SAX_ENTRY
	int i;
	M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "|--> '%s' - %s - %d\n", 
		 value, tagtype == 1 ? "open" : (tagtype == 2 ? "close" : (tagtype == 3 ? "text" : "unknown")), type);
	M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "o->");
	for (i = 0; i < m->st_depth_max + 2; i++) {
		fprintf(stderr, " %d (%p)", m->st[i].id, m->st[i].data);
	}
	fprintf(stderr, "\n");
#endif	
	switch (type) {
	/* plain number: only the text between the tags is of interest */
	case M_STATE_DATATYPE_LONG: {
		switch (tagtype) {
		case M_TAG_TEXT: {
			/* NOTE(review): the value is stored through an int *
			 * although the datatype is called LONG - confirm that
			 * all destinations really are ints */
			int str = *(int *)(dest);
			
			str = strtol(value, NULL, 10);
			
			*(int *)(dest) = str;
			break;
		}
		default:
			M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unhandled tagtype: %d\n",
				 tagtype);
			return -1;
		}
		break;	
	}
		
	/* hash: every child element becomes one mdata record of the hash */
	case M_STATE_DATATYPE_HASH: {
		mhash *hash = *(mhash **) dest;
#if 0
		M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "datatype hash\n");
#endif
		
		switch (tagtype) {
		case M_TAG_BEGIN: {
			if (m->st_depth != m->st_depth_max) {
				/* a tag inside of a record: step down one level
				 * and let mdata_read() handle it */
				mdata *data;
#if M_DEBUG_SAX_DISPATCH
				M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
					 m->st_depth, 
					 m->st_depth_max,
					 value);
#endif				
				m->st_depth++;
				data = m->st[m->st_depth].data;
				if (data == NULL) {
					/* no record on this level yet, create one */
					m->st[m->st_depth].data = data = mdata_init();
#if 0
					M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "setting data = %p, %s, %d\n",
						 data, value, data->type);
#endif
				}
				
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
				
			} else {
				/* a new record starts: push a fresh mdata onto the stack */
				mdata *data;
				m->st_depth_max++;
				m->st_depth++;
				m->st[m->st_depth].id = type;
				m->st[m->st_depth].data = data = mdata_init();
				
#if 0
				M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "setting data = %p, %s\n",
					 data, value);
#endif
				
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
			}
			break;
		}
		case M_TAG_END:	{
			mdata *data = m->st[m->st_depth].data;
			/* remember the level of the record, st_depth is changed below */
			int std = m->st_depth;
			if (m->st_depth != m->st_depth_max) {
#if M_DEBUG_SAX_DISPATCH
				M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
					 m->st_depth, 
					 m->st_depth_max,
					 value);
#endif
				m->st_depth++;
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}

				/* the maximum depth is back at the level of the
				 * record: the record is finished, store it */
				if (m->st_depth_max == std) {
					if (data && data->key) {
#if 0
						M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
							 "inserting data = %p, %s, %d\n",
							 data, data->key, data->type);
#endif
						mhash_insert_sorted(hash, data);
					} else if (data) {
						/* a record without a key cannot be stored */
						M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
							 "have to call mdata_free\n");
						mdata_free(data);
					}
					m->st[std].data = data = NULL;
#if 0
					M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "unsetting data = %p, %s\n",
						 data, value);
#endif
				}
			} else {
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
				
#if M_DEBUG_SAX_ENTRY
				M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "o--> '%s' - %s - %d\n", 
					 value, tagtype == 1 ? "open" : (tagtype == 2 ? "close" : (tagtype == 3 ? "text" : "unknown")), type);
				M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "o->");
				for (i = 0; i < m->st_depth_max + 2; i++) {
					fprintf(stderr, " %d (%p)", m->st[i].id, m->st[i].data);
				}
				fprintf(stderr, "\n");
#endif
				
				/* pop the record from the stack and store it */
				m->st[std].id = 0;
				m->st_depth_max--;
				
				if (data && data->key) {
					/* NOTE(review): this message is enabled (#if 1)
					 * while the similar ones are disabled - looks
					 * like a debugging leftover, confirm */
#if 1
					M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "inserting data = %p, %s\n",
						 data, value);
#endif
					mhash_insert_sorted(hash, data);
				} else if (data) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "have to call mdata_free\n");
					mdata_free(data);
				}
				m->st[std].data = data = NULL;
#if 0
				M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "unsetting data = %p, %s\n",
					 data, value);
#endif
				
			}
			break;
		}
		case M_TAG_TEXT: {
			if (m->st_depth != m->st_depth_max) {
				/* text inside of a record is forwarded, but only
				 * if there is a record to receive it */
				mdata *data = m->st[m->st_depth].data;
				if (data != NULL) {
					m->st_depth++;
					if (mdata_read(m, tagtype, data, type, value, attrs)) {
						M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
							 "mdata_read failed\n");
						return -1;
					}
				} else {
#if 0
					M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "ignoring '%s'\n", value);
#endif
				}
			} else {
				/* ignore */
			}
			break;
		}
		default:
			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unhandled tagtype: %d - %s - %d\n",
				 tagtype,
				 value,
				 type);
			return -1;
		}
		
		break;
	}
		
	/* list: same scheme as the hash, finished records go into the list */
	case M_STATE_DATATYPE_LIST: {
		mlist *list = *(mlist **) dest;
#if 0
		M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "datatype list: %s - %d\n",
			 value,
			 tagtype);
#endif
		switch (tagtype) {
		case M_TAG_BEGIN: {
			if (m->st_depth != m->st_depth_max) {
				mdata *data;
#if M_DEBUG_SAX_DISPATCH
				M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
					 m->st_depth, 
					 m->st_depth_max,
					 value);
#endif				
				m->st_depth++;
				/* NOTE(review): unlike the hash branch no record is
				 * created if this entry is NULL - confirm that
				 * mdata_read() copes with a NULL record */
				data = m->st[m->st_depth].data;
#if 0
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "setting data = %p\n",
					 data);
#endif
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
			} else {
				/* a new record starts: push a fresh mdata onto the stack */
				mdata *data;
				m->st_depth_max++;
				m->st_depth++;
				m->st[m->st_depth].id = type;
				m->st[m->st_depth].data = data = mdata_init();
				assert(data);
#if 0
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "setting data = %p\n",
					 data);
#endif
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
			}
			break;
		}
		case M_TAG_END:	{
			mdata *data = m->st[m->st_depth].data;
			if (m->st_depth != m->st_depth_max) {
#if M_DEBUG_SAX_DISPATCH
				M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
					 m->st_depth, 
					 m->st_depth_max,
					 value);
#endif
				m->st_depth++;
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
			} else {
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
				
				/* pop the record from the stack and store it */
				m->st[m->st_depth].id = 0;
				m->st_depth_max--;
				
				if (data && data->key) {
#if 0
					M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "inserting data = %p, %s\n",
						 data, value);
#endif
					mlist_insert(list, data);
				} else if (data) {
					/* a record without a key cannot be stored */
					mdata_free(data);
				}
				m->st[m->st_depth].data = NULL;
#if 0
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "unsetting data = %p\n",
					 data);
#endif
			}
			break;
		}
		case M_TAG_TEXT: {
			if (m->st_depth != m->st_depth_max) {
				mdata *data = m->st[m->st_depth].data;
				m->st_depth++;
				
				if (mdata_read(m, tagtype, data, type, value, attrs)) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "mdata_read failed\n");
					return -1;
				}
			} else {
				/* ignore */
			}
			break;
		}
		default:
			/* NOTE(review): the hash and web-array branches return -1
			 * here, this one only logs and reports success */
			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unhandled tagtype: %d - %s - %d\n",
				 tagtype,
				 value,
				 type);
		}
		
		break;
	}
	/* array of counters: <hour index="n"> / <day index="n"> elements
	 * whose children are named like the fields listed in data_values */
	case M_STATE_DATATYPE_WEB_ARRAY: {
		int i;
		marray_web *wa;
		/* static: the dest pointers set by the opening tag of a slot are
		 * still needed when the events of its children arrive later */
		static mdata_values data_values[] = {
			{ "hits", M_DATA_FIELDTYPE_LONG, 	NULL },
			{ "files", M_DATA_FIELDTYPE_LONG, 	NULL },
			{ "pages", M_DATA_FIELDTYPE_LONG, 	NULL },
			{ "visits", M_DATA_FIELDTYPE_LONG, 	NULL },
			{ "hosts", M_DATA_FIELDTYPE_LONG, 	NULL },
			{ "xfersize", M_DATA_FIELDTYPE_DOUBLE, 	NULL },
			
			{ NULL, M_DATA_FIELDTYPE_UNSET,  NULL}
		};
			
		wa = (marray_web *) dest;

		switch (tagtype) {
		case M_TAG_BEGIN: {
			if (m->st_depth == m->st_depth_max - 1) {
				/* opening tag of a field inside of a slot */
#if M_DEBUG_SAX_DISPATCH
				M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
					 m->st_depth, 
					 m->st_depth_max,
					 value);
#endif				
				for (i = 0; data_values[i].string != NULL && 0 != strcmp(data_values[i].string, value); i++)
					;
				if (data_values[i].string != NULL) {
					/* remember the field as table index + 1,
					 * the text handler looks it up again */
					m->st_depth_max++;
					m->st_depth++;
					m->st[m->st_depth].id = i + 1;
				} else {
					M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "unknown tag '%s'\n", 
						 value);
					return -1;
				}
				
			} else if (m->st_depth == m->st_depth_max) {
				/* opening tag of a slot: select the array element */
				m->st_depth_max++;
				m->st_depth++;
				m->st[m->st_depth].id = type;
				
				if (attrs && attrs[0] && 0 == strcmp("index", attrs[0])
				    && attrs[1]) {
					int ndx;
					
					/* NOTE(review): ndx comes straight from the
					 * state file and is not range-checked before
					 * it is used as an array index; the length of
					 * the array is not known in this function */
					ndx = strtol(attrs[1], NULL, 10);

					data_values[0].dest = &(wa[ndx].hits);
					data_values[1].dest = &(wa[ndx].files);
					data_values[2].dest = &(wa[ndx].pages);
					data_values[3].dest = &(wa[ndx].visits);
					data_values[4].dest = &(wa[ndx].hosts);
					data_values[5].dest = &(wa[ndx].xfersize);
				} else {
					M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "no index set for '%s'\n",
						 value);
					return -1;
				}
			} else {
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "oorgs '%s'\n",
					 value);
				return -1;
			}
			break;
		}
		case M_TAG_END:	{
			if (m->st_depth == m->st_depth_max - 1) {
				/* closing tag of a field */
				for (i = 0; data_values[i].string != NULL && 0 != strcmp(data_values[i].string, value); i++)
					;
				if (data_values[i].string != NULL) {
					m->st[m->st_depth].id = 0;
					m->st_depth_max--;
				} else {
					M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "unknown tag '%s'\n", 
						 value);
					return -1;
				}
			} else if (m->st_depth == m->st_depth_max) {
				/* closing tag of a slot */
#if 0
				for (i = 0; data_values[i].string != NULL; i++) {
					fprintf(stderr, "--p %s ", data_values[i].string);
					switch(data_values[i].type) {
					case M_DATA_FIELDTYPE_LONG:
						fprintf(stderr, "%ld", *(long *)(data_values[i].dest));
						break;
					case M_DATA_FIELDTYPE_DOUBLE:
						fprintf(stderr, "%f", *(double *)(data_values[i].dest));
						break;
					}
					fprintf(stderr, "\n");
				}
#endif
				m->st[m->st_depth].id = 0;
				m->st_depth_max--;
			} else {
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					  "oorgs '%s'\n",
					  value);
				return -1;
			}
			break;
		}
		case M_TAG_TEXT: {
			if (m->st_depth == m->st_depth_max - 1) {
				/* text of a field: the id stored by the opening
				 * tag (table index + 1) selects type and target */
				if (mdata_insert_value(m, tagtype, 
						       data_values[m->st[m->st_depth].id - 1].dest, 
						       data_values[m->st[m->st_depth].id - 1].type, 
						       value, attrs)) {
					M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "insert failed for '%s'\n", 
						 value);
					return -1;
				} else {
#if 0
					M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "inserting '%s' for '%s'\n", 
						 value, data_values[m->st[m->st_depth].id - 1].string);
#endif
				}
#if 0				
				for (i = 0; data_values[i].string != NULL; i++) {
					fprintf(stderr, "--e %s ", data_values[i].string);
					switch(data_values[i].type) {
					case M_DATA_FIELDTYPE_LONG:
						fprintf(stderr, "%ld", *(long *)(data_values[i].dest));
						break;
					case M_DATA_FIELDTYPE_DOUBLE:
						fprintf(stderr, "%f", *(double *)(data_values[i].dest));
						break;
					}
					fprintf(stderr, "\n");
				}
				
#endif
			} else if (m->st_depth == m->st_depth_max) {
				/* ignore */
			} else {
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					  "oorgs '%s'\n",
					  value);
				return -1;
			}
			break;
		}
		default:
			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unhandled tagtype: %d - %s - %d\n",
				 tagtype,
				 value,
				 type);
			return -1;
		}
		break;
	}
	default:
		M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "unknown type: %d\n",
			 type);
		return -1;
	}
#if 0
	M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "o-<");
	for (i = 0; i < m->st_depth_max + 2; i++) {
		
		fprintf(stderr, " %d", m->st[i].id);
	}
	fprintf(stderr, "\n");	
#endif	
	return 0;
}

/**
 * handle one SAX event inside the web section of a statefile
 *
 * The tags known at this level are listed in state_values; each entry is
 * bound to one member of the mstate_web extension of the state. A tag of
 * this level is looked up by name and remembered on the stack as
 * (table index + 1), so that an id of 0 means "no tag open". While such a
 * tag is open, every further event belongs to a deeper level and is
 * forwarded to mstate_insert_value() together with the destination and the
 * datatype of the open tag.
 *
 * @param m        parser stack (depth counters, per-level ids, state)
 * @param tagtype  M_TAG_BEGIN, M_TAG_END or M_TAG_TEXT
 * @param value    name of the tag, or the text for M_TAG_TEXT
 * @param attrs    handed through to mstate_insert_value()
 * @return 0 on success, -1 if no state extension is set, the tag is unknown,
 *         mstate_insert_value() failed or the tagtype is not handled
 */
int mstate_read_web (mstate_stack *m, int tagtype, const xmlChar *value, const xmlChar **attrs) {
	int i = 0;
	mstate_web *st;

	/* tag name, datatype, destination; the destinations are filled in below */
	mstate_values state_values[] = {
		{ M_STATE_WEB_REQ_URL,	M_STATE_DATATYPE_HASH,		NULL },
		{ M_STATE_WEB_REQ_METHOD, M_STATE_DATATYPE_HASH, 	NULL },
		{ M_STATE_WEB_REQ_PROTOCOL, M_STATE_DATATYPE_HASH, 	NULL },
		{ M_STATE_WEB_REF_URL,	M_STATE_DATATYPE_HASH,		NULL },
		{ M_STATE_WEB_OS,	M_STATE_DATATYPE_HASH,		NULL },
		{ M_STATE_WEB_USERAGENT, M_STATE_DATATYPE_HASH,	        NULL },
		{ M_STATE_WEB_HOST,	M_STATE_DATATYPE_HASH,		NULL },
		{ M_STATE_WEB_STATUS,	M_STATE_DATATYPE_HASH,		NULL },
		{ M_STATE_WEB_VISIT,	M_STATE_DATATYPE_LIST,		NULL },
		{ M_STATE_WEB_HOURS,	M_STATE_DATATYPE_WEB_ARRAY, 	NULL },
		{ M_STATE_WEB_DAYS,	M_STATE_DATATYPE_WEB_ARRAY, 	NULL },
		{ M_STATE_WEB_COUNTRIES, M_STATE_DATATYPE_HASH,	        NULL },
		{ M_STATE_WEB_ROBOTS,	M_STATE_DATATYPE_HASH,		NULL },
		{ M_STATE_WEB_SEARCHSITE, M_STATE_DATATYPE_HASH, 	NULL },
		{ M_STATE_WEB_SEARCHSTRING, M_STATE_DATATYPE_HASH, 	NULL },
		{ M_STATE_WEB_INT_ERROR, M_STATE_DATATYPE_HASH,		NULL },
		{ M_STATE_WEB_MISSING_FILE, M_STATE_DATATYPE_HASH,      NULL },
		{ M_STATE_WEB_BOOKMARKS, M_STATE_DATATYPE_HASH,		NULL },
		{ M_STATE_WEB_INDEXED_PAGES, M_STATE_DATATYPE_HASH, 	NULL },
		{ M_STATE_WEB_EXTENSIONS, M_STATE_DATATYPE_HASH, 	NULL },
		{ M_STATE_WEB_VISITS,    M_STATE_DATATYPE_HASH, 	NULL },
		{ M_STATE_WEB_VIEWS,    M_STATE_DATATYPE_HASH, 		NULL },
		{ M_STATE_WEB_SRVHOST,    M_STATE_DATATYPE_HASH, 	NULL },
		
		{ NULL, M_STATE_DATATYPE_UNSET,  NULL}
	};
	
	/* without the web extension there is nothing the table could point at */
	if (!(m && m->state && ((mstate *)(m->state))->ext)) {
		M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "no state extension set\n");
		return -1;
	}
	
	/* bind the destinations; the order has to be the same as in state_values */
	st = ((mstate *)(m->state))->ext;
	i = 0;
	state_values[i++].dest = &(st->req_url_hash);
	state_values[i++].dest = &(st->req_meth_hash);
	state_values[i++].dest = &(st->req_prot_hash);
	state_values[i++].dest = &(st->ref_url_hash);
	state_values[i++].dest = &(st->os_hash);
	state_values[i++].dest = &(st->ua_hash);
	state_values[i++].dest = &(st->host_hash);
	state_values[i++].dest = &(st->status_hash);
	state_values[i++].dest = &(st->visit_list);
	state_values[i++].dest = &(st->hours);
	state_values[i++].dest = &(st->days);
	state_values[i++].dest = &(st->country_hash);
	state_values[i++].dest = &(st->robots);
	state_values[i++].dest = &(st->searchsite);
	state_values[i++].dest = &(st->searchstring);
	state_values[i++].dest = &(st->status_internal_error);
	state_values[i++].dest = &(st->status_missing_file);
	state_values[i++].dest = &(st->bookmarks);
	state_values[i++].dest = &(st->indexed_pages);
	state_values[i++].dest = &(st->extension);
	state_values[i++].dest = &(st->visits);
	state_values[i++].dest = &(st->views);
	state_values[i++].dest = &(st->vhost_hash);
	
	/* optional dump of the event and the stack, only built if M_DEBUG_SAX_ENTRY is non-zero */
#if M_DEBUG_SAX_ENTRY
	M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "|--> '%s' - %s\n", 
		 value, tagtype == 1 ? "open" : (tagtype == 2 ? "close" : (tagtype == 3 ? "text" : "unknown")));
	M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "o->");
	for (i = 0; i < m->st_depth_max + 2; i++) {
		fprintf(stderr, " %d (%p)", m->st[i].id, m->st[i].data);
	}
	fprintf(stderr, "\n");
#endif	
	switch(tagtype) {
	case M_TAG_BEGIN:
		if (m->st_depth != m->st_depth_max) {
			/* a tag of this level is already open: step onto its stack slot 
			 * and pass the open-tag on together with its destination */
#if 0
			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
				 m->st_depth, 
				 m->st_depth_max,
				 value);
#endif
			m->st_depth++;
			if (mstate_insert_value(m, tagtype, 
						state_values[m->st[m->st_depth].id - 1].dest, 
						state_values[m->st[m->st_depth].id - 1].type, 
						value, attrs)) {
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "insert failed for '%s'\n", 
					 value);
				return -1;
			}
		} else {
			/* the tag belongs to this level: search it by name */
			for (i = 0; state_values[i].string != NULL && 0 != strcmp(state_values[i].string, value); i++)
				;
			if (state_values[i].string != NULL) {
				/* push it; the id is the table index + 1, 0 stays "unset" */
				m->st_depth_max++;
				m->st_depth++;
				m->st[m->st_depth].id = i + 1;
			} else {
				fprintf(stderr, "<%s> unknown\n", value);
				/* unknown tag */
				return -1;
			}
		}
		break;
	case M_TAG_END:
		if (m->st_depth != m->st_depth_max) {
			/* the close-tag belongs to a deeper level: pass it on. 
			 * st_depth is raised first, so the slot of the open tag is st_depth-1 */
/*			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
				 m->st_depth, 
				 m->st_depth_max,
				 value);
			*/
			m->st_depth++;
			if (mstate_insert_value(m, tagtype, 
						state_values[m->st[m->st_depth-1].id - 1].dest, 
						state_values[m->st[m->st_depth-1].id - 1].type, 
						value, attrs)) {
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "insert failed for '%s'\n", 
					 value);
				return -1;
			}
		} else {
			/* close-tag of this level: it only has to be a known name, 
			 * it is not compared with the tag that was opened */
			for (i = 0; state_values[i].string != NULL && 0 != strcmp(state_values[i].string, value); i++)
				;
			if (state_values[i].string != NULL) {
				/* pop: mark the slot as unset again */
				m->st[m->st_depth].id = 0;
				m->st_depth_max--;
			} else {
				fprintf(stderr, "</%s> unknown\n", value);
				/* unknown tag */
				return -1;
			}
		}
		break;
	case M_TAG_TEXT:
		if (m->st_depth < m->st_depth_max) {
			/* text below the open tag: pass it on, same slot convention 
			 * as for the close-tag above */
/*			M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "cur(depth) < max(depth) [%d - %d]- not my job (%s)\n",
				 m->st_depth, 
				 m->st_depth_max,
				 value);*/
			
			m->st_depth++;
			if (mstate_insert_value(m, tagtype, 
						state_values[m->st[m->st_depth-1].id - 1].dest, 
						state_values[m->st[m->st_depth-1].id - 1].type, 
						value, attrs)) {
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "insert failed for '%s'\n", 
					 value);
				return -1;
			}
			
		} else if (m->st_depth > m->st_depth_max) {
			/* nothing is done in this case */
			
		} else {
			/* text directly on this level is dropped; the insert is disabled */
			/*
			if (mstate_insert_value(m, tagtype, 
						state_values[m->st[m->st_depth].id - 1].dest, 
						state_values[m->st[m->st_depth].id - 1].type, 
						value)) {
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "insert failed for '%s'\n", 
					 value);
			}*/
		}
		break;
	default:
		M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "can't handle tagtype '%d'\n", 
			 tagtype);
		return -1;
	}
		
	return 0;
}

/**
 * placeholder reader, by its name meant for the traffic section
 *
 * Nothing is read yet: every event is accepted and thrown away.
 *
 * @return always 0
 */
int mstate_read_traffic (mstate_stack *m, int tagtype, const xmlChar *value, const xmlChar **attrs) {
	/* none of the arguments is looked at */
	(void) m;
	(void) tagtype;
	(void) value;
	(void) attrs;

	return 0;
}

/**
 * placeholder reader, by its name meant for the telecom section
 *
 * Nothing is read yet: every event is accepted and thrown away.
 *
 * @return always 0
 */
int mstate_read_telecom (mstate_stack *m, int tagtype, const xmlChar *value, const xmlChar **attrs) {
	/* none of the arguments is looked at */
	(void) m;
	(void) tagtype;
	(void) value;
	(void) attrs;

	return 0;
}

/**
 * reader for the mail section - not implemented
 *
 * Every call reports the missing implementation and terminates the
 * program with exit(-1); the function never returns to its caller.
 */
int mstate_read_mail (mstate_stack *m, int tagtype, const xmlChar *value, const xmlChar **attrs) {
	/* none of the arguments is looked at */
	(void) m;
	(void) tagtype;
	(void) value;
	(void) attrs;

	M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
		 "read state for mail stuff not implemented yet - going down hard\n");
	exit(-1);

	/* not reached, only here to give the function a return value */
	return 0;
}

/**
 * handle one SAX event inside the global section of a statefile
 *
 * Known tags are year, month, week and timestamp; each one is bound to the
 * member of the same name in the state. An open-tag pushes the tag onto the
 * stack (id = table index + 1), the close-tag pops it again and the text in
 * between is handed to mstate_insert_value(). Events that don't belong to
 * this level are only reported, unknown tags are silently skipped.
 *
 * @param m      parser stack (depth counters, per-level ids, state)
 * @param type   M_TAG_BEGIN, M_TAG_END or M_TAG_TEXT
 * @param value  name of the tag, or the text for M_TAG_TEXT
 * @param attrs  handed through to mstate_insert_value()
 * @return always 0, problems are only reported
 */
int mstate_read_global (mstate_stack *m, int type, const xmlChar *value, const xmlChar **attrs) {
	mstate *state = (mstate *)(m->state);
	const mstate_values fields[] = {
		{ "year", M_STATE_DATATYPE_LONG, 	&(state->year) },
		{ "month", M_STATE_DATATYPE_LONG, 	&(state->month) },
		{ "week", M_STATE_DATATYPE_LONG, 	&(state->week) },
		{ "timestamp", M_STATE_DATATYPE_LONG,   &(state->timestamp) },

		{ NULL, M_STATE_DATATYPE_UNSET,  NULL}
	};
	int idx;

	/* anything but open, close and text is not our business */
	if (type != M_TAG_BEGIN && type != M_TAG_END && type != M_TAG_TEXT) {
		M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "can't handle tagtype '%d'\n", 
			 type);
		return 0;
	}

	/* the event has to sit on the deepest open level */
	if (m->st_depth != m->st_depth_max) {
		M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "cur(depth) != max(depth) [%d - %d]- not my job (%s)\n",
			 m->st_depth, 
			 m->st_depth_max,
			 value);
		return 0;
	}

	if (type == M_TAG_TEXT) {
		/* the id on the stack tells which field the text is meant for */
		idx = m->st[m->st_depth].id - 1;
		if (mstate_insert_value(m, type, 
					fields[idx].dest, 
					fields[idx].type, 
					value, attrs)) {
			M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "insert failed for '%s'\n", 
				 value);
		}
		return 0;
	}

	/* open- or close-tag: find the tag in the table */
	idx = 0;
	while (fields[idx].string != NULL && 0 != strcmp(fields[idx].string, value)) {
		idx++;
	}
	if (fields[idx].string == NULL) {
		/* unknown tag */
		return 0;
	}

	if (type == M_TAG_BEGIN) {
		/* push */
		m->st_depth_max++;
		m->st_depth++;
		m->st[m->st_depth].id = idx + 1;
	} else {
		/* pop */
		m->st[m->st_depth].id = 0;
		m->st_depth_max--;
	}

	return 0;
}

/*
 * the SAX handler table for the statefile parser
 * 
 * All members start out as NULL/0 (members without an explicit initializer 
 * are zeroed as well); the callbacks that are really used are set in 
 * mstate_read() right before the parser is started.
 */
static xmlSAXHandler mlaSAXHandler = { NULL };

/*
 * one entry of the lookup tables used by the SAX callbacks to translate 
 * the tag name of a section into a numeric id; the tables are filled 
 * positionally, so the order of the members must not change
 */
typedef struct {
	/* numeric id of the section (a M_STATE_ST_* value in the callbacks) */
	int type;
	/* tag name of the section, NULL in the entry that terminates a table */
	char *name;
} Matches;

/**
 * process the start tags of the statefiles 
 * 
 * SAX callback for the XML starttags
 * 
 */

void mstate_startElement(void *user_data, const xmlChar *name, const xmlChar **attrs) {
	int i;
	mstate_stack *m = user_data;
	
	enum {	 M_STATE_ST_UNSET, M_STATE_ST_WEB, 
		 M_STATE_ST_TELECOM, M_STATE_ST_TRAFFIC,
		 M_STATE_ST_GLOBAL, M_STATE_ST_MAIL 
	};
		
	
	const Matches matches [] = { 
		{ M_STATE_ST_WEB, M_STATE_SECTION_WEB },
		{ M_STATE_ST_TELECOM, M_STATE_SECTION_TELECOM },
		{ M_STATE_ST_TRAFFIC, M_STATE_SECTION_TRAFFIC },
		{ M_STATE_ST_GLOBAL, M_STATE_SECTION_GLOBAL },
		{ M_STATE_ST_MAIL, M_STATE_SECTION_MAIL },
		
		{ M_STATE_ST_UNSET, NULL } 
	};
	
	/* level 0 <state> */
	m->st_depth = 0;
	
	/* setup the array if we are here the first time */
	if (m->st[0].id == -1) {
		for (i = 0; i < M_STATE_ST_ELEM; i++) {
			m->st[i].id = 0;
			m->st[i].data = NULL;
		}
	}
	
	/* the first element */
	if (m->st[0].id == 0) {
		/* should be state */
		if (0 == strcmp(name, "state")) {
			m->st[0].id = 1;
			return;
		} else {
			exit(-1);
		}
	}
#if 0
	fprintf(stderr, "--> %s\n", name);
#endif 
	/* level 1 <web>, <global> */
	m->st_depth++;
	
	/* no level 1 set */
	if (m->st[m->st_depth].id == M_STATE_ST_UNSET) {
		i = 0;
		for ( i = 0; matches[i].name != NULL && 0 != strcmp(name, matches[i].name); i++)
			;
		
		if (matches[i].name == NULL) {
			/* unknown section */
			M_DEBUG2(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "unknown section '%s' - level %d - going down hard\n", name, m->st_depth);
			exit(-1);
		} else {
			switch (matches[i].type) {
			case M_STATE_ST_GLOBAL:
			case M_STATE_ST_WEB:
			case M_STATE_ST_MAIL:
			case M_STATE_ST_TRAFFIC:
			case M_STATE_ST_TELECOM:
				m->st[m->st_depth].id = matches[i].type;
				m->st_depth_max++;
				break;
			default:
				M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "unhandled section '%s' - going down hard\n", name);
				exit(-1);
			}
		}
	} else {
		/* as level 1 is set, send the open-tag to the next level */
		switch (m->st[m->st_depth].id) {
		case M_STATE_ST_GLOBAL:
			
			if (mstate_read_global (m, M_TAG_BEGIN, name, attrs)) {
				M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "read failed - going down hard\n");
				exit(-1);
			}
			
			break;
		case M_STATE_ST_WEB:
			
			if (((mstate *)(m->state))->ext) {
				if (((mstate *)(m->state))->ext_type != M_STATE_TYPE_WEB) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "state type != web - going down hard\n");
					exit(-1);
				}
			} else {
				((mstate *)(m->state))->ext = mstate_init_web();
				((mstate *)(m->state))->ext_type = M_STATE_TYPE_WEB;
			}
			
			if (mstate_read_web (m, M_TAG_BEGIN, name, attrs)) {
				M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "read failed - going down hard\n");
				exit(-1);
			}
			
			break;
		case M_STATE_ST_MAIL:
			
			if (((mstate *)(m->state))->ext) {
				if (((mstate *)(m->state))->ext_type != M_STATE_TYPE_MAIL) {
					M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
						 "state type != mail - going down hard\n");
					exit(-1);
				}
			} else {
				((mstate *)(m->state))->ext = mstate_init_mail();
				((mstate *)(m->state))->ext_type = M_STATE_TYPE_MAIL;
			}
			
			if (mstate_read_mail (m, M_TAG_BEGIN, name, attrs)) {
				M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
					 "read failed - going down hard\n");
				exit(-1);
			}
			
			break;
		default:
			M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "this can't happen - going down hard\n");
			exit(-1);
		}
	}
}

/**
 * process the end tags of the statefiles 
 * 
 * SAX callback for the XML endtags
 * 
 * If no tag is open below the section level the endtag is taken as the end
 * of the section itself: the section is closed if the name matches the open
 * section, every other name is ignored. Otherwise the endtag is passed to
 * the reader of the open section (global, web or mail). A failing reader, a
 * section without reader or a missing <state> ends in exit(-1).
 * 
 * @param user_data  the mstate_stack which was given to the parser
 * @param name       name of the tag
 */

void mstate_endElement(void *user_data, const xmlChar *name) {
	enum {	 M_STATE_ST_UNSET, M_STATE_ST_WEB, 
		 M_STATE_ST_TELECOM, M_STATE_ST_TRAFFIC,
		 M_STATE_ST_GLOBAL, M_STATE_ST_MAIL 
	};
	const Matches matches [] = { 
		{ M_STATE_ST_WEB, M_STATE_SECTION_WEB },
		{ M_STATE_ST_TELECOM, M_STATE_SECTION_TELECOM },
		{ M_STATE_ST_TRAFFIC, M_STATE_SECTION_TRAFFIC },
		{ M_STATE_ST_GLOBAL, M_STATE_SECTION_GLOBAL },
		{ M_STATE_ST_MAIL, M_STATE_SECTION_MAIL },
		
		{ M_STATE_ST_UNSET, NULL } 
	};
	mstate_stack *m = user_data;
	int section;
	int failed;
	int k;
	
	/* every event starts at level 0 <state> */
	m->st_depth = 0;
	
	/* -1 marks a fresh stack: clear all levels once */
	if (m->st[0].id == -1) {
		k = 0;
		while (k < M_STATE_ST_ELEM) {
			m->st[k].id = 0;
			m->st[k].data = NULL;
			k++;
		}
	}
	
	/* an endtag without an open <state> */
	if (m->st[0].id != 1) {
		exit(-1);
	}
	
	m->st_depth++;
	
	if (m->st[m->st_depth + 1].id == 0) {
		/* nothing open below the section, so this should be the end of the section */
		k = 0;
		while (matches[k].name != NULL && 0 != strcmp(name, matches[k].name)) {
			k++;
		}
		
		/* unknown names and sections which are not the open one are skipped */
		if (matches[k].name != NULL && matches[k].type == m->st[m->st_depth].id) {
			m->st[m->st_depth].id = M_STATE_ST_UNSET;
			m->st_depth_max--;
		}
		return;
	}
	
	/* pick the open section and step to the level below it before the reader is called */
	section = m->st[m->st_depth].id;
	m->st_depth++;
	
	if (section == M_STATE_ST_GLOBAL) {
		failed = mstate_read_global (m, M_TAG_END, name, NULL);
	} else if (section == M_STATE_ST_WEB) {
		failed = mstate_read_web (m, M_TAG_END, name, NULL);
	} else if (section == M_STATE_ST_MAIL) {
		failed = mstate_read_mail (m, M_TAG_END, name, NULL);
	} else {
		M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "unhandled - going down hard\n");
		exit(-1);
	}
	
	if (failed) {
		M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "read failed - going down hard\n");
		exit(-1);
	}
}

/**
 * process the content (text) of the statefiles 
 * 
 * SAX callback for the XML content (!= tags)
 * 
 * The text is handed over with an explicit length, so a NUL-terminated copy
 * is made and passed to the reader of the open section (global, web or
 * mail). Text which is not inside a tag of a section is ignored. A failing
 * reader, a section without reader, a missing <state> or a failed
 * allocation ends in exit(-1).
 * 
 * @param user_data  the mstate_stack which was given to the parser
 * @param name       the text, len characters long
 * @param len        number of characters in name
 */

void mstate_characters(void *user_data, const xmlChar *name, int len) {
	int i;
	mstate_stack *m = user_data;
	
	enum {	 M_STATE_ST_UNSET, M_STATE_ST_WEB, 
		 M_STATE_ST_TELECOM, M_STATE_ST_TRAFFIC,
		 M_STATE_ST_GLOBAL, M_STATE_ST_MAIL 
	};
	xmlChar *s;
	
	/* level 0 <state> */
	m->st_depth = 0;
	
	/* setup the array if we are here the first time */
	if (m->st[0].id == -1) {
		for (i = 0; i < M_STATE_ST_ELEM; i++) {
			m->st[i].id = 0;
			m->st[i].data = NULL;
		}
	}
	
	/* the first element */
	if (m->st[0].id != 1) {
		/* should be state */
		exit(-1);
	}
	
	m->st_depth++;
	
	/* ignore text directly after the <web> tag; no copy is needed for it */
	if (m->st_depth_max <= 1) {
		return;
	}
	
	/* a negative length can't be copied */
	if (len < 0) {
		return;
	}
	
	/* the readers expect a NUL-terminated string */
	s = malloc(((size_t) len + 1) * sizeof(xmlChar));
	if (s == NULL) {
		M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "out of memory - going down hard\n");
		exit(-1);
	}
	memcpy(s, name, (size_t) len * sizeof(xmlChar));
	s[len] = '\0';
	
	/* the reader is called with the depth already on the level below the section */
	switch (m->st[m->st_depth++].id) {
	case M_STATE_ST_GLOBAL:
		if (mstate_read_global (m, M_TAG_TEXT, s, NULL)) {
			M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "read failed - going down hard\n");
			exit(-1);
		}
		
		break;
	case M_STATE_ST_WEB:
		if (mstate_read_web (m, M_TAG_TEXT, s, NULL)) {
			M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "read failed - going down hard\n");
			exit(-1);
		}
		
		break;
	case M_STATE_ST_MAIL:
		if (mstate_read_mail (m, M_TAG_TEXT, s, NULL)) {
			M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "read failed - going down hard\n");
			exit(-1);
		}
		
		break;
	default:
		M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "unhandled - going down hard\n");
		exit(-1);
	}
	
	free(s);
}

/**
 * read a statefile into a state
 * 
 * The file is <outputdir>/<subpath>/mla.state.xml, with .YYYYMM appended if
 * both year and month are non-zero. It is parsed with the SAX callbacks of
 * this file, which fill the state.
 * 
 * @param conf     configuration, provides the output directory
 * @param state    the state which is filled by the callbacks
 * @param year     year of the statefile, 0 for the file without suffix
 * @param month    month of the statefile, 0 for the file without suffix
 * @param subpath  directory below the output directory, may be NULL
 * @return 0 after the parser has been run, -1 if the filename doesn't fit
 *         into the buffer
 */
int mstate_read (mconfig *conf, mstate *state, int year, int month, char *subpath) {
	char filename[255];
	mstate_stack m;
	int n;
	
	/* bounded formatting, a long outputdir or subpath must not overrun the buffer */
	if (year == 0 || month == 0) {
		n = snprintf(filename, sizeof(filename), "%s/%s/mla.state.xml", 
			     conf->outputdir, 
			     subpath ? subpath : "");
	} else {
		n = snprintf(filename, sizeof(filename), "%s/%s/mla.state.xml.%04d%02d", 
			     conf->outputdir, 
			     subpath ? subpath : "",
			     year, month );
	}
	
	if (n < 0 || (size_t) n >= sizeof(filename)) {
		M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "filename of the statefile is too long\n");
		return -1;
	}
	
	mlaSAXHandler.startElement = mstate_startElement;
	mlaSAXHandler.endElement = mstate_endElement;
	mlaSAXHandler.characters = mstate_characters;
	mlaSAXHandler.warning = xmlParserWarning;
	mlaSAXHandler.error = xmlParserError;
	mlaSAXHandler.fatalError = xmlParserError;
	
	/* st[0].id == -1 tells the callbacks to clear the stack on the first event */
	m.ext_conf = conf;
	m.st[0].id = -1;
	m.state = state;
	m.st_depth_max = 0;
	m.st_depth = 0;

	/* NOTE(review): the result of the parser is not passed on, so a missing 
	 * or broken statefile still gives 0 - confirm that the callers rely on this */
	xmlSAXUserParseFile(&mlaSAXHandler, &m, filename);
	
	return 0;
}
