#include <stddef.h>
#include "mb.h"

/*
 * Locate the character that contains byte offset *p_b inside s, where
 * *p_e is the exclusive end offset of the usable data.
 *
 * - If *p_b >= *p_e nothing is examined and EOF is returned.
 * - If s[*p_b] is in 0xC0..0xFD, mb_char_dec() is run from that byte;
 *   *p_e becomes *p_b plus the length it reports (or *p_b + 1 when it
 *   reports 0) and its return value is returned.
 * - If s[*p_b] is in 0x80..0xBF, the bytes before it are scanned backwards
 *   (while fewer than MB_LEN_MAX bytes have been stepped over) for a byte in
 *   0xC0..0xFD.  Only the first such byte found is tried: when the sequence
 *   decoded from it reaches past *p_b, *p_b and *p_e are set to that
 *   sequence's bounds and the decoded value is returned.
 * - In every remaining case the byte stands alone: *p_e becomes *p_b + 1
 *   and the byte value (0..255) is returned.
 */
int
mb_find_char(const char *s, size_t *p_b, size_t *p_e)
{
  const size_t beg = *p_b;
  const size_t end = *p_e;
  int byte;

  if (beg >= end)
    return EOF;

  byte = (unsigned char)s[beg];

  if (byte >= 0xC0 && byte <= 0xFD) {
    size_t len = 0;
    int wc = mb_char_dec(&s[beg], &len, end - beg);

    /* a reported length of 0 still consumes the one byte we looked at */
    *p_e = beg + (len > 0 ? len : 1);
    return wc;
  }

  if ((byte & ~0x3F) == 0x80) {
    size_t pos = beg;

    while (pos > 0 && beg - pos < MB_LEN_MAX) {
      unsigned char cand;
      size_t len = 0;
      int wc;

      --pos;
      cand = (unsigned char)s[pos];

      if (cand < 0xC0 || cand > 0xFD)
        continue;

      wc = mb_char_dec(&s[pos], &len, end - pos);

      if (pos + len > beg) {
        *p_b = pos;
        *p_e = pos + len;
        return wc;
      }

      /* that sequence ends before our byte: give up, byte stands alone */
      break;
    }
  }

  *p_e = beg + 1;
  return byte;
}

/*
 * Fill *ch with the classification of code c that was read from n bytes.
 *
 * n == 2..6: c is accepted when it is at least the threshold for that
 * length (0x80, 0x800, 0x10000, 0x200000, 0x4000000) and is stored as-is
 * in set mb_128 with final character 0x47, G1/SR.  Smaller values are
 * handed to mb_parse_badutf8() after adding a per-length bias; the bias is
 * the sum of the thresholds of all shorter lengths (0, 0x80, 0x880,
 * 0x10880, 0x210880).
 *
 * Any other n: 0x21..0x7E is stored as c - 0x20 in set mb_94_0 with final
 * character 0x42, G0/SL.  Everything else is stored as-is in set mb_128
 * with MB_UNKNOWN_FC, using G1/SR when bit 0x80 of c is set and G0/SL
 * otherwise.
 */
void
mb_chartype(int c, size_t n, mb_char_t *ch)
{
  int threshold;
  int bias;

  switch (n) {
  case 2:
    threshold = 0x80;
    bias = 0;
    break;
  case 3:
    threshold = 0x800;
    bias = 0x80;
    break;
  case 4:
    threshold = 0x10000;
    bias = 0x880;
    break;
  case 5:
    threshold = 0x200000;
    bias = 0x10880;
    break;
  case 6:
    threshold = 0x4000000;
    bias = 0x210880;
    break;
  default:
    if (c >= 0x21 && c <= 0x7E) {
      ch->c = c - 0x20;
      ch->set = mb_94_0;
      ch->fc = 0x42 & MB_ESC_FC_MASK;
      ch->gn = mb_G0;
      ch->sn = mb_SL;
      return;
    }

    ch->c = c;
    ch->set = mb_128;
    ch->fc = MB_UNKNOWN_FC & MB_ESC_FC_MASK;

    if (c & 0x80) {
      ch->gn = mb_G1;
      ch->sn = mb_SR;
    }
    else {
      ch->gn = mb_G0;
      ch->sn = mb_SL;
    }

    return;
  }

  if (c < threshold) {
    /* value too small for an n-byte sequence */
    mb_parse_badutf8(c + bias, ch);
    return;
  }

  ch->c = c;
  ch->set = mb_128;
  ch->fc = 0x47 & MB_ESC_FC_MASK;
  ch->gn = mb_G1;
  ch->sn = mb_SR;
}

/*
 * Width callbacks for mb_128 sets whose final character is not 0x47,
 * keyed by an inclusive range [beg, end] of masked final characters.
 * mb_parsedcharlength() binary-searches this array (reading beg/end
 * through an unsigned char cast), so the rows must be in ascending,
 * non-overlapping order.
 *
 * The rows are generated: def_mb128 is redefined below to emit one
 * {beg, end, cwidth} initializer, then mb128.h is included.
 * NOTE(review): presumably mb128.h consists of def_mb128(...) invocations,
 * one per supported set -- confirm against that header.
 */
static struct {
  char beg, end;
  mb_128cwidth_t func;
} mb_128cwidthv[] = {
#undef def_mb128
#define def_mb128(mac, fcb, fce, convg0sl, encoder, decoder, cwidth) {(fcb) & MB_ESC_FC_MASK, (fce) & MB_ESC_FC_MASK, cwidth},
#include "mb128.h"
};

/*
 * UCS width table made from EastAsianWidth.txt; the rows come from
 * ucswidth.h.  Selected by mb_set_widthtable() under the name
 * "EastAsianWidth" and binary-searched by mb_parsedcharlength(), so the
 * rows are expected to be sorted in ascending order.
 */
mb_ucswidthtab_t eaw_ucswidth[] = {
#include "ucswidth.h"
};

/*
 * UCS width table taken from xterm(-146)/wcwidth.c::my_wcwidth().
 *
 * Each row is {first code, last code, width}; every row here has width
 * 0 or 2.  mb_parsedcharlength() binary-searches the selected table and
 * returns 1 for codes that no row covers, so the rows must stay sorted in
 * ascending order and must not overlap.  mb_set_widthtable() also uses
 * this table as the fallback when no named table matches.
 */
mb_ucswidthtab_t xterm_ucswidth[] = {
  {0x0300, 0x034E, 0},
  {0x0360, 0x0362, 0},
  {0x0483, 0x0486, 0},
  {0x0488, 0x0489, 0},
  {0x0591, 0x05A1, 0},
  {0x05A3, 0x05B9, 0},
  {0x05BB, 0x05BD, 0},
  {0x05BF, 0x05BF, 0},
  {0x05C1, 0x05C2, 0},
  {0x05C4, 0x05C4, 0},
  {0x064B, 0x0655, 0},
  {0x0670, 0x0670, 0},
  {0x06D6, 0x06E4, 0},
  {0x06E7, 0x06E8, 0},
  {0x06EA, 0x06ED, 0},
  {0x0711, 0x0711, 0},
  {0x0730, 0x074A, 0},
  {0x07A6, 0x07B0, 0},
  {0x0901, 0x0902, 0},
  {0x093C, 0x093C, 0},
  {0x0941, 0x0948, 0},
  {0x094D, 0x094D, 0},
  {0x0951, 0x0954, 0},
  {0x0962, 0x0963, 0},
  {0x0981, 0x0981, 0},
  {0x09BC, 0x09BC, 0},
  {0x09C1, 0x09C4, 0},
  {0x09CD, 0x09CD, 0},
  {0x09E2, 0x09E3, 0},
  {0x0A02, 0x0A02, 0},
  {0x0A3C, 0x0A3C, 0},
  {0x0A41, 0x0A42, 0},
  {0x0A47, 0x0A48, 0},
  {0x0A4B, 0x0A4D, 0},
  {0x0A70, 0x0A71, 0},
  {0x0A81, 0x0A82, 0},
  {0x0ABC, 0x0ABC, 0},
  {0x0AC1, 0x0AC5, 0},
  {0x0AC7, 0x0AC8, 0},
  {0x0ACD, 0x0ACD, 0},
  {0x0B01, 0x0B01, 0},
  {0x0B3C, 0x0B3C, 0},
  {0x0B3F, 0x0B3F, 0},
  {0x0B41, 0x0B43, 0},
  {0x0B4D, 0x0B4D, 0},
  {0x0B56, 0x0B56, 0},
  {0x0B82, 0x0B82, 0},
  {0x0BC0, 0x0BC0, 0},
  {0x0BCD, 0x0BCD, 0},
  {0x0C3E, 0x0C40, 0},
  {0x0C46, 0x0C48, 0},
  {0x0C4A, 0x0C4D, 0},
  {0x0C55, 0x0C56, 0},
  {0x0CBF, 0x0CBF, 0},
  {0x0CC6, 0x0CC6, 0},
  {0x0CCC, 0x0CCD, 0},
  {0x0D41, 0x0D43, 0},
  {0x0D4D, 0x0D4D, 0},
  {0x0DCA, 0x0DCA, 0},
  {0x0DD2, 0x0DD4, 0},
  {0x0DD6, 0x0DD6, 0},
  {0x0E31, 0x0E31, 0},
  {0x0E34, 0x0E3A, 0},
  {0x0E47, 0x0E4E, 0},
  {0x0EB1, 0x0EB1, 0},
  {0x0EB4, 0x0EB9, 0},
  {0x0EBB, 0x0EBC, 0},
  {0x0EC8, 0x0ECD, 0},
  {0x0F18, 0x0F19, 0},
  {0x0F35, 0x0F35, 0},
  {0x0F37, 0x0F37, 0},
  {0x0F39, 0x0F39, 0},
  {0x0F71, 0x0F7E, 0},
  {0x0F80, 0x0F84, 0},
  {0x0F86, 0x0F87, 0},
  {0x0F90, 0x0F97, 0},
  {0x0F99, 0x0FBC, 0},
  {0x0FC6, 0x0FC6, 0},
  {0x102D, 0x1030, 0},
  {0x1032, 0x1032, 0},
  {0x1036, 0x1037, 0},
  {0x1039, 0x1039, 0},
  {0x1058, 0x1059, 0},
  {0x1100, 0x115F, 2},
  {0x17B7, 0x17BD, 0},
  {0x17C6, 0x17C6, 0},
  {0x17C9, 0x17D3, 0},
  {0x18A9, 0x18A9, 0},
  {0x20D0, 0x20E3, 0},
  {0x2E80, 0x3009, 2},
  {0x300C, 0x3019, 2},
  {0x301C, 0x3029, 2},
  {0x302A, 0x302F, 0},
  {0x3030, 0x303E, 2},
  {0x3040, 0x3098, 2},
  {0x3099, 0x309A, 0},
  {0x309B, 0xA4CF, 2},
  {0xAC00, 0xD7A3, 2},
  {0xF900, 0xFAFF, 2},
  {0xFB1E, 0xFB1E, 0},
  {0xFE20, 0xFE23, 0},
  {0xFE30, 0xFE6F, 2},
  {0xFF00, 0xFF5F, 2},
  {0xFFE0, 0xFFE6, 2},
};

/*
 * Currently selected UCS width table and its number of rows.  Both are
 * assigned by mb_set_widthtable(); mb_parsedcharlength() calls that
 * function on demand while ucswidth is still NULL.
 */
static mb_ucswidthtab_t *ucswidth = NULL;
static size_t nucswidth = 0;

/*
 * Maps a name to a UCS width table and that table's row count.
 * mb_set_widthtable() looks a name up here through mb_nc_bsearch(),
 * passing the offsets of the `name' and `namelen' members.
 *
 * NOTE(review): MB_KEY_DEF() presumably expands to the string literal
 * followed by its length, filling `name' and `namelen' -- confirm in mb.h.
 * NOTE(review): if mb_nc_bsearch() is a binary search, the rows must be
 * kept in its key order; "xterm" before "EastAsianWidth" does not look
 * sorted -- verify against mb_nc_bsearch().
 */
static struct t2w_st {
  const char *name;
  size_t namelen;
  mb_ucswidthtab_t *widthtab;
  size_t widthtab_size;
} t2w_tab[] = {
  {MB_KEY_DEF("xterm"), xterm_ucswidth, sizeof(xterm_ucswidth) / sizeof(xterm_ucswidth[0])},
  {MB_KEY_DEF("EastAsianWidth"), eaw_ucswidth, sizeof(eaw_ucswidth) / sizeof(eaw_ucswidth[0])},
};
 
/*
 * Select the UCS width table used by mb_parsedcharlength().
 *
 * term names the table to look up in t2w_tab; when it is NULL the value
 * of the TERM environment variable is used instead.  If there is no name
 * at all, or the lookup finds nothing, the xterm table is selected.
 */
void
mb_set_widthtable(char *term)
{
  struct t2w_st *hit = NULL;

  if (term == NULL)
    term = getenv("TERM");

  if (term != NULL)
    hit = mb_nc_bsearch(term, strlen(term), t2w_tab,
                        offsetof(struct t2w_st, name), offsetof(struct t2w_st, namelen),
                        sizeof(struct t2w_st), sizeof(t2w_tab));

  if (hit == NULL) {
    /* unknown or missing name: fall back to the xterm widths */
    ucswidth = xterm_ucswidth;
    nucswidth = sizeof(xterm_ucswidth) / sizeof(xterm_ucswidth[0]);
    return;
  }

  ucswidth = hit->widthtab;
  nucswidth = hit->widthtab_size;
}

/*
 * Return the width of an already classified character.
 *
 * - set mb_94x94: always 2.
 * - set mb_128 with final character 0x47: the width recorded for ch->c in
 *   the selected UCS width table (chosen via mb_set_widthtable(NULL) on
 *   first use), or 1 when no row covers it.
 * - set mb_128 with any other final character: the result of the callback
 *   registered in mb_128cwidthv for that final character, or 1 when none
 *   is registered.
 * - every other set: 1.
 */
size_t
mb_parsedcharlength(mb_char_t *ch)
{
  size_t lo, hi, mid;
  int fc, c;

  if (ch->set == mb_94x94)
    return 2;

  if (ch->set != mb_128)
    return 1;

  fc = ch->fc & MB_ESC_FC_MASK;

  if (fc != (0x47 & MB_ESC_FC_MASK)) {
    lo = 0;
    hi = sizeof(mb_128cwidthv) / sizeof(mb_128cwidthv[0]);

    while (lo < hi) {
      mid = (lo + hi) / 2;

      /* beg/end are plain char; compare them as unsigned bytes */
      if (fc < (unsigned char)mb_128cwidthv[mid].beg)
        hi = mid;
      else if (fc > (unsigned char)mb_128cwidthv[mid].end)
        lo = mid + 1;
      else
        return mb_128cwidthv[mid].func(ch);
    }

    return 1;
  }

  c = ch->c;

  if (!ucswidth)
    mb_set_widthtable(NULL);

  lo = 0;
  hi = nucswidth;

  while (lo < hi) {
    mid = (lo + hi) / 2;

    if (c < ucswidth[mid].beg)
      hi = mid;
    else if (c > ucswidth[mid].end)
      lo = mid + 1;
    else
      return ucswidth[mid].width;
  }

  return 1;
}

/*
 * Width of code c that was read from n bytes: classify it with
 * mb_chartype() and return what mb_parsedcharlength() reports for the
 * result.
 */
size_t
mb_charlength(int c, size_t n)
{
  mb_char_t parsed;
  size_t width;

  mb_chartype(c, n, &parsed);
  width = mb_parsedcharlength(&parsed);

  return width;
}

/*
 * Sum of the widths of all characters in the n bytes starting at s.
 * Characters are split off one at a time with mb_find_char() and measured
 * with mb_charlength().
 */
size_t
mb_memlength(const char *s, size_t n)
{
  size_t total = 0;
  size_t off = 0;

  while (off < n) {
    size_t from = 0;
    size_t to = n - off;	/* in: bytes left, out: bytes used by this char */
    int c = mb_find_char(s + off, &from, &to);

    total += mb_charlength(c, to);
    off += to;
  }

  return total;
}

/*
 * Sum of the widths of all characters in the NUL-terminated string s.
 * Characters are split off one at a time with mb_find_char() and measured
 * with mb_charlength().
 *
 * NOTE(review): every mb_find_char() call is given a fixed window of
 * MB_LEN_MAX bytes, which can extend past the terminating NUL; this relies
 * on mb_char_dec() not reading beyond a byte it rejects -- confirm.
 */
size_t
mb_strlength(const char *s)
{
  size_t total = 0;

  while (*s) {
    size_t from = 0;
    size_t to = MB_LEN_MAX;
    int c = mb_find_char(s, &from, &to);

    total += mb_charlength(c, to);
    s += to;
  }

  return total;
}

/*
 * Pre-conversion table used by mb_iso_preconv(): a code in the inclusive
 * range [beg, end] maps to `alt', from which the replacement set and final
 * character are extracted with MB_ESC_DEC_SET()/MB_ESC_DEC_FC().  The rows
 * come from preconv.h and are binary-searched, so they are expected to be
 * sorted in ascending order.
 */
static struct {
  int beg, end, alt;
} preconv_tab[] = {
#include "preconv.h"
};

/*
 * Look iso up in preconv_tab.
 *
 * When a row covers it, the set and final character decoded from that
 * row's `alt' value are stored in *p_set and *p_fc and 1 is returned.
 * Otherwise 0 is returned and neither output is written.
 */
int
mb_iso_preconv(int iso, int *p_set, int *p_fc)
{
  int lo = 0;
  int hi = sizeof(preconv_tab) / sizeof(preconv_tab[0]);

  while (lo < hi) {
    int mid = (lo + hi) / 2;

    if (iso < preconv_tab[mid].beg) {
      hi = mid;
      continue;
    }

    if (iso > preconv_tab[mid].end) {
      lo = mid + 1;
      continue;
    }

    *p_set = MB_ESC_DEC_SET(preconv_tab[mid].alt);
    *p_fc = MB_ESC_DEC_FC(preconv_tab[mid].alt);
    return 1;
  }

  return 0;
}

/*
 * Property values for ranges of UCS codes, consulted by mb_is_ucs_cjk():
 * a code in the inclusive range [beg, end] has property `prop'.  The rows
 * come from ucs-cjk.h and are binary-searched, so they are expected to be
 * sorted in ascending order.
 */
static struct {
  int beg, end, prop;
} ucs_cjk_tab[] = {
#include "ucs-cjk.h"
};

/*
 * Binary-search ucs_cjk_tab for the row whose range contains ucs.
 * Returns a pointer to that row's `prop' member (storage inside the
 * static table), or NULL when no row covers ucs.
 */
int *
mb_is_ucs_cjk(int ucs)
{
  int lo = 0;
  int hi = sizeof(ucs_cjk_tab) / sizeof(ucs_cjk_tab[0]);

  while (lo < hi) {
    int mid = (lo + hi) / 2;

    if (ucs < ucs_cjk_tab[mid].beg) {
      hi = mid;
      continue;
    }

    if (ucs > ucs_cjk_tab[mid].end) {
      lo = mid + 1;
      continue;
    }

    return &ucs_cjk_tab[mid].prop;
  }

  return NULL;
}

/*
 * Property values for ranges of ISO-side codes, consulted by
 * mb_is_iso_cjk(): a code in the inclusive range [beg, end] has property
 * `prop'.  The rows come from iso-cjk.h and are binary-searched, so they
 * are expected to be sorted in ascending order.
 */
static struct {
  int beg, end, prop;
} iso_cjk_tab[] = {
#include "iso-cjk.h"
};

/*
 * Binary-search iso_cjk_tab for the row whose range contains iso.
 * Returns a pointer to that row's `prop' member (storage inside the
 * static table), or NULL when no row covers iso.
 */
int *
mb_is_iso_cjk(int iso)
{
  int lo = 0;
  int hi = sizeof(iso_cjk_tab) / sizeof(iso_cjk_tab[0]);

  while (lo < hi) {
    int mid = (lo + hi) / 2;

    if (iso < iso_cjk_tab[mid].beg) {
      hi = mid;
      continue;
    }

    if (iso > iso_cjk_tab[mid].end) {
      lo = mid + 1;
      continue;
    }

    return &iso_cjk_tab[mid].prop;
  }

  return NULL;
}

/*
 * Property lookup data searched with mb_bt_search() from mb_charprop().
 * The contents come from ucs-prop.h.
 * NOTE(review): presumably a serialized search tree in the format
 * mb_bt_search() expects -- confirm against its implementation.
 */
static unsigned int ucs_prop_tab[] = {
#include "ucs-prop.h"
};

/*
 * Counterpart of ucs_prop_tab for keys built with MB_WORD_ENC(); the
 * contents come from iso-prop.h.
 */
static unsigned int iso_prop_tab[] = {
#include "iso-prop.h"
};

/*
 * Return the property value of a classified character, or 0 when none is
 * known.
 *
 * The search key and table depend on the character's set/final character:
 *  - MB_ESC_ASCII: key is ch->c + 0x20, looked up in ucs_prop_tab.
 *    (NOTE(review): this appears to undo the "c - 0x20" applied by
 *    mb_chartype() for 0x21..0x7E -- confirm MB_ESC_ASCII matches that
 *    set/final character.)
 *  - MB_ESC_UTF8: key is ch->c, looked up in ucs_prop_tab.
 *  - mb_128 with MB_UNKNOWN_FC and ch->c < 0xA0: key is ch->c, looked up
 *    in ucs_prop_tab, but still treated as a non-UCS key for the fallbacks
 *    below.
 *  - everything else: key is MB_WORD_ENC(ch->c, esc), looked up in
 *    iso_prop_tab.
 *
 * When a lookup fails for a non-UCS key and mb_iso_preconv() offers a
 * replacement set, the key is rebuilt for that set and the lookup is
 * retried in iso_prop_tab.  If nothing is found, the CJK range tables
 * (mb_is_iso_cjk() / mb_is_ucs_cjk()) provide the final answer.
 */
int
mb_charprop(mb_char_t *ch)
{
  int set = ch->set, fc;
  int esc = MB_ESC_ENC(set, ch->fc);
  int key = ch->c, notucs = 1;
  unsigned int *node, value;
  int *cjk;
  mb_bt_result_t res;

  switch (esc) {
  case MB_ESC_ASCII:
    key += 0x20;
    /* fallthrough */
  case MB_ESC_UTF8:
    notucs = 0;
    node = ucs_prop_tab;
    break;
  case MB_ESC_ENC(mb_128, MB_UNKNOWN_FC):
    if (key < 0xA0) {
      node = ucs_prop_tab;
      break;
    }
    /* fallthrough */
  default:
    node = iso_prop_tab;
    key = MB_WORD_ENC(key, esc);
    break;
  }

  for (;;) {
    res = mb_bt_search(key, node, &value);

    if (res != mb_bt_failure)
      return value;

    if (!notucs || !mb_iso_preconv(key, &set, &fc))
      break;

    /* retry with the replacement set suggested by the preconv table */
    esc = MB_ESC_ENC(set, fc);
    key = MB_WORD_ENC(ch->c, esc);
    node = iso_prop_tab;
  }

  /*
   * The CJK lookups return int *, so keep the result in its own int *
   * rather than assigning it to the unsigned int * used for the tables.
   */
  cjk = notucs ? mb_is_iso_cjk(key) : mb_is_ucs_cjk(key);

  return cjk ? *cjk : 0;
}

/*
 * Property of the character that starts at s, where e is the number of
 * bytes available.  The character is extracted with mb_find_char(),
 * classified with mb_chartype() using the end offset mb_find_char()
 * reports, and looked up with mb_charprop().
 */
int
mb_mbc_prop(const char *s, size_t e)
{
  mb_char_t parsed;
  size_t start = 0;
  int code;

  code = mb_find_char(s, &start, &e);
  mb_chartype(code, e, &parsed);

  return mb_charprop(&parsed);
}
