#include <urkepi.h>

/*
 * Number of distinct residue symbols assumed by MaxScore when it works
 * out the largest attainable window score:
 * 20 amino acids + selenocysteine (U) + stop (*) + unknown (X)
 */
#define POSSIBLE_RESIDUES 23

extern EpiDatPtr EpiDatNew (void)
{
  EpiDatPtr epip;

  if ((epip = (EpiDatPtr) MemNew (sizeof (EpiDat))) != NULL)
  {
    epip->window = 16;
    epip->linker = 5;        /* less than "classical" epitope */
    epip->method = 2;
    epip->percentcut = 5.0;
  }
  return epip;
}

/*
 * Release an EpiDat block together with the score and epiarray buffers
 * it points to.  A NULL argument is accepted and returned unchanged;
 * otherwise the value handed back is whatever MemFree returns for the
 * block itself.
 */
extern EpiDatPtr EpiDatFree (EpiDatPtr epip)
{
  if (epip == NULL)
    return epip;

  MemFree (epip->epiarray);
  MemFree (epip->score);
  epip = MemFree (epip);

  return epip;
}

/*
 * Multiply together the counters stored for 'A'..'Z' and for '*'.
 *
 * epiarray is indexed by character code; the result is the product of
 * those 27 entries as a FloatHi.
 */
static FloatHi ArrayScore (Int4Ptr epiarray)
{
  FloatHi  product = 1.0;
  Int4Ptr  slot;
  Int4Ptr  lastLetter = epiarray + (Int4) 'Z';

  /* letters first, then the stop symbol -- same order as a plain
     index loop so the floating-point result is unchanged */
  for (slot = epiarray + (Int4) 'A'; slot <= lastLetter; slot++)
    product *= (FloatHi) *slot;

  return product * (FloatHi) epiarray['*'];
}

/*
 * Write episcore into (end - start + 1) consecutive entries beginning at
 * fptrscan.  An entry is overwritten when it is still unset (<= 0.0) or
 * when episcore is smaller than the value already stored, so each entry
 * keeps the lowest score seen so far.
 *
 * start and end only determine how many entries are visited; the first
 * entry touched is always *fptrscan.
 */
static void UpdateScore (FloatHiPtr fptrscan, FloatHi episcore,
                         Int4 start, Int4 end)
{
  Int4 pos = start;

  while (pos <= end)
  {
    if (*fptrscan <= 0.0 || episcore < *fptrscan)
      *fptrscan = episcore;
    fptrscan++;
    pos++;
  }
}

/*
 * Largest counter product used as the 100% reference for a window.
 *
 * The window residues are spread as evenly as possible over
 * POSSIBLE_RESIDUES classes: nhigh classes take the value (low + 1) and
 * the remaining classes take the value low.  The result is
 * low^(POSSIBLE_RESIDUES - nhigh) * (low + 1)^nhigh.
 */
static FloatHi MaxScore (Int4 window)
{
  Int4 low   = window / POSSIBLE_RESIDUES;
  Int4 nhigh = window % POSSIBLE_RESIDUES;

  if (nhigh == 0)
    nhigh = POSSIBLE_RESIDUES;   /* exact multiple: every class is "high" */
  else
    low += 1;

  return (FloatHi) (pow ((FloatHi) low,
                         (FloatHi) (POSSIBLE_RESIDUES - nhigh)) *
                    pow ((FloatHi) (low + 1), (FloatHi) nhigh));
}

/*
 * Express episcore as a percentage of MaxScore (window).
 */
static FloatHi PercentEpiScore (FloatHi episcore, Int4 window)
{
  return (episcore * 100.0) / MaxScore (window);
}

/*
 * Method-2 scan of one stretch beginning at seqin[start].
 *
 * The residue counters in epip->epiarray ('A'..'Z' and '*') are reset to
 * 1, then a window anchored at start is grown one residue at a time from
 * the minimum length (6, or epip->window when that is smaller) up to
 * epip->window residues, never going past end.  For every length the
 * percent score is computed; the end position of the window with the
 * lowest percent score (ties go to the longer window) is remembered.
 *
 * When such a window exists, the entries of fptrhead for start..that end
 * position are updated through UpdateScore and the function returns the
 * end position minus half the minimum length.  Otherwise it returns the
 * end position of the minimum-length window.  Returns -1 when seqin or
 * epip is NULL.
 */
static Int4 PredictEpiI2 (CharPtr seqin, Int4 start, Int4 end,
                          EpiDatPtr epip, FloatHiPtr fptrhead)
{
  Int4       pos, letter;
  Int4       curlen, shortest;
  Int4       firstend, bestend;
  CharPtr    residue;
  FloatHi    lastraw, pct, bestpct;

  if (seqin == NULL || epip == NULL)
    return -1;

  shortest = (epip->window < 6) ? epip->window : 6;
  bestpct = 100.0;
  bestend = -1;

  /* every counter starts at 1 for each call */
  for (letter = (Int4) 'A'; letter <= (Int4) 'Z'; letter++)
    epip->epiarray[letter] = 1;
  epip->epiarray['*'] = 1;

  /* count all but the last residue of the shortest window */
  residue = seqin + start;
  firstend = start + shortest - 1;
  for (pos = start; pos < firstend; pos++)
  {
    epip->epiarray[*residue] += 1;
    residue++;
  }

  /* grow the window residue by residue, tracking the lowest percent */
  pos = firstend;
  curlen = shortest;
  while (pos <= end && curlen <= epip->window)
  {
    epip->epiarray[*residue] += 1;
    residue++;

    lastraw = ArrayScore (epip->epiarray);
    pct = PercentEpiScore (lastraw, pos - start + 1);
    if (pct <= bestpct)
    {
      bestpct = pct;
      bestend = pos;
    }

    pos++;
    curlen++;
  }

  if (bestend < 0)
    return firstend;

  /* NOTE(review): the value stored is the raw score of the LAST window
     evaluated above, not of the window that gave the lowest percent --
     confirm this is intended */
  UpdateScore (&fptrhead[start], lastraw, start, bestend);
  return bestend - (shortest / 2);
}

/*
 * Method-1 scan beginning at seqin[start].
 *
 * The residue counters in epip->epiarray ('A'..'Z' and '*') are reset to
 * 1 and the scan then runs in three phases:
 *
 *   1. A window of minwin residues (6, or epip->window when smaller) is
 *      slid forward until its percent score drops below mincutoff
 *      (43.0).  If no such window is found up to end, -1 is returned.
 *   2. Set-up: when epip->window equals minwin the seed window is scored
 *      once against epip->percentcut; otherwise residues are appended
 *      one at a time (without dropping any) and scores are recorded
 *      while the percent score stays below epip->percentcut.
 *   3. Rolling scan: a window of epip->window residues is slid forward,
 *      recording scores through UpdateScore until the percent score is
 *      no longer below epip->percentcut or end is reached.
 *
 * Returns -1 when seqin or epip is NULL or no seed window is found;
 * otherwise the position at which scanning stopped (j from phase 2, or
 * i - 1 from phase 3).
 */
static Int4 PredictEpiI1 (CharPtr seqin, Int4 start, Int4 end,
                          EpiDatPtr epip, FloatHiPtr fptrhead)
{
  Int4       i, j, k;
  CharPtr    seq;
  FloatHiPtr fptr;
  FloatHi    episcore;
  Boolean    flagGotMin;

  Int4       minwin = 6;          /* length of the seed window */
  FloatHi    mincutoff = 43.0;    /* percent score a seed window must beat */

  if (seqin == NULL || epip == NULL)
    return -1;

  /* the seed window may not be longer than the full window */
  if (minwin > epip->window)
    minwin = epip->window;

  flagGotMin = FALSE;

  seq = &seqin[start];
  fptr = &fptrhead[start];

/* "zero" array for every cycle */
  for (j = (Int4) 'A'; j <= (Int4) 'Z'; j++)
   epip->epiarray[j] = 1;
  epip->epiarray['*'] = 1;

/* minwin-mer set up */
  /* count the first minwin-1 residues; the last one is added in the
     loop below */
  k = start + minwin - 1;
  for (j = start; j < k && j <= end; j++)
  {
    epip->epiarray[*seq] += 1;
    seq++;
  }

/* minimum minwin-mer */
  /* slide the minwin window: add the newest residue, score, and if the
     window is not below mincutoff drop the oldest residue and move on;
     fptr advances with the window start */
  for (i = j; i <= end; i++)
  {
    epip->epiarray[*seq] += 1;
    episcore = ArrayScore (epip->epiarray);
    if (PercentEpiScore (episcore, minwin) < mincutoff)
    {
      flagGotMin = TRUE;
      break;
    }
    else
    {
      epip->epiarray[*(seq - minwin + 1)] -= 1;
    }
    seq++;
    fptr++;
  }
  if (!flagGotMin)
    return -1;

/* run -- set up */
  /* seq was left on the last residue of the seed window (position i);
     step to the next residue */
  seq++;
  if (epip->window == minwin)
  {
    /* seed window is already full length: score it against percentcut,
       then drop its oldest residue ready for the rolling scan */
    episcore = ArrayScore (epip->epiarray);
    if (PercentEpiScore (episcore, epip->window) < epip->percentcut)
      UpdateScore (fptr, episcore, i - epip->window + 1, i);
    j = i + 1;
    fptr++;
    epip->epiarray[*(seq - epip->window)] -= 1;
  }
  else
  {
    /* grow the window by appending residues; nothing is removed here */
    k = i + epip->window - 1 - minwin;
    for (j = i; j < k && j <= end; j++)
    {
      epip->epiarray[*seq] += 1;
      episcore = ArrayScore (epip->epiarray);
      /* NOTE(review): the length passed here is j-start+1+minwin --
         confirm it matches the number of residues currently counted */
      if (PercentEpiScore (episcore, j-start+1+minwin) < epip->percentcut)
      {
        /* clamp to window+1 so that subtracting (window+1) later, as
           FilterEpi does, cannot give a negative value */
        if (episcore < (FloatHi) epip->window+1)
          episcore = (FloatHi) epip->window+1;
        /* NOTE(review): this writes (j - i + window) entries starting
           at fptr -- verify it stays inside the score buffer */
        UpdateScore (fptr, episcore, i - epip->window + 1, j);
      }
      else
      {
        return j;
      }
      seq++;
    }
  }

/* run -- rolling scan */
  /* slide a full-length window: add newest residue, score, record while
     below percentcut, then drop the oldest residue */
  for (i = j; i <= end; i++)
  {
    epip->epiarray[*seq] += 1;
    episcore = ArrayScore (epip->epiarray);
    if (PercentEpiScore (episcore, epip->window) < epip->percentcut)
      UpdateScore (fptr, episcore, i-epip->window+1, i);
    else
      return i - 1;
    fptr++;
    seq++;
    epip->epiarray[*(seq - epip->window)] -= 1;
  }

  return (i - 1);
}

/*
 * Score the residues seqin[start..end] with the method selected in
 * epip->method (1 -> PredictEpiI1, anything else -> PredictEpiI2).
 *
 * seqin and the returned array are both indexed by absolute sequence
 * position: the scanners read seqin[start] onward and write the score
 * array from index start onward.  The array therefore holds end + 2
 * entries so that every position up to end is addressable.
 *
 * epip->epiarray is allocated as scratch space for the duration of the
 * call and released (and set to the MemFree result) before returning on
 * every path.
 *
 * Returns the score array (allocated with MemNew, not freed here), or
 * NULL when an argument is NULL, start is negative, the region is
 * shorter than epip->window, or an allocation fails.
 */
extern FloatHiPtr PredictEpi (CharPtr seqin, Int4 start, Int4 end,
                              EpiDatPtr epip)
{
  Int4       start1, lastpos;
  FloatHiPtr epiprob;

  if (seqin == NULL || epip == NULL)
    return NULL;

  /* positions are used as array indices, so they must not be negative */
  if (start < 0)
    return NULL;

  /* at least one full window must fit into the region */
  if (start + (epip->window - 1) > end)
    return NULL;

  epip->epiarray = (Int4Ptr) MemNew ((size_t) (sizeof (Int4) * 256));
  if (epip->epiarray == NULL)
    return NULL;

  /* sized by end, not by (end - start): the scanners index this array
     with absolute positions */
  epiprob = (FloatHiPtr) MemNew (sizeof (FloatHi) * ((size_t) end + 2));
  if (epiprob == NULL)
  {
    /* do not leak the scratch counters when the second allocation fails */
    epip->epiarray = MemFree (epip->epiarray);
    return NULL;
  }

  start1 = start;
  while (start1 <= end)
  {
    switch (epip->method)
    {
     case 1:
      lastpos = PredictEpiI1 (seqin, start1, end, epip, epiprob);
      break;
     case 2:
     default:
      lastpos = PredictEpiI2 (seqin, start1, end, epip, epiprob);
      break;
    }

    /* scanner found nothing more */
    if (lastpos < 0)
      break;

    /* resume after the last handled position; stop once a full window
       no longer fits */
    start1 = lastpos + 1;
    if (start1 + (epip->window - 1) > end)
      break;
  }

  epip->epiarray = MemFree (epip->epiarray);
  return epiprob;
}

/*
 The SeqPort must be opened over the full sequence (0 to bsp->length-1);
 start and end select the region that is to be searched.
*/

/*
 * Read residues start..end from spp and run PredictEpi over them.
 *
 * PredictEpi indexes its sequence buffer by absolute position, so the
 * temporary buffer holds end + 1 characters and residue p is stored at
 * index p; indices below start are left as allocated and are not filled.
 *
 * Returns the array produced by PredictEpi, or NULL when spp or epip is
 * NULL, the range is invalid (start < 0 or end < start), or the buffer
 * cannot be allocated.
 */
extern FloatHiPtr PredictEpiSeqPort (SeqPortPtr spp,
                                     Int4 start, Int4 end,
                                     EpiDatPtr epip)
{
  CharPtr    seqhead;
  Int4       pos;
  FloatHiPtr epiprob;

  if (spp == NULL || epip == NULL)
    return NULL;

  if (start < 0 || end < start)
    return NULL;

  /* end + 1 characters: the buffer is addressed with absolute positions */
  seqhead = (CharPtr) MemNew (sizeof (Char) * ((size_t) end + 1));
  if (seqhead == NULL)
    return NULL;

  /* position the port on the first wanted residue and store each
     residue at its own sequence position */
  SeqPortSeek (spp, start, SEEK_SET);
  for (pos = start; pos <= end; pos++)
    seqhead[pos] = SeqPortGetResidue (spp);

  epiprob = PredictEpi (seqhead, start, end, epip);
  MemFree (seqhead);
  return epiprob;
}

/*
 * Run the prediction on positions start..end of a protein Bioseq.
 *
 * A SeqPort covering the whole sequence (0 .. bsp->length-1) is opened
 * with Seq_code_iupacaa, handed to PredictEpiSeqPort and freed again.
 * Returns NULL when bsp or epip is NULL or when ISA_aa (bsp->mol) is
 * false; otherwise whatever PredictEpiSeqPort returns.
 */
extern FloatHiPtr PredictEpiBioseq (BioseqPtr bsp,
                                    Int4 start, Int4 end,
                                    EpiDatPtr epip)
{
  SeqPortPtr port;
  FloatHiPtr result = NULL;

  if (bsp != NULL && epip != NULL && ISA_aa (bsp->mol))
  {
    port = SeqPortNew (bsp, 0, bsp->length-1, 0, Seq_code_iupacaa);
    result = PredictEpiSeqPort (port, start, end, epip);
    SeqPortFree (port);
  }

  return result;
}

/*
 * Run the prediction on the interval described by a SeqLoc.
 *
 * Only locations whose choice is SEQLOC_INT are accepted.  The Bioseq is
 * locked through BioseqLockById for the duration of the call and always
 * unlocked again.  Returns NULL when slp or epip is NULL, the location
 * is not an interval, the Bioseq cannot be locked, or ISA_aa is false
 * for it; otherwise whatever PredictEpiBioseq returns for
 * SeqLocStart (slp) .. SeqLocStop (slp).
 */
extern FloatHiPtr PredictEpiSeqLoc (SeqLocPtr slp,
                                    EpiDatPtr epip)
{
  BioseqPtr  target;
  Int4       from, to;
  FloatHiPtr result = NULL;

  if (slp == NULL || epip == NULL)
    return NULL;
  if (slp->choice != SEQLOC_INT)
    return NULL;

  target = BioseqLockById (SeqLocId (slp));
  if (target == NULL)
    return NULL;

  if (ISA_aa (target->mol))
  {
    from = SeqLocStart (slp);
    to = SeqLocStop (slp);
    result = PredictEpiBioseq (target, from, to, epip);
  }

  /* single unlock for both the accepted and the rejected case */
  BioseqUnlock (target);
  return result;
}

/*
 * Turn the per-position scores in epip->score into a list of interval
 * SeqLocs on sip.
 *
 * epip          parameters plus the score array (indexed 0..length-1)
 * length        number of entries of epip->score to examine
 * sip           id passed to SeqLocIntNew for every interval created
 * flagHighPass  TRUE: report runs with score >  epip->percentcut
 *               FALSE: report runs with score <= epip->percentcut
 *
 * Steps:
 *   1. epip->score is normalised IN PLACE to a percentage: (window + 1)
 *      is subtracted, negative results are replaced by the maximum, and
 *      the value is scaled by 100 / (MaxScore (window) - (window + 1)).
 *      Because the array is modified, calling this twice on the same
 *      scores does not give the same result.
 *   2. Runs of consecutive positions on the selected side of
 *      epip->percentcut become intervals.  For method 1 in low-pass
 *      mode, a run no longer than epip->window whose highest score is
 *      at least 32.0 is discarded.
 *   3. When epip->linker > 0, neighbouring intervals separated by at
 *      most epip->linker positions are merged.
 *
 * Returns the head of the interval list, or NULL when epip or
 * epip->score is NULL or no position qualifies.
 */
extern SeqLocPtr FilterEpi (EpiDatPtr epip, Int4 length, SeqIdPtr sip,
                            Boolean flagHighPass)
{
  Int4       i;
  Int4       start, stop;
  SeqLocPtr  nextslp, slp, slph = NULL;
  SeqIntPtr  sint;

  FloatHi    maxscore, curscore, cutminscr;

  if (epip == NULL || epip->score == NULL)
    return NULL;

/* post develop filter */

  cutminscr = 32.0;

/* normalize scores */
  maxscore = MaxScore (epip->window);

/* max relative to min 0 */
  maxscore -= (epip->window + 1);

  for (i = 0; i < length; i++)
  {
    epip->score[i] -= (epip->window + 1);
    /* entries that were never scored end up negative: treat as 100% */
    if (epip->score[i] < 0.0)
      epip->score[i] = maxscore;
    epip->score[i] = epip->score[i] * 100.0 / maxscore;
  }

  /* find the first position on the wanted side of the cutoff */
  for (i = 0; i < length; i++)
  {
    if (flagHighPass)
    {
      if (epip->score[i] > epip->percentcut)
        break;
    }
    else
    {
      if (epip->score[i] <= epip->percentcut)
        break;
    }
  }
  if (i == length)
    return NULL;

  /* collect runs of qualifying positions as intervals */
  while (i < length)
  {
    curscore = epip->score[i];
    if (flagHighPass)
    {
      if (epip->score[i] > epip->percentcut)
      {
        start = i;
        /* bounds test first so score[length] is never read */
        while (i < length && epip->score[i] > epip->percentcut)
        {
          i++;
        }
        stop = i - 1;
        slp = SeqLocIntNew (start, stop, Seq_strand_unknown, sip);
        ValNodeLink (&slph, slp);
      }
      else
      {
        i++;
      }
    }
    else
    {
      if (epip->score[i] <= epip->percentcut)
      {
        start = i;
        /* bounds test first so score[length] is never read;
           curscore tracks the highest score inside the run */
        while (i < length && epip->score[i] <= epip->percentcut)
        {
          if (epip->score[i] > curscore)
            curscore = epip->score[i];
          i++;
        }
        stop = i - 1;
        if (epip->method == 1)
        {
          /* method 1: drop short runs that never get below cutminscr */
          if (!(stop-start+1 <= epip->window && curscore >= cutminscr))
          {
            slp = SeqLocIntNew (start, stop, Seq_strand_unknown, sip);
            ValNodeLink (&slph, slp);
          }
        }
        else
        {
          slp = SeqLocIntNew (start, stop, Seq_strand_unknown, sip);
          ValNodeLink (&slph, slp);
        }
      }
      else
      {
        i++;
      }
    }
  }

  /* merge intervals whose gap is no wider than the linker length */
  if (epip->linker > 0)
  {
    slp = slph;
    while (slp != NULL)
    {
      if (slp->next != NULL)
      {
        nextslp = slp->next;
        stop = SeqLocStop (slp);
        start = SeqLocStart (nextslp);
        if (start-stop-1 <= epip->linker)
        {
          /* extend the current interval over the next one, unlink and
             free the next one, then re-test the same node against its
             new neighbour */
          sint = slp->data.ptrvalue;
          sint->to = SeqLocStop (nextslp);
          slp->next = nextslp->next;
          nextslp->next = NULL;
          SeqLocFree (nextslp);
          continue;
        }
      }
      slp = slp->next;
    }
  }

  return slph;
}
