# -------------------------------------------------------------------------
#     This file is part of mMass - the spectrum analysis tool for MS.
#     Copyright (C) 2005-07 Martin Strohalm <mmass@biographics.cz>

#     This program is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.

#     Complete text of GNU GPL can be found in the file LICENSE in the
#     main directory of the program
# -------------------------------------------------------------------------

# Function: Count masses for items in sequence.

# load libs
import re

# load modules
from nucleus import commfce


class mSeqCount:
    """ Count masses for items in sequence.

    The class works on a 'parsed sequence' - a list with one item per
    residue in the form [amino, mmass, amass, modification, ...] - which is
    built by parseSequence() and consumed by all the other methods.
    (mmass / amass are presumably the monoisotopic and average masses.)
    """

    # ----
    def __init__(self, config):
        """ Remember the configuration object.

        config: object with 'amino', 'mod' and 'elem' attributes. Each of them
            is indexed by name (amino acid symbol, modification name, element
            symbol) and gives a mapping with 'mmass' and 'amass' values.
        """
        self.config = config
    # ----


    # ----
    def parseSequence(self, sequence, modifications):
        """ Count masses for all items in sequence and make parsed sequence.

        sequence: string (or list) of amino acid symbols; each symbol must be
            defined in config.amino, otherwise KeyError is raised.
        modifications: list of items where item[0] is either a residue index
            (int) or an amino acid symbol (modifies every such residue) and
            item[2] is the modification name. Modifications not defined in
            config.mod are silently skipped.

        Returns a list of [amino, mmass, amass, modification, ...] items, one
        per residue. The first item already includes the N-terminal H and the
        last one the C-terminal OH. Empty sequence gives an empty list.
        """

        # check sequence
        if not sequence:
            return []

        aminoTable = self.config.amino
        modTable = self.config.mod
        elemTable = self.config.elem

        # skip undefined modifications once, not again for every residue
        knownMods = [mod for mod in modifications if mod[2] in modTable]

        # walk in sequence
        parsedSequence = []
        for index, amino in enumerate(sequence):

            # start with the plain amino acid masses
            item = [amino, 0, 0] # [amino, mmass, amass, modifications...]
            item[1] += aminoTable[amino]['mmass']
            item[2] += aminoTable[amino]['amass']

            # add modifications
            for mod in knownMods:
                target = mod[0]

                # int targets one position, anything else targets a residue type
                if isinstance(target, int):
                    applies = (target == index)
                else:
                    applies = (target == amino)

                if applies:
                    item[1] += modTable[mod[2]]['mmass']
                    item[2] += modTable[mod[2]]['amass']
                    item.append(mod[2])

            parsedSequence.append(item)

        # add N-term H
        parsedSequence[0][1] += elemTable['H']['mmass']
        parsedSequence[0][2] += elemTable['H']['amass']

        # add C-term OH
        parsedSequence[-1][1] += elemTable['O']['mmass'] + elemTable['H']['mmass']
        parsedSequence[-1][2] += elemTable['O']['amass'] + elemTable['H']['amass']

        return parsedSequence
    # ----


    # ----
    def getSelectionMass(self, sequence, charge, index1=None, index2=None, nTerm=None, cTerm=None):
        """ Count selection mass from parsed sequence. (Hydrolytic cleavage - add H2O)

        sequence: parsed sequence as returned by parseSequence().
        charge: ion charge; 0 gives the neutral mass, otherwise 'charge' times
            the mass of H is added and the result is divided by abs(charge).
        index1, index2: selection limits with the usual slice meaning (None,
            negative and out-of-range values are accepted).
        nTerm, cTerm: names of terminal modifications from config.mod or None.

        Returns (mmass, amass) tuple.
        """

        elemTable = self.config.elem
        modTable = self.config.mod

        # normalise the limits the same way slicing does, so that the terminal
        # checks below see real positions even for negative or too big indexes
        start, stop, step = slice(index1, index2).indices(len(sequence))

        # count total mass
        mmass = 0
        amass = 0
        for aminoacid in sequence[start:stop]:
            mmass += aminoacid[1]
            amass += aminoacid[2]

        # add N-terminal group mass - H
        # (the first residue of the whole sequence already contains it)
        if start != 0:
            mmass += elemTable['H']['mmass']
            amass += elemTable['H']['amass']

        # add C-terminal group mass - OH
        # (the last residue of the whole sequence already contains it)
        if stop != len(sequence):
            mmass += elemTable['O']['mmass'] + elemTable['H']['mmass']
            amass += elemTable['O']['amass'] + elemTable['H']['amass']

        # add N-terminal modification
        if nTerm is not None:
            mmass += modTable[nTerm]['mmass']
            amass += modTable[nTerm]['amass']

        # add C-terminal modification
        if cTerm is not None:
            mmass += modTable[cTerm]['mmass']
            amass += modTable[cTerm]['amass']

        # count ion masses
        if charge != 0:
            mmass = (mmass + charge * elemTable['H']['mmass']) / abs(charge)
            amass = (amass + charge * elemTable['H']['amass']) / abs(charge)

        return mmass, amass
    # ----


    # ----
    def searchForMass(self, sequence, findMass, massType, errorType, tolerance, charge, nTerm, cTerm):
        """ Search sequence for specified mass within tolerance.

        sequence: parsed sequence as returned by parseSequence().
        findMass: mass to search for.
        massType: 'mmass' compares the first value from getSelectionMass(),
            anything else the second one.
        errorType, tolerance: passed to commfce.countTolerance() and
            commfce.calcMassError().
        charge, nTerm, cTerm: passed to getSelectionMass().

        Returns a list of [mass, error, x, y, peptide, before, after] items,
        where x:y is the matched range and before / after are the neighbouring
        amino acids or False at the sequence ends.
        """
        matched = []

        # get tolerance values - min/max masses
        delta = commfce.countTolerance(findMass, tolerance, errorType)
        minMass = findMass - delta
        maxMass = findMass + delta

        # get masstype index
        if massType == 'mmass':
            massIndex = 0
        else:
            massIndex = 1

        # get clean sequence
        cleanSequence = self._getCleanSequence(sequence)
        seqLength = len(sequence)

        # get masses for matched
        for x in range(seqLength):
            for y in range(x+1, seqLength+1):
                mass = self.getSelectionMass(sequence, charge, x, y, nTerm, cTerm)[massIndex]
                if minMass <= mass <= maxMass:
                    error = commfce.calcMassError(mass, findMass, errorType)
                    before, after = self._getNeighbours(cleanSequence, x, y)
                    matched.append([mass, error, x, y, cleanSequence[x:y], before, after])

                # longer peptides starting at x are not tried any more
                # (assumes the mass only grows when the peptide is extended)
                elif mass > maxMass:
                    break

        return matched
    # ----


    # ----
    def searchForSequence(self, sequence, subSequence, charge, nTerm, cTerm):
        """ Search sequence for specified sub-sequence.

        sequence: parsed sequence as returned by parseSequence().
        subSequence: pattern to search for. It is used as a regular expression,
            so re.error is raised for an invalid pattern; use re.escape() for
            a literal search.
        charge, nTerm, cTerm: passed to getSelectionMass().

        Returns a list of [mmass, amass, x, y, peptide, before, after] items
        for every range x:y which matches the whole pattern; before / after
        are the neighbouring amino acids or False at the sequence ends.
        """

        matched = []
        subSeqPatt = re.compile('^(' + subSequence + ')$')

        # get clean sequence
        cleanSequence = self._getCleanSequence(sequence)
        seqLength = len(sequence)

        # quick check to skip the walk through all ranges
        if not re.search(subSequence, cleanSequence):
            return matched

        # get masses for matched
        for x in range(seqLength):
            for y in range(x+1, seqLength+1):
                if subSeqPatt.match(cleanSequence[x:y]):
                    mmass, amass = self.getSelectionMass(sequence, charge, x, y, nTerm, cTerm)
                    before, after = self._getNeighbours(cleanSequence, x, y)
                    matched.append([mmass, amass, x, y, cleanSequence[x:y], before, after])

        return matched
    # ----


    # ----
    def _getCleanSequence(self, sequence):
        """ Join amino acid symbols of parsed sequence into plain string. """
        return ''.join([amino[0] for amino in sequence])
    # ----


    # ----
    def _getNeighbours(self, cleanSequence, x, y):
        """ Get amino acids just before and after range x:y (False if none). """

        before = False
        after = False
        if x:
            before = cleanSequence[x-1]
        if y < len(cleanSequence):
            after = cleanSequence[y]

        return before, after
    # ----
