from Normalization import initializeNPanel
from javawrap import GridBagConstraints
import InfectionEfficiency
import MainHandle
import MajorObjects
import PlateSet
import java.awt.GridBagLayout as GridBagLayout
import javax.swing as swing
import os.path
import re

# module-level alias for MainHandle.masterObject; the functions below use its
# setField/getField methods to store and fetch shared values by name
masterObject = MainHandle.masterObject

# same borders for all top-level subpanels:
# a compound border whose outside is an empty border (1 on each of the four sides)
# and whose inside is a lowered etched border
loweredEtched = swing.BorderFactory.createCompoundBorder(swing.BorderFactory.createEmptyBorder(1, 1, 1, 1),
                                                         swing.BorderFactory.createEtchedBorder(swing.border.EtchedBorder.LOWERED))

# create the main plate loading panel/tab
def createPlateFileLoadPanel():
    """Build and return the plate-file loading panel.

    The panel uses a GridBagLayout and holds a single component: the
    "Select assay datafile" button pane, anchored in the center of grid
    cell (0, 0). The [name, component] pairs returned by
    makeDataFileButton() are registered through
    MainHandle.addToMasterObject().
    """
    # the panel itself, laid out with a grid bag
    loadPanel = MajorObjects.NewPanel()
    loadPanel.setLayout(GridBagLayout())

    # build the button pane and register its named components
    buttonPane, namedComponents = makeDataFileButton()
    MainHandle.addToMasterObject(namedComponents)

    # constraints: cell (0, 0), no stretching, centered, all extra vertical space
    centered = GridBagConstraints.newGridBagConstraints()
    centered.gridx = 0
    centered.gridy = 0
    centered.weighty = 1.0
    centered.anchor = GridBagConstraints.CENTER
    centered.fill = GridBagConstraints.NONE

    loadPanel.add(buttonPane, centered)
    return loadPanel

# make the "Select assay datafile" button
def makeDataFileButton():
    """Create the "Select assay datafile" button pane.

    Returns a tuple (pane, namedComponents) where pane is the
    MajorObjects.BorderedButton wired to selectAndLoadDataFile and
    namedComponents is a list of [name, component] pairs for the pane and
    for the button it contains.
    """
    pane = MajorObjects.BorderedButton('Select assay datafile', selectAndLoadDataFile)

    # names under which the pane and its button are handed back to the caller
    namedComponents = [
        ['dataFileButtonPane', pane],
        ['dataFileButton', pane.button],
    ]
    return (pane, namedComponents)

# splits a line into fields. strips enclosing quotes from each field when they are present.
def separateFields(line):
    """Split a tab- or comma-delimited line into a list of field strings.

    Fields are peeled off the front of the line one at a time. When the
    remaining text begins with a double quote, the split happens at the
    first separator that is followed by balanced quotes up to the end of the
    text, so separators inside the quoted field are kept. A field that both
    starts and ends with a double quote has those two characters removed.
    """
    # separator that is not inside a quoted section (used for quoted fields)
    outsideQuotes = "([\t,])(?=(?:[^\"]|\"[^\"]*\")*$)"
    # any separator (used for unquoted fields)
    anySeparator = "([\t,])"

    collected = []
    remainder = line
    while remainder is not None:
        if remainder.startswith("\""):
            pieces = re.split(outsideQuotes, remainder, 1)
        else:
            pieces = re.split(anySeparator, remainder, 1)

        # pieces is [field] when nothing was split, else [field, separator, rest]
        cell = pieces[0]
        if cell.startswith('"') and cell.endswith('"'):
            cell = cell[1:-1]
        collected.append(cell)

        if len(pieces) >= 3:
            remainder = pieces[2]
        else:
            remainder = None
    return collected

# action listener fired when the button is pressed:
def selectAndLoadDataFile(event):
    # choose the input file name, if chooseFile returns NONE, return:
#    if os.path.exists('C:/DataAnalysis/FlatFileTest'):
#        masterObject.setField('fileDirectory', 'C:/DataAnalysis/FlatFileTest')
#    else:
#        masterObject.setField('fileDirectory', os.getcwd())

    # set the file directory to the current directory:
    masterObject.setField('fileDirectory', os.getcwd())
    
    # get the input file name:
    inFile = MajorObjects.chooseInputFile()
    if inFile == None:
        return

    # try to open the input file:
    fin = open(inFile, 'r')

    # initialize storage variables:
    plateSet = {}
    screenList = []
    batchList = []
    plateList = []
    vplateList = []
    geneSymbList = []
    plateDataFields = {}
    plateDefField = {}
    vPlateData = {}
    cloneMap = {}
    vpCounts = {}
    vplateId2VPlateName = {}
    batchId2BatchName = {}
    screenId2ScreenName = {}
    featureList = []
    dataFields = None
    defField = None
    readingData = 0
    fieldsSet = 0

    # read the data from the input file:
    while 1:
        # read an input line:
        line = fin.readline()
        if not line:
            # if this is the end of the file, break
            break

        line = line.strip() # strip any whitespace from the beginning or end of the line
        if len(line) == 0:
            # if the line is blank, skip it
            continue
        elif line.startswith('#'):
            # if the line starts with '#', it is a comment, so skip it
            continue
        elif line.startswith('dataFields'):
            # this allows a user to specify the data feature names before the header is read
            fields = separateFields(line)
            temp = fields[1].replace('"', '')
            # remove extraneous spaces from data fields and reconstruct the dataFields string:
            sepFields = temp.split(',')     # the data fields are comma-separated
            dataFields = ''
            # construct the list of data fields
            for field in sepFields:
                featureList.append(field.strip())   # list of features in the specified order
                if dataFields == '':
                    dataFields = '%s:%%f' % field.strip()
                else:
                    dataFields = '%s,%s:%%f' % (dataFields, field.strip())

                if defField == None:
                    defField = dataFields.split(',')[0]         # initialize the deault field to the first parameter
                    #print 'initial default field: %s' % defField
            fieldsSet = 1   # indicate that the field names have been set

        elif line.startswith('defaultField'):
            # this allows a user to specify the default field to be something other than the first feature column
            fields = separateFields(line)
            defField = fields[1].replace('"', '')
            defField = '%s:%%f' % defField.strip()
            #if dataFields.find(defField)==-1:
            if dataFields.find(defField) == -1:
                print 'default data field (%s) not found in data field list (%s)' % (defField, dataFields)
                return
            print 'updated default field: %s' % defField

        elif line.startswith('screen name') or line.startswith('"screen name"'):
            # this indicates that a new header line has been encountered:
            # read the column headers
            fields = separateFields(line)
            # file format is as follows:
            #    col    description
            #      0     screen name
            #      1     batch name
            #      2     plate name
            #      3     selection (puro, etc.)
            #      4     selection status
            #      5     use for IE
            #      6     condition
            #      7     row
            #      8     column
            #     9:N    feature names
            #     N+1    virus plate ID
            #     N+2    clone ID
            #     N+3    hairpin description
            #     N+4    gene symbol
            #     N+5    gene description
            #     N+6    hairpin type (1=hairpin, 0=EMPTY, -1=control, -2=pgw
            readingFeats = 0        # flag indicating that the feature names are being parsed from the header line
            feature2column = {}     # mapping of feature name to column
            column2feature = {}     # mapping of column to feature name
            newHeader = 1           # indicate that this is a new feature

            for i in range(len(fields)):
                # parse the header line, one column at a time
                if readingFeats:
                    # assay feature names are being parsed
                    if fields[i] == 'virus plate ID':
                        # 'virus plate ID' is the first column after the assay feature columns:
                        readingFeats = 0    # done reading assay feature names
                    else:
                        if fieldsSet == 0:
                            # the field names were not set before the header line was encountered
                            if newHeader:
                                # clear the dataFields if this is a new header and the features were not set at the top of the file:
                                dataFields = None
                                newHeader = 0       # indicate that this is the latest header

                            # if the fields were not set at the top of the file, read the headers and
                            #   set the default field to the first one
                            thisField = '%s:%%f' % fields[i]
                            if dataFields == None:
                                # this is the first assay feature name
                                dataFields = thisField
                                # set the default feature to the first feature
                                defField = thisField
                                # create a list of features
                                featureList = []
                                featureList.append(fields[i])
                            else:
                                # append this assay feature name to the dataFields string...
                                dataFields = '%s,%s' % (dataFields, thisField)
                                # ...and to the list of feature names
                                featureList.append(fields[i])

                        feature2column[fields[i]] = i   # map the column to the feature name
                        column2feature[i] = fields[i]   # map the feature name to the column
                elif fields[i] == 'col':
                    # 'col' is the last column header before the assay feature names begin
                    readingFeats = 1    # the next column will be an assay feature name

            readingData = 1     # the next row will contain data
            featColList = column2feature.keys()     # the list of all column indices
            nextCol = min(featColList) + len(featColList)   # nextCol is the column containing the virus plate name
        elif readingData:
            # this is a row that contains data
            fields = separateFields(line)
            sName = fields[0]               # the screen name
            bName = fields[1]               # the batch name
            pName = fields[2]               # the plate name
            selection = fields[3].upper()   # selection method (generally PUROMYCIN)
            selStat = fields[4].upper()     # selection status (YES or NO)
            use4ie = fields[5].upper()      # use this plate for infection efficiency calculations (YES or NO)
            condition = fields[6]           # infection or assay condition description
            if condition == '':
                condition = 'None'  # if the condition is blank, set it to 'None'
            row = fields[7]                 # plate row
            col = '%02d' % int(fields[8])   # plate column
            feats = []                      # initialize the assay feature list
            # map features in the correct order
            c2fkeys = column2feature.keys()
            c2fkeys.sort()
            for feat in featureList:
                # build the list of data values of the assay features
                feats.append(float(fields[feature2column[feat]]))
            vpName = fields[nextCol]                # the virus plate name
            cloneID = fields[nextCol + 1]           # the clone ID 
            hpDesc = fields[nextCol + 2]            # the hairpin description 
            geneSymb = fields[nextCol + 4]          # the target gene symbol
            if geneSymbList.count(geneSymb) == 0:
                # build a list of all unique gene symbols
                geneSymbList.append(geneSymb)
            geneID = fields[nextCol + 3]            # the NCBI gene ID
            if geneID == '':
                # build a list of unique gene IDs
                geneID = geneSymbList.index(geneSymb)
            geneDesc = fields[nextCol + 5]          # the target gene description
            taxon = fields[nextCol + 6]             # the target gene taxon 
            hpType = fields[nextCol + 7]            # the hairpin type (-2=pgw, -1=control, 0=EMPTY, 1=gene-targeting hairpin)
            if hpType != '':
                # convert the string to an integer
                hpType = int(hpType)
            else:
                # if the hairpin type is empty, set it to 0, which indicates an EMPTY well
                hpType = 0

            # create local index for each of screen, batch, and plate
            # !!! assumes screen, batch, and plate names are unique across the input file

            # NOTE: the screen, batch, plate, and virus plate IDs are simply set to the index of the 
            #       item in a list of unique screen, batch, plate, and virus plate names. In the database
            #       version of RNAeyes, these IDs are unique 'serial number' identifiers for each item
            #       that are read from the database. 
            
            # screen name:
            if screenList.count(sName) == 0:
                screenList.append(sName)
            screenID = screenList.index(sName)          # a unique number assigned to this screen
            screenId2ScreenName[screenID] = sName       # map the screen name to the screen ID

            # batch name
            if batchList.count(bName) == 0:
                batchList.append(bName)
            batchID = batchList.index(bName)            # a unique number assigned to this batch
            batchId2BatchName[batchID] = bName          # map the batch name to the batch ID
            
            # plate name
            if plateList.count(pName) == 0:
                plateList.append(pName)
            plateID = '%d' % plateList.index(pName)     # a unique number assigned to this plate 

            # virus plate name
            if vplateList.count(vpName) == 0:
                vplateList.append(vpName)
            vplateID = '%d' % vplateList.index(vpName)  # a unique number assigned to this virus plate

            # create a new virus plate, if necessary:
            vPlateData.setdefault(vplateID, PlateSet.VirusPlate(vplateID))
            vPlateData[vplateID].set_plateName(vpName)  # assign the virus plate name

            # then add this well:
            vPlateData[vplateID].addFileWell(row, col, cloneID, hpDesc, geneID, geneSymb, geneDesc, taxon, hpType)
            # add the clone to the clone map (if it hasn't been seen before)
            cloneMap.setdefault(cloneID, PlateSet.CloneInfo(None, None, None, None,
                                                  geneSymb, geneDesc, None, geneID, hpDesc, taxon, hpType))

            # if this is a new plate, create the plate:
            if plateSet.keys().count(plateID) == 0:
                plateSet.setdefault(plateID, PlateSet.Plate(plateID, 'File'))
                #print 'new plateID: %s' % plateID
                # fill in some of the plate info
                plateSet[plateID].addPlateQry(screenID, sName, batchID, bName,
                                             pName, None, None, selection, selStat, use4ie, vplateID)
                # create a new assay ID
                plateSet[plateID].addReadId(plateID, use4ie)
                # add the condition description
                plateSet[plateID].addCondition(plateID, condition)
                # fill in the assay data fields for the assay
                plateDataFields[plateID] = dataFields
                # fill in the default field for the assay
                plateDefField[plateID] = defField

            # add the assay feature data for this well
            plateSet[plateID].addWellData(row, col, feats)

    # finished reading data. Close the input file.
    fin.close()

    # set global cloneMap:
    masterObject.setField('cloneMap', cloneMap)
    print 'Total number of unique hairpins: %d' % len(cloneMap.keys())

    # Use the plateID as the readID and use the "use4ie" flag as a proxy for cellViability:
    for plateID in plateSet.keys():
        plateSet[plateID].setAssayRawData2WellData(plateDataFields[plateID], plateDefField[plateID])  # <<< this should be plate-based
        iPlate = plateSet[plateID]                  # get the infection plate object
        virusPlateId = iPlate.get_virusPlateId()    # get the virus plate ID
        vplateId2VPlateName[virusPlateId] = vPlateData[virusPlateId].get_plateName()    # fill in the virus plate name in the ID-to-name map 
        # fill in the information about which infection plates used this virus plate
        vpCounts.setdefault(virusPlateId, PlateSet.VirusPlateInfo(virusPlateId))    
        # ... and update the count of puro+ plate, puro- plates, etc. for this virus plate     
        vpCounts[virusPlateId].updateCount('%s:%s:%s' % (iPlate.screenName, iPlate.batchName,
                                                         iPlate.get_condition(plateID)),
                                                         iPlate.selectionStatus, plateID, plateID, iPlate.get_use(plateID))

    # initialize the infection efficiency (IE) map
    ieData = {}

    # add information from each virus plate to the infection efficiency storage:
    for vpId in vpCounts.keys():
        ieData = vpCounts[vpId].updateIeData(ieData, plateSet, vPlateData)

    # set the virus plate for each plate in the plateset
    for plateId in plateSet.keys():
        plateSet[plateId].updateVirusPlate(vPlateData)

    # reset the vpListBox table:
    vpListBox = masterObject.getField('vpListBox')
    vpTableModel = vpListBox.table.getSelectionModel()
    vpTable = masterObject.getField('vpListBoxTable')

    # remove listeners while updating the table (keeps the action listeners from firing every time 
    # anything is changed in any of the tables):
    listeners = vpTableModel.getListSelectionListeners()
    for listener in listeners:
        vpTableModel.removeListSelectionListener(listener)

    # clear the virus plate list table (in the Infection Efficiency tab)
    vpRowN = vpTable.getRowCount()
    if vpRowN > 0:
        vpListBox.clearList()

    # list all virus plates, with the number of puro+ and puro- plates used in the averages:
    vIePlateIdList = ieData.keys()
    vPlateStringList = []
    vPlateIdList = []
    vpName2IDmap = {}

    # get the vp list box from the Infection Efficiency tab:
    listBox = masterObject.getField('vpListPanel').getComp('vpListBox')

    # add a row to the virus plate list table for each virus plate in each screen, batch, and condition:
    for vPlateId in vIePlateIdList:
        for cond in ieData[vPlateId].keys():
            vpName = vPlateData[vPlateId].plateName         # virus plate name
            vpName2IDmap[vpName] = vPlateId                 # map the name to the plate ID
            vScreen = ieData[vPlateId][cond].screenName     # screen name
            vBatch = ieData[vPlateId][cond].batchName       # batch name
            nPlus = len(ieData[vPlateId][cond].selPlusPlateList)    # number of puro+ plates for this virus plate 
            nMinus = len(ieData[vPlateId][cond].selMinusPlateList)  # number of puro- plates for this virus plate 
            condition = cond.split(':')[2].replace('None', '')      # the condition string
            vPlateString = [vpName, vScreen, vBatch, condition, nPlus, nMinus, '', '']  # construct the table row
            vPlateStringList.append(vPlateString)           # add the row to a list of rows
            vPlateIdList.append(vPlateId)                   # add the virus plate ID to a parallel list of virus plate IDs

            listBox.addRow(vPlateString, vPlateId)          # add the row to the table

    masterObject.setField('vpListBoxRows', vPlateStringList)    # save the list of rows to the global variable storage
    masterObject.setField('vpListvpIdList', vPlateIdList)       # save the list of virus plate ids to the global variable storage
    masterObject.setField('vpName2IDmap', vpName2IDmap)         # save the name to ID map to the global variable storage

    # replace listeners after updating the table:
    for listener in listeners:
        vpTableModel.addListSelectionListener(listener)

    masterObject.setField('plateSet', plateSet)         # save the plate set to the global variable storage
    masterObject.setField('vPlateSet', vPlateData)      # save the virus plate set to the global variable storage
    masterObject.setField('ieData', ieData)             # save the infection efficiency data to the global variable storage

    masterObject.setField('vplateId2VPlateName', vplateId2VPlateName)   # save the virus plate ID to virus plate name map to the global variable storage
    masterObject.setField('batchId2BatchName', batchId2BatchName)       # save the batch ID to batch name map to the global variable storage
    masterObject.setField('screenId2ScreenName', screenId2ScreenName)   # save the screen ID to screen name map to the global variable storage

    # set up data points for IE plotting:
    InfectionEfficiency.initializeIEPlot()
    initializeNPanel()

