from stats import stats
import MainHandle
import MajorObjects
import math
import org.jfree.data.xy.XYSeries as XYSeries

# Handle to the master object exposed by the MainHandle module; code below
# reads shared fields from it through masterObject.getField(...).
masterObject = MainHandle.masterObject

BSCORE_EPS = 0.01	  # b-score convergence factor
# Plate row labels in order: ALPHABET.index(row) gives the 0-based row number.
ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
			'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
# NOTE(review): M_SUM, SD_SUM and D_MEAN are not referenced in the portion of
# the file reviewed here -- confirm where (or whether) they are still used.
M_SUM = 0.0
SD_SUM = 0.0
D_MEAN = 0.0

######
# This contains the classes required for plate data manipulation
######

# Plate class definition: all info available for a plate
class Plate:
	def __init__(self, plateId, dataSource='DB'):
		self.plateId = plateId    # the plate is defined by the plateID
		self.plateName = None
		self.readIdList = []      # read/assay IDs associated with the plate
		self.assayList = []       # corresponding assay data for each read/assay
		self.use4Ie = []          # flags indicating whether or not to use each assay for IE calculation
		self.screenId = None      # screen ID
		self.screenName = None    # screen Name
		self.batchId = None       # batch ID
		self.batchName = None     # batch Name
		self.barcode = None       # plate barcode
		self.virusPlateId = None  # ID of virus plate used for infection
		self.virusPlate = None    # name of virus plate used for infection
		self.plateFormat = None   # 96- or 384-well
		self.selection = None     # selection method (usually puromycin)
		self.selectionStatus = None   # whether selection was applied or not
		self.replicate = None     # s this plate considered a "replicate"
		self.plateHpMean = None   # plate statistic: mean of all valid hairpins
		self.plateHpStd = None    # plate statistic: STD of all valid hairpins
		self.plateHpCtlMean = None    # plate statistic: mean of all control hairpins
		self.plateHpCtlStd = None # plate statistic: STD of all control hairpins

		self.lowThreshSlope = None    # low threshold cutoff (by IE percent)
		self.highThreshSlope = None   # high threshold cutoff (by IE percent)
		self.lowCutoff = None         # low threshold cutoff (absolute)

		self.selectedFeature = {}     # assay feature that has been selected for display/processing for each assay
		self.availableFeatures = {}   # available assay features for each assay
		self.ieData = None            # IE data for the plate
		self.ieMask = None            # IE mask (indicates which wells not to use)
		self.ieMaskValid = 0          # is the mask valid/updated?
		self.controlWells = None      # indicates how to treat control wells
		self.controlWellCount = 0     # how many control wells on the plate
		self.norm = []                # normalized data for the plate
		self.normMethod = 'none'      # method used for normalization
		self.scoreStr = 'none'        # AD: ?
		self.use = 1                  # flag indicating whether or not to use this plate (the plate can be removed from processing)

		# for flat-file input:
		self.dataSource = dataSource  # did the data come from a file or from the database?
		self.wellData = {}            # if it's from a file, the data for each well is stored in this

	def get_virusPlateId(self):
		"""Return the ID of the virus plate used for infecting this plate."""
		return self.virusPlateId

	def isSelected(self):
		# puro+?
		if self.selectionStatus == 'YES':
			return 1
		else:
			return 0

	def updateVirusPlate(self, vPlateSet):
		"""Set self.virusPlate to the entry of vPlateSet stored under this plate's virusPlateId."""
		self.virusPlate = vPlateSet[self.virusPlateId]

	def addReadId(self, readId, cellViability):
		"""Register another read/assay ID on this plate.

		Appends one entry to each of the positionally aligned per-read lists
		(readIdList, assayList, norm, use4Ie). The read is flagged as usable
		for IE calculation (use4Ie entry of 1) only when the plate is a
		replicate ('YES', any case) and cellViability is either None or
		'YES' (any case); otherwise the flag is 0.
		"""
		# Decide the flag BEFORE touching any list: a plate whose 'replicate'
		# field is still None used to raise after three of the four lists had
		# already grown, leaving them misaligned. A missing replicate value is
		# now treated as "not a replicate".
		isReplicate = self.replicate is not None and self.replicate.upper() == 'YES'
		if isReplicate and (cellViability is None or cellViability.upper() == 'YES'):
			use4Ie = 1
		else:
			use4Ie = 0

		self.readIdList.append(readId)
		self.assayList.append(Assay(readId))
		self.norm.append([])
		self.use4Ie.append(use4Ie)    # positionally matches readIdList and assayList

	def removeBadReads(self, readIdList):
		# remove an assay from the list
		for readId in readIdList:
			readIdx = self.readIdList.index(readId)
			self.readIdList.pop(readIdx)
			self.assayList.pop(readIdx)
			self.use4Ie.pop(readIdx)
			print 'removing readId %s from plate %s (%s)' % (readId, self.plateName, self.plateId)

		print ' readId list length: %d' % len(self.readIdList)
		if len(self.readIdList) == 0:
			return 1
		else:
			return 0

	def get_use(self, readId):
		# get the "use" flag for this readID
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'get_use: unknown readId %s for plateId %s' % (readId, self.plateId)
			return None

		return self.use4Ie[readIdx]

	def get_readIdList(self):
		"""Return the plate's list of read/assay IDs (the list object itself, not a copy)."""
		return self.readIdList

	def addWellData(self, row, col, wellData):
		# wellData is a list of values that correspond to assays for this well
		self.wellData.setdefault(row, {})
		# convert wellData from a list to a string:
		wData = None  # string representation of the well data
		for x in wellData:
			if wData == None:
				wData = '%s' % x
			else:
				wData = '%s,%s' % (wData, x)
		self.wellData[row][col] = wData

	def setAssayRawData2WellData(self, dataFields, defField):
		"""Load this plate's stored well data as the raw data of its first read.

		NOTE: only used when the data is read from a file; only one plate read
		is allowed for flat-file data, so the first read ID is used and the
		data type is fixed to 'default'.
		"""
		self.setAssayRawData(self.readIdList[0], 'default', dataFields,
							 defField, self.wellData)

	def setAssayRawData(self, readId, dataType, dataFields, defField, multidimData):
		# initialize the assay data to the raw data from the assay specified by the readID
		try:
			# get the index in the readIdList of the specified by readId 
			readIdx = self.readIdList.index(readId)
		except:
			print 'setAssayRawData: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		# add the data to the assay data for this plate
		self.assayList[readIdx].addAssayData(dataType, dataFields, defField, multidimData, self.dataSource)
		self.availableFeatures[readIdx] = makeFeatureList(dataFields, defField)
		nFeat = len(self.availableFeatures[readIdx])  # number of available assay features
		normList = []
		for i in range(nFeat):
			# initialize the normalized data for each feature to None
			normList.append(None)

		self.norm[readIdx] = normList
		# initialize the selected feature to the default feature (specified in the DB for the data type)
		self.selectedFeature[readIdx] = '%s' % defField.split(':')[0]
		self.getAssayData(readId) # initialize the assay data to the assay data corrseponding to the deault feature

	def getFeatures(self, readId):
		# get the list of features available for this assay
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'getFeatures: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		featList = self.availableFeatures[readIdx]
		selectedIdx = self.assayList[readIdx].getSelectedFeatureIdx()
		return (featList, selectedIdx)

	def getAssayData(self, readId):
		# get the data for this assay for the presently selected feature 
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'getAssayData: unknown readId %s for plateId %s' % (readId, self.plateId)
			return
		return self.assayList[readIdx].getFeatureData()

	def getCondition(self):
		"""Return the condition of the plate's first assay (assayList[0])."""
		# default condition is from the first assay (AD: doesn't appear to be used):
		return self.assayList[0].get_condition()

	def getRaw(self, readId=None):
		# get the raw (unnormalized) data for this plate. If no readId is specified, get the default feature data
		if readId != None:
			try:
				readIdx = self.readIdList.index(readId)
			except:
				return None
		else:
			readIdx = 0  # default to the default readId
		
		(rData, rValid) = self.getAssayData(self.readIdList[readIdx])
		return rData

	def getSelectedFeatureData(self, readId, feature):
		# get the data for the selected feature
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'getSelectedFeatureData: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		return self.assayList[readIdx].getSelectedData(feature)

	def getSelectedFeature(self, readId):
		# get the feature that is presently selected for this assay
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'getSelectedFeature: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		return self.assayList[readIdx].getSelectedFeature()

	def setSelectedFeature(self, readId, feature):
		# set the feature that is presently selected for this assay
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'setSelectedFeature: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		#print 'Setting selected feature for %s to %s' % (readId,feature)
		self.assayList[readIdx].setSelectedFeature(feature)

	def invalidateNorm(self, readId):
		# invalidate the normalization, indicating that the normalization must be recomputed
		# Note: this is needed when some parameter is changed that might affect the normalized data values,
		#       for example, changing the IE threshold sliders might add or remove wells that are considere "valid"
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'invalidateNorm: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		for normIdx in range(len(self.norm[readIdx])):
			self.norm[readIdx][normIdx] = None

	def setNorm(self, readId, data):
		# set the normalized data for this assay to 'data'
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'setNorm: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		self.norm[readIdx][self.assayList[readIdx].getSelectedFeatureIdx()] = data

	def getNorm(self, readId):
		# get the normalized data for this assay
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'getNorm: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		return self.norm[readIdx][self.assayList[readIdx].getSelectedFeatureIdx()]

	def computePlateStats(self, readId):
		# compute the and return following plate-level statistics for this assay:
		#	median, mean and STD for all of the valid (gene-targeting) hairpins on the plate
		#	median, mean and STD for all of the control hairpins on the plate
		vPlate = self.virusPlate
		data = self.getRaw(readId)
		hpList = []
		hpCtlList = []
                if data != None:
                        for row in data.keys():
                                for col in data[row].keys():
                                        vType = vPlate.getWellType(row, col)
                                        if abs(vType) == 1:
                                                hpCtlList.append(data[row][col])
                                                if vType == 1:
                                                        hpList.append(data[row][col])

		if len(hpList) == 0:
			hpMed = None
			hpMean = None
			hpStd = None
		else:
			hpMed = stats.lmedian(hpList)
			hpMean = stats.lmean(hpList)
			hpStd = stats.lsamplestdev(hpList)

		if len(hpCtlList) == 0:
			hpCtlMed = None
			hpCtlMean = None
			hpCtlStd = None
		else:
			hpCtlMed = stats.lmedian(hpCtlList)
			hpCtlMean = stats.lmean(hpCtlList)
			hpCtlStd = stats.lsamplestdev(hpCtlList)

		return [hpMed, hpMean, hpStd, hpCtlMed, hpCtlMean, hpCtlStd]

	def addCondition(self, readId, condition):
		# add a condition corresponding to this assay (assays can have multiple conditions)
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'addCondition: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		self.assayList[readIdx].addCondition(condition)

	def get_condition(self, readId):
		# get the condition(s) for this assay
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'get_condition: unknown readId %s for plateId %s' % (readId, self.plateId)
			return

		return (self.assayList[readIdx].get_condition())

	def get_conditionList(self):
		# get all conditions for all assays for this plate asa list
		cList = []
		for readId in self.readIdList:
			cList.append('%s' % self.get_condition(readId))
		return cList

	def getAssay(self, readId):
		# get the assay object corresponding to this readID
		try:
			readIdx = self.readIdList.index(readId)
		except:
			print 'getAssay: unknown readId %s for plateId %s' % (readId, self.plateId)
			return None

		return self.assayList[readIdx]

	def addPlateQry(self, screenId, screenName, batchId, batchName, plateName,
					barcode, plateFormat, selection, selectionStatus,
					replicate, virusPlateId):
		# fill in the information that was read from the database for this plate 
		self.screenId = screenId
		self.screenName = screenName
		self.batchId = batchId
		self.batchName = batchName
		self.plateName = plateName
		self.barcode = barcode
		self.virusPlateId = virusPlateId
		self.plateFormat = plateFormat
		self.selection = selection
		if selectionStatus.upper() == 'YES' or selectionStatus.upper() == 'PLUS':
			self.selectionStatus = 'YES'
		elif selectionStatus.upper() == 'NO' or selectionStatus.upper() == 'MINUS':
			self.selectionStatus = 'NO'
		else:
			print "addPlateQry: Unknown selection status '%s' for plate %s (assuming no selection)" % (selectionStatus, self.plateId)
			self.selectionStatus = 'NO'

		self.replicate = replicate

	def printSelf(self):
		"""Print a summary of this plate to stdout (debugging aid).

		Prints the plate, screen, batch, virus plate and selection fields,
		followed by one line per read showing its ID, the condition of its
		assay and its use-for-IE flag. Nothing is returned.
		"""
		print 'Plate: %s : %s : %s' % (self.plateName, self.plateId, self.barcode)
		print '	   Screen: %s : %s' % (self.screenId, self.screenName)
		print '		Batch: %s : %s' % (self.batchId, self.batchName)
		print '  Virus plate: %s (%s)' % (self.virusPlateId, self.plateFormat)
		print '	Selection: %s : %s : %s' % (self.selection, self.selectionStatus,
											   self.replicate)
		# build the per-read lines as one newline-separated string; it stays
		# None (and is printed as 'None') when the plate has no reads
		aIdList = None
		for aIdx in range(len(self.readIdList)):
			if aIdList == None:
				aIdList = '	 %s : %s (%d)' % (self.readIdList[aIdx],
												 self.assayList[aIdx].get_condition(),
												 self.use4Ie[aIdx])
			else:
				aIdList = '%s\n	 %s : %s (%d)' % (aIdList,
													 self.readIdList[aIdx],
													 self.assayList[aIdx].get_condition(),
													 self.use4Ie[aIdx])
		print '	Assay Ids:'
		print '%s' % aIdList

	def setThresholds(self, lowThreshSlope, highThreshSlope, lowCutoff):
		"""Store the IE threshold settings used when the IE mask is computed.

		lowThreshSlope / highThreshSlope -- low and high threshold cutoffs (by IE percent)
		lowCutoff                        -- low threshold cutoff (absolute)
		"""
		self.lowThreshSlope = lowThreshSlope
		self.highThreshSlope = highThreshSlope
		self.lowCutoff = lowCutoff

	def getIE(self):
		# get the infection efficiency data for virus plate used for infecting this plate
		if self.ieData == None:
			ieData = masterObject.getField('ieData')
			if ieData.keys().count(self.virusPlateId) > 0:
				for key in ieData[self.virusPlateId].keys():
					ieTemp = ieData[self.virusPlateId][key]
					if self.screenId == ieTemp.screenId and self.batchId == ieTemp.batchId:
						print '  ..using ieData from:', ieTemp.printSelf()
						self.ieData = ieTemp
			if self.ieData == None:	# still....
				return None
			else:
				return self.ieData.ie
		else:
			return self.ieData.ie

	def setIeData(self, ieData):
		"""Attach IE data to this plate (many plates can have the same IE data)."""
		self.ieData = ieData

	def getIeMask(self):
		# get the IE mask used for IE calculation
		if not self.ieMaskValid:
			self.computeIeMask()
			self.ieMaskValid = 1

		return self.ieMask

	def invalidateIE(self):
		"""Mark the IE mask as stale so that getIeMask() recomputes it on its next call."""
		self.ieMaskValid = 0

	def computeIeMask(self):
		"""Rebuild self.ieMask, the per-well mask used by the normalization methods.

		The mask has the same row/column layout as the plate's raw data. A
		value of 0 means "do not use this well"; any other value is the
		well's type.

		Without IE data on the plate, every well simply gets its type from
		the virus plate. With IE data, a well keeps its type (taken from the
		IE data) when its IE value lies within [lowThreshSlope,
		highThreshSlope], or when its selection-minus or selection-plus value
		is below lowCutoff; all other wells are masked out with 0.

		Returns 1 after rebuilding the mask. Returns None without touching the
		mask when IE data is present, the mask is still valid and the master
		object's 'Update analysis' field is false.
		"""
		if self.ieData == None:
			# get the IE data if the ieData field is empty
			# NOTE(review): ieData and vPlateIdList are assigned here but not used
			# again in this method -- confirm whether the lookup is still needed
			ieData = masterObject.getField('ieData')
			vPlateIdList = ieData.keys()
			useIe = 0    # use the virus plate to get the well type
		elif not masterObject.getField('Update analysis') and self.ieMaskValid:
			# if the infection efficiency has already been calculated, and nothing has 
			# changed, just return with the previous result 
			return
		else:
			# otherwise, get the existing ie data, and fill mData and pData with the 
			# puro- and puro+ data, respectively
			useIe = 1    # get the well type from the ieData.type array
			ie = self.ieData.ie
			mData = self.ieData.selMinusData
			pData = self.ieData.selPlusData

		# shorter names, for convenience
		lT = self.lowThreshSlope
		hT = self.highThreshSlope
		lC = self.lowCutoff

		# use the raw data as the plate layout template (row x col):
		raw = self.getRaw()

		# for the mask, 0 means don't use, otherwise it is set to the hairpin type:
		self.ieMask = {}
		for row in raw.keys():
			self.ieMask.setdefault(row, {})
			for col in raw[row].keys():
				if useIe:
					wType = self.ieData.type[row][col]
				else:
					# print self.virusPlateId + " " + row + " " + col
					wType = self.virusPlate.well[row][col].type

				# apply thresholds:
				# NOTE(review): wells whose puro-/puro+ value is BELOW the absolute
				# cutoff are kept (not masked out) -- confirm this is intended
				if useIe and ((ie[row][col] >= lT and ie[row][col] <= hT) or mData[row][col] < lC or pData[row][col] < lC):
					self.ieMask[row][col] = wType
				elif not useIe:
					# no IE data: nothing to threshold, keep the virus-plate well type
					self.ieMask[row][col] = wType
				else:
					self.ieMask[row][col] = 0

		# set the flag to indicate that the IE mask is valid
		self.ieMaskValid = 1
		return 1

	# b-score normalization algorithm
	# This algorithm is described (for example) in "Statistical practice in high-throughput
	# screening data analysis", Malo, Nadon, et al., Nature Biotechnology, vol. 24, no. 2, Feb 2006.
	def bScore(self, quad):
		#print 'computing b-score for plateId: %s' % self.plateId
		# save the type of normalization for the writing into the output file
		if quad:
			self.normMethod = 'bscore, quadrant-based'
			self.scoreStr = 'bScore(q)'
		else:
			self.normMethod = 'bscore, plate-based'
			self.scoreStr = 'bScore'

		ieMask = self.getIeMask()

		for readId in self.readIdList:
			# normalize each assay
			if self.getNorm(readId) != None:
				# skip the normalization if it is already computed (normalization is invalidated by setting the normalized data field to None)
				continue
			(raw, valid) = self.getAssayData(readId)	# << should deal with 'valid' flags...

			rawQ = []   # raw data after data is masked
			allQ = []   # all raw data
			norm = {}
			if quad:
				# quadrant-based normalization
				rawQ = [{}, {}, {}, {}]
				allQ = [{}, {}, {}, {}]
				for row in raw.keys():
					for col in raw[row].keys():
						qIdx = 2 * (ALPHABET.index(row) % 2) + int(col) % 2
						allQ[qIdx].setdefault(row, {})
						allQ[qIdx][row][col] = raw[row][col]

						if ieMask[row][col]:          # don't use empty or non-valid wells
							rawQ[qIdx].setdefault(row, {})
							rawQ[qIdx][row][col] = raw[row][col]
			else:
				# whole plate-based normalization
				rawQ = [{}]
				allQ = [{}]
				for row in raw.keys():
					for col in raw[row].keys():
						allQ[0].setdefault(row, {})
						allQ[0][row][col] = raw[row][col]
						if ieMask[row][col] == 1:     # don't use empty or non-valid wells
							rawQ[0].setdefault(row, {})
							rawQ[0][row][col] = raw[row][col]

			for qIdx in range(len(rawQ)):
				# do the actual b-score computation
				norm = computeBscore(rawQ[qIdx], allQ[qIdx], norm)

			self.setNorm(readId, norm)

	# z-score normalization algorithm
	def zScore(self, quad, zType):
		# save the type of normalization for the writing into the output file
		if zType == 'normal':
			# z-score: 
			# the z-score of a well with a value of x is (x-plateMean)/plateSD,
			# where plateMean is the mean of all the (non-empty and valid) wells on the plate and
			# plateSD is the standard deviation over those same wells
			self.scoreStr = 'z-score'
		else:
			# robust z-score: 
			# the robust z-score of a well with a value of x is (x-plateMedian)/plateMAD,
			# where plateMedian is the median of all the (non-empty and valid) wells on the plate and
			# plateMAD is the Median Absolute Deviation (MAD) over those same wells
			self.scoreStr = 'robust z'

		if quad:
			self.normMethod = '%s z-score, quadrant-based' % zType
			self.scoreStr = '%s(q)' % self.scoreStr
		else:
			self.normMethod = '%s z-score, plate-based' % zType

		# get the IE mask
		ieMask = self.getIeMask()

		for readId in self.readIdList:
			if self.getNorm(readId) != None:
				# skip the normalization if it is already computed (normalization is invalidated by setting the normalized data field to None)
				continue
			(raw, valid) = self.getAssayData(readId)	# << should deal with 'valid' flags...
			rawQ = []
			norm = {}

			if quad:
				# quadrant-based normalization
				rawQ = [{}, {}, {}, {}]
				for row in raw.keys():
					for col in raw[row].keys():
						qIdx = 2 * (ALPHABET.index(row) % 2) + int(col) % 2
						rawQ[qIdx].setdefault(row, {})
						rawQ[qIdx][row][col] = raw[row][col]
			else:
				# whole plate-based normalization
				rawQ.append(raw)

			for qIdx in range(len(rawQ)):
				# gather all non-empty, non-pgw, non-IE-masked points:
				valList = []

				for row in rawQ[qIdx].keys():
					for col in rawQ[qIdx][row].keys():
						if ieMask[row][col] == 1 or ieMask[row][col] == -1:
							valList.append(float(rawQ[qIdx][row][col]))

				# compute the z-statistics based on the z-score type:
				if len(valList) == 0:
					print 'No data values for readId %s' % readId
					zM = 0
					zS = 0
				else:
					if zType == 'normal':
						# compute mean and STD of the selected wells:
						zM = stats.lmean(valList)
						zS = stats.lsamplestdev(valList)
					elif zType == 'robust':
						# compute the median and MAD of the selected wells:
						zM = stats.lmedianscore(valList)
						zDiff = valList[0:]   # copy the input list
						for i in range(len(zDiff)):
							zDiff[i] = abs(zDiff[i] - zM)
						zS = stats.lmedianscore(zDiff)
					else:
						# just in case...
						print 'Warning: unknown zType: %s' % zType
						zM = 0
						zS = 0

				# compute the zScore for every well:
				for row in rawQ[qIdx].keys():
					norm.setdefault(row, {})
					for col in rawQ[qIdx][row].keys():
						if zS != 0:
							norm[row][col] = (rawQ[qIdx][row][col] - zM) / zS
						else:
							norm[row][col] = -1      # AD: not sure why this isn't 0?

			#self.norm.append(norm)
			self.setNorm(readId, norm)

	# percent-of-control normalization algorithm
	# This algorithm would be better described as 'fraction-of-control'. The result for
	# each well is the ratio of the well value to the median of the control wells on the plate.
	# Percent of control would actually be 100.0 x this value.
	def PoC(self, quad):
		"""Normalize every read of this plate as a fraction of its control wells.

		quad -- true to normalize each of the four interleaved quadrants
		        separately, false to treat the plate as a whole

		For each group of wells the median of the control wells (IE mask
		value -1) is computed and every well of the group is divided by it.
		When the group has no control wells (reported on stdout) or the
		median is 0, every well of the group is set to 0.0. Reads whose
		normalized data is already present are skipped. The method is
		recorded in normMethod / scoreStr for the output file.
		"""
		# save the type of normalization for the writing into the output file
		if quad:
			self.normMethod = 'PoC, quadrant-based'
			self.scoreStr = 'PoC(q)'
		else:
			self.normMethod = 'PoC, plate-based'
			self.scoreStr = 'PoC'

		# get the IE mask
		ieMask = self.getIeMask()

		for readId in self.readIdList:
			# compute the normalization for each assay
			if self.getNorm(readId) != None:
				# skip the normalization if it is already computed (normalization is invalidated by setting the normalized data field to None)
				continue
			(raw, valid) = self.getAssayData(readId)	# << should deal with 'valid' flags...
			rawQ = []
			norm = {}
			# NOTE(review): nCtrl is filled in below but not read again in this method
			nCtrl = None
			if quad:
				# quadrant-based normalization
				rawQ = [{}, {}, {}, {}]
				nCtrl = self.virusPlate.nqControls[0:]
				for row in raw.keys():
					for col in raw[row].keys():
						qIdx = 2 * (ALPHABET.index(row) % 2) + int(col) % 2
						rawQ[qIdx].setdefault(row, {})
						rawQ[qIdx][row][col] = raw[row][col]
			else:
				# whole plate-based normalization
				rawQ.append(raw)
				nCtrl = [self.virusPlate.nControls]

			for qIdx in range(len(rawQ)):
				# first gather the control well values into a list:
				valList = []
				# NOTE(review): hpValList is collected but not used afterwards
				hpValList = []

				for row in rawQ[qIdx].keys():
					for col in rawQ[qIdx][row].keys():
						if ieMask[row][col] == -1:
							valList.append(float(rawQ[qIdx][row][col]))
						elif ieMask[row][col] == 1:     # for plates w/ no controls
							hpValList.append(float(rawQ[qIdx][row][col]))


				# get the median of the control well values:
				if len(valList) == 0:
					# if there are no control wells, set the median to 0
					print 'No control wells values for readId %s' % readId
					med = 0.0
				else:
					# get the median of the control wells
					med = float(stats.lmedianscore(valList))

				# compute the normalized well value as a fraction of the median control well value:
				for row in rawQ[qIdx].keys():
					norm.setdefault(row, {})
					for col in rawQ[qIdx][row].keys():
						if med == 0.0:
							# set the PoC to 0 if the median is 0 (or no control wells)
							norm[row][col] = 0.0
						else:
							# compute the PoC as the ratio of the well value to the median of the control wells 
							norm[row][col] = float(rawQ[qIdx][row][col]) / med


			#self.norm.append(norm)
			self.setNorm(readId, norm)

	# no normalization: just return the raw data
	def noNorm(self):
		# for writing to the output file:
		self.normMethod = 'none (raw data)'
		self.scoreStr = 'raw data'
		for readId in self.readIdList:
			if self.getNorm(readId) != None:
				# skip the normalization if it is already computed (normalization is invalidated by setting the normalized data field to None)
				continue
			(raw, valid) = self.getAssayData(readId)	# << should deal with 'valid' flags...
			self.setNorm(readId, raw)

	def getPlateInfo(self, analysisSet):
		# this method return information about the plate for writing into the output file
		# pI (plate information) is a dictionary containing a list of all parameter names (referenced by the 'paramList' key)
		# and an data corresponding to each item in the parameter list, using the list item as a key.  
		pI = {}
		pI['paramList'] = []
		pI['plateName'] = self.plateName
		pI['paramList'].append('plateName')
		pI['plateId'] = self.plateId
		pI['paramList'].append('plateId')
		pI['screenName'] = self.screenName
		pI['paramList'].append('screenName')
		pI['screenId'] = self.screenId
		pI['paramList'].append('screenId')
		pI['batchName'] = self.batchName
		pI['paramList'].append('batchName')
		pI['batchId'] = self.batchId
		pI['paramList'].append('batchId')
		pI['virusPlateName'] = self.virusPlate.plateName
		pI['paramList'].append('virusPlateName')
		pI['virusPlateId'] = self.virusPlateId
		pI['paramList'].append('virusPlateId')
		pI['Selection'] = self.selection
		pI['paramList'].append('Selection')
		pI['Selection status'] = self.selectionStatus
		pI['paramList'].append('Selection status')

		assayIdx = 1
		plateStats = {}
                readIds = analysisSet.getReadIds()
		for readId in readIds:
			cond = self.get_condition(readId)        # the condition corresponding to this assay
                        if cond == None:
                                cond = "N/A"
			cString = 'Condition_%d' % assayIdx      # a separate condition name for each assay
			pI[cString] = cond
			pI['paramList'].append(cString)
                        idx = readIds.index(readId)
                        plateStats[idx] = self.computePlateStats(readId) # compute the plate statistics for this assay
                        assayIdx += 1

		pI['Low IE threshold'] = self.lowThreshSlope
		pI['paramList'].append('Low IE threshold')
		pI['High IE threshold'] = self.highThreshSlope
		pI['paramList'].append('High IE threshold')
		pI['Control wells'] = self.controlWells
		pI['paramList'].append('Control wells')

		idxList = plateStats.keys()
		idxList.sort()

		# create separate keys for each statistic for each assay
		for idx in idxList:
			# plate stats:
			#	0 = hp median
			#	1 = hp mean
			#	2 = hp stDev
			#	3 = ctrl median
			#	4 = ctrl mean
			#	5 = ctrl stDev
			key = 'hp median (%d)' % idx
			pI[key] = plateStats[idx][0]
			pI['paramList'].append(key)
			key = 'hp mean (%d)' % idx
			pI[key] = plateStats[idx][1]
			pI['paramList'].append(key)
			key = 'hp stDev (%d)' % idx
			pI[key] = plateStats[idx][2]
			pI['paramList'].append(key)
			key = 'ctl median (%d)' % idx
			pI[key] = plateStats[idx][3]
			pI['paramList'].append(key)
			key = 'ctl mean (%d)' % idx
			pI[key] = plateStats[idx][4]
			pI['paramList'].append(key)
			key = 'ctl stDev (%d)' % idx
			pI[key] = plateStats[idx][5]
			pI['paramList'].append(key)

		return pI

def getQuad(row, col):
	"""Return the 0-based "quadrant" index (0..3) of a well.

	This is for decomposing a 384-well plate into 4 interleaved 96-well
	plates: well A01 of the 384-well plate is well A01 of 96-well plate 1
	(index 0), A02 is well A01 of plate 2 (index 1), B01 is well A01 of
	plate 3 (index 2) and B02 is well A01 of plate 4 (index 3), etc.

	row -- row letter, looked up in ALPHABET
	col -- 1-based column number (anything int() accepts)
	"""
	rowParity = ALPHABET.index(row) % 2
	colParity = (int(col) - 1) % 2
	return rowParity * 2 + colParity

def makeFeatureList(dataFields, defField):
	"""Return the distinct feature names found in dataFields, in order of first appearance.

	dataFields -- comma-separated field descriptions; the feature name is
	              the text before the first ':' of each description
	defField   -- accepted for interface compatibility but not used: the
	              default feature is NOT moved to the front of the list
	"""
	names = []
	for field in dataFields.split(','):
		name = field.split(':')[0]
		if name not in names:
			names.append(name)
	return names

# Assay class definition: contains all information for each assay of each plate:
class Assay:
	"""All information held for one assay (read) of one plate.

	The raw data lives in ``mdData``.  For data loaded from the database
	(``dataSource == 'DB'``) the code below reads it as
	``mdData[row][col]['score'][i]`` / ``mdData[row][col]['type'][i]``; for
	flat-file data ``mdData[row][col]`` is a comma-separated string of values.
	``dataFields`` is a comma-separated list of 'name:format' entries where
	format is '%d' (integer) or '%f' (float).
	"""

	def __init__(self, assayId):
		self.assayId = assayId    # uniquely identifies the assay information
		self.condition = None     # the condition associated with this assay
		self.dataType = None      # the name of the data type for this assay
		self.dataFields = None    # a formatted string describing the data fields
		self.defField = None      # the default field (must be one of the fields in dataFields)
		self.featureList = None   # the list of features corresponding to the data fields
		self.selectedFeature = None   # the feature that is presently selected
		self.loadedFeature = None     # the feature whose data is presently loaded
		self.mdData = None    # unparsed data array read directly from the database
		self.defData = None   # parsed data of the feature that is presently loaded
		self.selData = None   # parsed data from the selected field
		self.valid = None     # array of 'valid' flags
		self.use = 1          # the "use" flag, indicating whether this data should be written, displayed, etc.
		self.norm = None      # normalized data (None indicates that the normalization must be computed)
		self.dataSource = 'DB'    # the data is assumed to come from the database, unless otherwise indicated.

	def addAssayData(self, dataType, dataFields, defField, multidimData, dataSource='DB'):
		"""Attach the data of a new assay and derive the list of feature names.

		The selected feature is initialised to the name part of ``defField``.
		"""
		self.dataSource = dataSource  # database or file
		self.dataType = dataType      # the name of the data type for this assay
		self.dataFields = dataFields  # a formatted string describing the data fields
		self.defField = defField      # the default field (must be one of the fields in dataFields)
		self.mdData = multidimData    # unparsed data array read directly from the database or file
		self.selectedFeature = self.defField.split(':')[0].strip()
		self.featureList = []
		for feat in self.dataFields.split(','):
			# keep the (whitespace-stripped) name of every field, once
			thisFeat = feat.split(':')[0].strip()
			if thisFeat not in self.featureList:
				self.featureList.append(thisFeat)

	def getUse(self):
		# get the "use" flag
		return self.use

	def setUse(self, val):
		# set the "use" flag
		self.use = val

	def addCondition(self, condition):
		# add condition information for the assay. If there are multiple conditions,
		# concatenate them into a single ';'-separated string
		if self.condition is None:
			self.condition = '%s' % condition
		else:
			self.condition = '%s;%s' % (self.condition, condition)

	def get_condition(self):
		# get the condition string
		return self.condition

	def getFeatureList(self):
		# get the list of features
		return self.featureList

	def _convertValue(self, rawVal, fmtCode):
		"""Convert one raw value according to its format code.

		'%d' converts with int(), '%f' with float().  An unknown code or a
		value that cannot be converted is reported on stdout and yields None,
		so one bad well never reuses the value of the previous well.
		"""
		if fmtCode == '%d':
			caster = int
		elif fmtCode == '%f':
			caster = float
		else:
			print('unknown data type: %s' % (fmtCode,))
			return None
		try:
			return caster(rawVal)
		except (TypeError, ValueError):
			# label and value separated by a space, as a two-item print statement would
			print('%s %s' % ('dataVal format error: ', rawVal))
			return None

	def getFeatureData(self):
		"""Return ``(data, valid)`` for the currently selected feature.

		Both items are dictionaries indexed ``[row][col]``.  The parsed data is
		cached: if the selected feature is the one already loaded, the cached
		maps are returned without re-parsing.  ``(None, None)`` is returned if
		the selected feature is not in the feature list.  Every parsed well is
		flagged valid (1); wells whose value could not be converted hold None.
		"""
		if self.selectedFeature == self.loadedFeature:
			return (self.defData, self.valid)

		if self.featureList.count(self.selectedFeature) == 0:
			print("Feature %s doesn't exist" % self.selectedFeature)
			return (None, None)

		# (re)load the feature data into the defData map:
		self.defData = {}
		self.valid = {}
		fIdx = self.featureList.index(self.selectedFeature)

		if self.dataSource == 'DB':
			for row in sorted(self.mdData.keys()):
				cols = self.mdData[row]
				for col in sorted(cols.keys()):
					# each well carries its own per-field format codes
					dataVal = self._convertValue(cols[col]['score'][fIdx],
												 cols[col]['type'][fIdx])
					self.defData.setdefault(row, {})[col] = dataVal
					self.valid.setdefault(row, {})[col] = 1
		else:
			# data from flat file: the format comes from the dataFields description
			fType = self.dataFields.split(',')[fIdx].split(':')[1]
			for row in self.mdData.keys():
				for col in self.mdData[row].keys():
					rawVal = self.mdData[row][col].split(',')[fIdx]
					dataVal = self._convertValue(rawVal, fType)
					self.defData.setdefault(row, {})[col] = dataVal
					self.valid.setdefault(row, {})[col] = 1

		# remember what is loaded so that the next call can skip the parsing
		self.loadedFeature = self.selectedFeature
		return (self.defData, self.valid)

	def setSelectedFeature(self, feature):
		# set the selected feature; unknown feature names are silently ignored
		if self.featureList.count(feature) == 0:
			return
		self.selectedFeature = feature
		MajorObjects.printToLog('setting selected feature to %s (index: %d)' % (self.selectedFeature,
			self.featureList.index(self.selectedFeature)))

	def getSelectedFeature(self):
		return self.selectedFeature

	def getSelectedFeatureIdx(self):
		return self.featureList.index(self.selectedFeature)

	def getSelectedData(self, feature):
		"""Return ``(data, valid)`` for ``feature``, or None if it is unknown.

		This path iterates ``mdData`` as a sequence of database "datum" objects
		(``getAttributes()`` returning row, col, fields, isValid with CLOB-like
		row/col/fields), unlike the dictionary layout used by getFeatureData.
		NOTE(review): this resets ``self.valid``, which getFeatureData also
		returns from its cache -- confirm the two are never interleaved.
		"""
		self.selData = {}
		self.valid = {}
		sIdx = None
		sType = None
		for dIdx, field in enumerate(self.dataFields.split(',')):
			(dName, dType) = field.split(':')
			# names in featureList are stripped, so compare stripped names here too
			if dName.strip() == feature:
				sIdx = dIdx
				sType = dType.strip()
				break

		if sIdx is None:
			print('Unknown feature "%s"' % feature)
			return None

		for datum in self.mdData:
			# extract the data from the database "datum" object
			(row, col, fields, isValid) = datum.getAttributes()
			# convert row/col from CLOB to strings:
			row = row.getSubString(1, 1)
			col = col.getSubString(1, 2)
			dataCols = fields.getSubString(1, fields.length()).split(',')
			if sType == '%d':
				dataVal = int(dataCols[sIdx])
			elif sType == '%f':
				dataVal = float(dataCols[sIdx])
			else:
				dataVal = None

			self.selData.setdefault(row, {})[col] = dataVal
			self.valid.setdefault(row, {})[col] = isValid

		return (self.selData, self.valid)

	def getAssayHeader(self, highlightFeature):
		"""Build the tab-separated column header listing this assay's features.

		Each column reads '<condition>:<feature>'; the feature named by
		``highlightFeature[self.condition][0]`` is prefixed with an asterisk to
		mark it as the source of the normalized data.
		"""
		feat2highlight = highlightFeature[self.condition][0]
		columns = []
		for field in self.dataFields.split(','):
			fVal = field.split(':')[0]
			# compare on the stripped name (featureList holds stripped names) but
			# keep the header text itself exactly as it is written in dataFields
			if fVal.strip() == feat2highlight:
				highlighter = "*"
			else:
				highlighter = ""
			columns.append('%s%s:%s' % (highlighter, self.condition, fVal))
		return '\t'.join(columns)

	def getAssayData(self):
		"""Return ``(data, valid)`` with every field of every well formatted.

		``data[row][col]`` is a tab-separated string holding all field values of
		the well, each rendered with its format code; ``valid[row][col]`` is 1
		for every well.  For database data a missing (None) score is rendered
		as 0 and an unknown format code raises Exception.
		"""
		# the second part of each field description is its format (%f=float, %d=integer)
		fields = self.dataFields.split(',')
		numFields = len(fields)
		fmt = [field.split(':')[1] for field in fields]

		aData = {}
		valid = {}

		if self.dataSource == 'DB':
			for row in sorted(self.mdData.keys()):
				cols = self.mdData[row]
				for col in sorted(cols.keys()):
					parts = []
					for fieldIndex in range(numFields):
						score = cols[col]['score'][fieldIndex]
						if score is None:
							score = 0
						featureType = cols[col]['type'][fieldIndex]

						if featureType == "%f":
							parts.append(featureType % float(score))
						elif featureType == "%d":
							parts.append(featureType % int(score))
						else:
							# %-formatting: score and type need not be strings
							raise Exception("Unknown field type: %s for data value %s" % (featureType, score))

					aData.setdefault(row, {})[col] = "\t".join(parts)
					valid.setdefault(row, {})[col] = 1

		# data from flat file:
		else:
			for row in self.mdData.keys():
				for col in self.mdData[row].keys():
					# split the comma-separated list of values into a list
					dataFlds = self.mdData[row][col].split(',')
					# the first value is formatted unguarded: a failure here propagates
					dataStr = fmt[0] % float(dataFlds[0])
					for i in range(1, len(dataFlds)):
						try:
							# this string is a tab-separated list of data values
							dataStr = '%s\t%s' % (dataStr, fmt[i] % float(dataFlds[i]))
						except (IndexError, TypeError, ValueError):
							# report the field and leave it out of the output string
							fmtCode = None
							if i < len(fmt):
								fmtCode = fmt[i]
							print('%s %s' % ('Assay ID: ', self.assayId))
							print('%s %s %s %s %s %s' % ('dataStr format error: dataType, row, col, fmt, dataFlds: ',
								self.dataType, row, col, fmtCode, dataFlds))
					aData.setdefault(row, {})[col] = dataStr
					valid.setdefault(row, {})[col] = 1        # all data read from a flat file is valid by definition

		return (aData, valid)
	
# IEdata class definition: contains infection efficiency data for each virus plate in each screen/batch/condition combination
class IEdata:
	"""Infection efficiency (IE) data for one virus plate under one condition.

	IE is computed per well as (median over the +selection plates) divided by
	(median over the -selection plates); see computeIE.
	"""

	def __init__(self, virusPlateId, condition):
		self.virusPlateId = virusPlateId  # virus plate ID
		self.virusPlate = None            # virus plate object
		self.virusPlateName = None        # virus plate name
		self.condition = condition        # condition associated with this IE data
		self.screenId = None              # screen ID
		self.screenName = None            # screen name
		self.batchId = None               # batch ID
		self.batchName = None             # batch name
		self.selMinusPlateList = []       # list of puro- plates
		self.selMinusAssayList = []       # list of puro- assays
		self.selPlusPlateList = []        # list of puro+ plates
		self.selPlusAssayList = []        # list of puro+ assays
		self.selMinusData = {}            # puro- data
		self.selPlusData = {}             # puro+ data
		self.ie = {}                      # computed infection efficiency
		self.type = {}                    # type of each well (hairpin, control, etc.)

	def printSelf(self):
		# print the info about this IEdata, one 'name:  value' line per attribute
		for attr in ('virusPlateId', 'virusPlate', 'virusPlateName', 'condition',
					 'screenId', 'screenName', 'batchId', 'batchName',
					 'selMinusPlateList', 'selMinusAssayList',
					 'selPlusPlateList', 'selPlusAssayList'):
			print('%s:  %s' % (attr, getattr(self, attr)))

	def _medianPerWell(self, plateIds, readIds, plateSet):
		"""Return ``{row: {col: median}}`` over the given plate/assay pairs.

		``plateIds[i]`` / ``readIds[i]`` identify one assay; its data is fetched
		with ``plateSet[plateId].getAssayData(readId)``, which is expected to
		return ``(data, valid)`` indexed ``[row][col]``.  Only wells flagged valid
		contribute; a well without any valid value gets whatever vectorMedian
		returns for an empty list (None).
		"""
		pooled = {}
		for readIdx in range(len(readIds)):
			(wellData, valid) = plateSet[plateIds[readIdx]].getAssayData(readIds[readIdx])
			for row in wellData.keys():
				pooled.setdefault(row, {})
				for col in wellData[row].keys():
					pooled[row].setdefault(col, [])
					if valid[row][col]:
						# collect the values of this well across all replicate plates
						pooled[row][col].append(wellData[row][col])

		# replace the list of values for each well with its median:
		for row in pooled.keys():
			for col in pooled[row].keys():
				pooled[row][col] = vectorMedian(pooled[row][col])
		return pooled

	def computeIE(self, mPlateIds, mReadIds, pPlateIds, pReadIds, plateSet, vPlateSet):
		"""Compute the infection efficiency for a set of plates.

		mPlateIds/mReadIds -- parallel lists of puro- plate IDs and assay IDs
		pPlateIds/pReadIds -- parallel lists of puro+ plate IDs and assay IDs
		plateSet           -- plate objects indexed by plate ID
		vPlateSet          -- virus plate objects indexed by virus plate ID

		Nothing is computed unless both assay lists are non-empty.  A well's IE
		is None when its -sel median is 0, or when either median is missing or
		None (no valid data), instead of failing or reusing another well's value.
		"""
		self.selMinusPlateList = mPlateIds        # assign the puro- plate IDs
		self.selMinusAssayList = mReadIds         # assign the puro- plate assays
		self.selPlusPlateList = pPlateIds         # assign the puro+ plate IDs
		self.selPlusAssayList = pReadIds          # assign the puro+ plate assays
		vPlate = vPlateSet[self.virusPlateId]     # get the virus plate object for this virus plate...
		self.virusPlate = vPlate                  # ...and assign it
		self.virusPlateName = vPlate.plateName    # virus plate name

		if len(self.selMinusAssayList) > 0 and len(self.selPlusAssayList) > 0:
			# take the screen and batch identification from the first puro- plate:
			refPlate = plateSet[self.selMinusPlateList[0]]
			self.screenId = refPlate.screenId
			self.screenName = refPlate.screenName
			self.batchId = refPlate.batchId
			self.batchName = refPlate.batchName

			# per-well medians of the -selection and +selection plates:
			self.selMinusData = self._medianPerWell(mPlateIds, mReadIds, plateSet)
			self.selPlusData = self._medianPerWell(pPlateIds, pReadIds, plateSet)

			# finally, compute the infection efficiency as selPlusData/selMinusData
			for row in self.selPlusData.keys():
				self.ie.setdefault(row, {})
				self.type.setdefault(row, {})
				minusRow = self.selMinusData.get(row, {})
				for col in self.selPlusData[row].keys():
					pVal = self.selPlusData[row][col]
					if col in minusRow:
						mVal = minusRow[col]
					else:
						print('Missing -sel value for row: %s, col %s' % (row, col))
						mVal = None

					if pVal is None or mVal is None or mVal == 0:
						# no usable ratio for this well
						self.ie[row][col] = None
					else:
						self.ie[row][col] = float(pVal) / float(mVal)

					# assign the well type from the source virus plate
					self.type[row][col] = vPlate.getWellType(row, col)

	def getPointSets(self, cond):
		"""Create the point sets used for the jFreeChart plots.

		Every well with a known type entry and both medians available is a point
		(x = -sel median, y = +sel median); it is added to the series matching
		its type: 1 hairpin, 0 empty, -1 control, -2 pgw.  Wells of any other
		type are not plotted but still contribute to the maxima.

		Returns (hPointset, cPointset, ePointset, pPointset, typeCount, xMax, yMax)
		where typeCount is [hp, empty, ctrl, pgw].
		"""
		hPointset = XYSeries(cond)
		cPointset = XYSeries(cond)
		ePointset = XYSeries(cond)
		pPointset = XYSeries(cond)
		# well type -> (series, index into typeCount)
		seriesByType = {1: (hPointset, 0), 0: (ePointset, 1),
						-1: (cPointset, 2), -2: (pPointset, 3)}
		xMax = 0
		yMax = 0
		typeCount = [0, 0, 0, 0]  # [hp, empty, ctrl, pgw]
		for row in self.selPlusData.keys():
			for col in self.selPlusData[row].keys():
				try:
					vType = self.type[row][col]    # the data points are colored according to their type
				except KeyError:
					continue

				mVal = self.selMinusData.get(row, {}).get(col)
				pVal = self.selPlusData[row][col]
				if mVal is None or pVal is None:
					# a point needs both coordinates
					continue

				# record the max x value for the plot
				x = float(mVal)
				if x > xMax:
					xMax = x

				# record the max y value for the plot
				y = float(pVal)
				if y > yMax:
					yMax = y

				# add the point to its series and count it
				target = seriesByType.get(vType)
				if target is not None:
					target[0].add(x, y)
					typeCount[target[1]] += 1

		# return the pointsets, counts and max x and y values
		return (hPointset, cPointset, ePointset, pPointset, typeCount, xMax, yMax)

def vectorMedian(vList):
	"""Return the median of a list of values, or None for an empty list.

	The input list is left untouched (a sorted copy is used).  For an odd
	number of values the middle value itself is returned; for an even number
	the two middle values are averaged (dividing by 2.0, so the result is a
	float).
	"""
	count = len(vList)
	if count == 0:
		return None

	ordered = sorted(vList)
	upper = count // 2
	if count % 2:
		# odd: a single middle element
		return ordered[upper]
	# even: mean of the two elements around the middle
	return (ordered[upper - 1] + ordered[upper]) / 2.0

def zStats(dList, zType):
	"""Return ``[centre, scale]`` for z-scoring the values in ``dList``.

	zType 'normal' -- [stats.lmean(dList), stats.lsamplestdev(dList)]
	zType 'robust' -- [median, median of the absolute deviations from it],
	                  both obtained with stats.lmedianscore
	Any other zType prints a warning and yields [0, 0].

	AD: doesn't appear to be used
	"""
	if zType == 'normal':
		# for normal z-score: the mean and StdDev of the list
		return [stats.lmean(dList), stats.lsamplestdev(dList)]

	if zType == 'robust':
		# for robust z-score: the median and MAD of the list
		centre = stats.lmedianscore(dList)
		deviations = [abs(val - centre) for val in dList]
		return [centre, stats.lmedianscore(deviations)]

	print('Warning: unknown zType: %s' % zType)
	return [0, 0]

def computeZ(a, b, c):
	"""Return (a - b) / c, or 0 if evaluating that raises anything.

	Works for either z or robust-z scoring: a is the value, b the centre and
	c the scale.  Any failure (e.g. division by zero) deliberately maps to 0.

	AD: doesn't appear to be used
	"""
	try:
		return (a - b) / c
	except:
		# by contract every failure yields a score of 0
		return 0

def compressList(inList, maskList):
	"""Return the items of ``inList`` whose mask entry is false.

	``maskList`` is indexed in parallel with ``inList``: a true entry (e.g. 1)
	masks the value out, a false entry (e.g. 0) keeps it.

	AD: doesn't appear to be used
	"""
	return [item for pos, item in enumerate(inList) if not maskList[pos]]

def compressList_old(inList):
	"""Return a copy of ``inList`` without the entries that compare equal to None.

	AD: doesn't appear to be used
	"""
	# the equality test (not identity) is kept as is
	return [item for item in inList if not item == None]

def quadrifyWellArray(wa):
	"""Split a row-major well array into four interleaved sub-arrays.

	Returns a list of four arrays (lists of row lists).  Rows with an even
	index feed sub-arrays 0 and 1, rows with an odd index feed sub-arrays 2
	and 3; within a row, even-indexed columns go to the first sub-array of
	the pair and odd-indexed columns to the second.

	AD: doesn't appear to be used
	"""
	quads = [[], [], [], []]
	for rowNum, wells in enumerate(wa):
		firstQuad = 2 * (rowNum % 2)    # index of first quad for this row
		evenCols = [wells[c] for c in range(0, len(wells), 2)]
		oddCols = [wells[c] for c in range(1, len(wells), 2)]
		quads[firstQuad].append(evenCols)
		quads[firstQuad + 1].append(oddCols)
	return quads

def dequadrifyWellArray(waList):
	"""Interleave four sub-arrays back into one row-major well array.

	For every row index of sub-array 0, two output rows are produced: the
	first alternates the columns of sub-arrays 0 and 1, the second those of
	sub-arrays 2 and 3.  The column count is taken from the first row of
	sub-array 0 and applied to all rows.

	AD: doesn't appear to be used
	"""
	wa = []
	for rIdx in range(len(waList[0])):
		upperRow = []
		lowerRow = []
		for cIdx in range(len(waList[0][0])):
			# interleave waList 0 and 1...
			upperRow += [waList[0][rIdx][cIdx], waList[1][rIdx][cIdx]]
			# ...and waList 2 and 3
			lowerRow += [waList[2][rIdx][cIdx], waList[3][rIdx][cIdx]]
		wa += [upperRow, lowerRow]
	return wa

def array2plateMap(wa):
	"""Convert a row-major well array into a ``{row: {col: value}}`` map.

	Row keys are the letters of ALPHABET (first array row is 'A'); column
	keys are 1-based, zero-padded two-digit strings ('01', '02', ...).

	AD: doesn't appear to be used
	"""
	waMap = {}
	for rIdx, wells in enumerate(wa):
		rowMap = waMap[ALPHABET[rIdx]] = {}
		for cIdx, value in enumerate(wells):
			rowMap['%02d' % (cIdx + 1)] = value
	return waMap

def computeBscore(maskedData, allData, normData):
	"""Perform the actual b-score computation.

	maskedData -- values indexed [row][col] used to estimate the row/column
	              effects and the final scale; wells absent from it are skipped
	allData    -- values indexed [row][col] for every well that gets a score;
	              its keys define the rows and columns that are processed
	normData   -- dictionary that receives the scores, indexed [row][col]

	The row statistic and then the column statistic returned by
	stats.lmedianscore (the median, per the comments below) are repeatedly
	subtracted from both maskedData and allData until the summed absolute
	residual of maskedData changes by at most BSCORE_EPS (relative), is zero,
	or the iteration cap is hit.  Each allData residual is then divided by
	1.4826 times the median absolute deviation of the maskedData residuals.

	NOTE: maskedData and allData are modified IN PLACE (they hold residuals
	afterwards).  Returns normData, the same object that was passed in.
	"""
	# performs the actual b-score computation:
	
	# initialize oldSum to 0 and start iterating:
	converged = 0
	oldSum = 0.0
	#grandEffect = 0.0   # <<< not used in any calculations!

	# make lists of row and column keys:
	rowList = allData.keys()
	rowList.sort()
	colList = []

	# create colList: every column key that occurs in any row of allData
	for row in rowList:
		for col in allData[row].keys():
			if colList.count(col) == 0:
				colList.append(col)
	colList.sort()

	rowPolish = {}
	colPolish = {}
	# loopCtr is incremented at the top of each pass, so the first pass is number 0
	loopCtr = -1

	while not converged:

		loopCtr += 1
		# row median polish:
		rList = []
		for row in rowList:
			# make list of row values (wells missing from maskedData are skipped):
			R = []
			for col in colList:
				try:
					R.append(maskedData[row][col])
				except:
					pass

			# compute median of row
			if len(R) > 0:
				rMed = stats.lmedianscore(R)
				rList.append(rMed)
			else:
				rMed = 0.0

			# subtract median from row values
			# (allData is adjusted first: a well that exists in allData but not in
			# maskedData is still corrected before the maskedData lookup fails)
			for col in colList:
				try:
					allData[row][col] -= rMed
					maskedData[row][col] -= rMed
				except:
					pass

		# update grandEffect:
		#if len(rList):
		#	grandEffect += lmedianscore(rList)

		# col median polish:
		cList = []
		for col in colList:
			C = []
			# make list of column values:
			for row in rowList:
				try:
					C.append(maskedData[row][col])
				except:
					pass

			# compute median of column:
			if len(C) > 0:
				cMed = stats.lmedianscore(C)
				cList.append(cMed)
			else:
				cMed = 0.0

			# subtract median from col values (same allData-first order as for rows):
			for row in rowList:
				try:
					allData[row][col] -= cMed
					maskedData[row][col] -= cMed
				except:
					pass

		# update grandEffect:
		#if len(cList):
		#	grandEffect += lmedianscore(cList)

		# compute the new residual sum (sum of absolute maskedData residuals):
		newSum = 0.0
		for row in rowList:
			for col in colList:
				try:
					newSum += abs(maskedData[row][col])
				except:
					pass

		# check convergence: stop when the residual sum is zero, when it changed by no
		# more than BSCORE_EPS relative to its new value, or once loopCtr exceeds 10
		# (loopCtr starts at 0, so at most 12 passes are made):
		if newSum == 0.0 or abs(newSum - oldSum) <= (BSCORE_EPS * newSum) or loopCtr > 10:
			converged = 1
		else:
			oldSum = newSum

	# after convergence, compute the MAD of the residuals:
	residList = []
	for row in rowList:
		for col in colList:
			try:
				residList.append(maskedData[row][col])
			except:
				pass

	# median of residuals:
	if len(residList):
		rMedian = stats.lmedianscore(residList)
	else:
		# no input data (rMedian is not used below in that case: the loop is empty):
		rMedian = None

	# replace each residual by its absolute deviation from the residual median:
	for idx in range(len(residList)):
		residList[idx] = abs(residList[idx] - rMedian)

	# the MAD is adjusted by a fudge-factor
	# (1.4826 -- presumably the usual constant that makes the MAD comparable to a
	# standard deviation for normally distributed data; TODO confirm):
	if len(residList):
		MAD = 1.4826 * stats.lmedianscore(residList)
	else:
		MAD = 1.0

	#print 'MAD: ',MAD

	# score every well of allData; a zero MAD gives a score of 0.0 everywhere, and
	# wells that are missing from allData (or fail the division) get no entry:
	for row in rowList:
		for col in colList:
			try:
				if MAD == 0.0:
					b = 0.0
				else:
					b = allData[row][col] / MAD
				normData.setdefault(row, {})
				normData[row][col] = b
			except:
				pass

	return normData

# VirusPlateInfo class definition: contains information about the virus plates used for infection.
# General info about the number and identification of plates that used this virus plate for infection
class VirusPlateInfo:
	def __init__(self, vPlateId):
		self.vPlateId = vPlateId
		self.counters = {}
		self.readIds = {}
		self.plateIds = {}

	def updateCount(self, condition, selStat, readId, plateId, use):
		# update various counts and lists
		self.counters.setdefault(condition, {})
		self.readIds.setdefault(condition, {})
		self.plateIds.setdefault(condition, {})
		self.counters[condition].setdefault('+', 0)   # count of puro+ plates for this screen:batch:condition
		self.readIds[condition].setdefault('+', [])   # list of puro+ assays for this screen:batch:condition
		self.plateIds[condition].setdefault('+', [])  # list of puro+ plates for this screen:batch:condition
		self.counters[condition].setdefault('-', 0)   # count of puro- plates for this screen:batch:condition
		self.readIds[condition].setdefault('-', [])   # list of puro- assays for this screen:batch:condition
		self.plateIds[condition].setdefault('-', [])  # list of puro- plates for this screen:batch:condition
		if selStat == 'YES' and use:
			self.counters[condition]['+'] += 1               # increment puro+ counter
			self.readIds[condition]['+'].append(readId)      # append assay ID  
			self.plateIds[condition]['+'].append(plateId)    # append plate ID
		elif selStat == 'NO' and use:
			self.counters[condition]['-'] += 1               # increment puro+ counter
			self.readIds[condition]['-'].append(readId)      # append assay ID  
			self.plateIds[condition]['-'].append(plateId)    # append plate ID

	def printSelf(self):
		# print info about the object 
		print '%s:::' % self.vPlateId
		for condition in self.counters.keys():
			print ' %s: puro+: %d' % (condition, self.counters[condition]['+'])
			print '	 readIds: ', self.readIds[condition]['+']
			print ' %s: puro-: %d' % (condition, self.counters[condition]['-'])
			print '	 readIds: ', self.readIds[condition]['-']

	def updateIeData(self, ieData, plateSet, vPlateSet):
		# (re)compute the infection efficiency
		#print 'updateIeData: vPlate: %s, conditions:' % (self.vPlateId), self.counters.keys()
		for condition in self.counters.keys():
			if self.counters[condition]['+'] > 0 and self.counters[condition]['-'] > 0:
				# there is enough data to perform infection efficiency computations:
				ieData.setdefault(self.vPlateId, {})
				ieData[self.vPlateId].setdefault(condition, IEdata(self.vPlateId, condition))
				ieData[self.vPlateId][condition].computeIE(self.plateIds[condition]['-'],
                                                                           self.readIds[condition]['-'],
                                                                           self.plateIds[condition]['+'],
                                                                           self.readIds[condition]['+'],
                                                                           plateSet, vPlateSet)
				# assign IEdata object to each plate used in the IE computation:
				for plateId in self.plateIds[condition]['-']:
					plateSet[plateId].setIeData(ieData[self.vPlateId][condition])
				for plateId in self.plateIds[condition]['+']:
					plateSet[plateId].setIeData(ieData[self.vPlateId][condition])

		return ieData

# VirusPlate class definition:
# This data does not depend on the condition, screen, etc. It is read either from the database
# or from the input flat file. The VirusPlate object contains a set of VpWell objects, which hold all
# of the relevant information about each source virus well.
# NOTE: If the data is read from a file, new data for a well will overwrite any previous data for that well.
class VirusPlate:
	"""A source virus plate: a dictionary of VpWell objects plus control counts.

	Adding a well at a position that is already occupied replaces the stored
	well.  The control counters are kept in step with that: a control well
	that this object counted earlier at the same position is un-counted
	before the new well is counted, so a well is never counted twice.
	"""

	def __init__(self, plateId):
		self.plateId = plateId        # virus plate ID
		self.plateName = None         # virus plate name
		self.well = {}                # dictionary of wells, indexed [row][col]
		self.nControls = 0                # counter for the overall number of control wells (used by PoC normalization)
		self.nqControls = [0, 0, 0, 0]    # counters for the overall number of control wells in each 'quadrant'(used by PoC normalization)
		# (row, col) -> quadrant index that was incremented for a counted control
		# well, or None when no quadrant counter was touched (file wells)
		self._controlQuad = {}

	def _uncountControl(self, row, col):
		# undo the control accounting of a well that is about to be overwritten
		key = (row, col)
		if key in self._controlQuad:
			quadIdx = self._controlQuad.pop(key)
			self.nControls -= 1
			if quadIdx is not None:
				self.nqControls[quadIdx] -= 1

	# add a well to the virus plate
	def addWell(self, row, col, virusId, virusName, sourceContig, sourceStart, sourceEnd, sourceStrand, symbol, prefName,
				 targetSeq, sourcePlateName, sourcePlateRow, soucePlateCol, quad, geneId, cloneId, cloneName, taxon, vtype, nWells):
		"""Create a VpWell from the given fields and store it at ``row``/``col``.

		A well of type -1 is a control: it increments the overall control
		counter and one quadrant counter (always quadrant index 0 when
		``nWells`` is 96, otherwise the quadrant the well maps to).
		"""
		self._uncountControl(row, col)
		# count the number of control wells:
		if vtype == -1:
			# overall
			self.nControls += 1
			if nWells == 96:
				# if this is a 96-well plate, just increment the number of control wells in quadrant 1
				iquad = 0
			else:
				# otherwise, get the quadrant that this well maps to
				iquad = get386Quadrant(row, col)
			self.nqControls[iquad] += 1
			self._controlQuad[(row, col)] = iquad

		self.well.setdefault(row, {})
		# create and add a VpWell to the well dictionary, indexed by row and column
		self.well[row][col] = VpWell(virusId, virusName, sourceContig, sourceStart, sourceEnd, sourceStrand,
									 symbol, prefName, targetSeq, sourcePlateName, sourcePlateRow, soucePlateCol,
									 quad, geneId, cloneId, cloneName, taxon, vtype)

	def addFileWell(self, row, col, cloneId, hpDesc, geneID, symbol, prefName, taxon, vtype):
		"""Store a well whose data was read from a file rather than the database.

		Only the fields available in the file are filled in: the clone ID is
		also used as the virus ID and ``hpDesc`` as the clone name.  Control
		wells (type -1) increment the overall control counter only; the
		per-quadrant counters are not touched on this path.
		"""
		self._uncountControl(row, col)
		# count the number of control wells:
		if vtype == -1:
			self.nControls += 1
			self._controlQuad[(row, col)] = None
		self.well.setdefault(row, {})
		self.well[row][col] = VpWell(cloneId, None, None, None, None, None,
									 symbol, prefName, None, None, None, None,
									 None, geneID, cloneId, hpDesc, taxon, vtype)

	def set_plateName(self, vPlateName):
		# set the virus plate name
		self.plateName = vPlateName

	def get_plateName(self):
		# get the virus plate name
		return self.plateName

	def getWellType(self, row, col):
		# get the virus well type; None if there is no (usable) well at row/col
		try:
			return self.well[row][col].type
		except (KeyError, AttributeError, TypeError):
			return None

def get386Quadrant(row, col):
	"""Map a row and column of a 384-well plate to its 0-based quadrant (0-3).

	The result is 2 * (row index modulo 2) + ((column - 1) modulo 2), which
	is exactly what getQuad computes, so the work is delegated to it.  (The
	"386" in the name is historical; it is kept because callers use it.)
	"""
	return getQuad(row, col)

# CloneInfo class definition: contains information for each clone
class CloneInfo:
	"""Plain record holding the information stored for one clone."""

	def __init__(self, sourceContig, sourceStart, sourceEnd, sourceStrand,
				 symbol, prefName, targetSeq, geneId, cloneName, taxon, vtype):
		# genomic location: contig, start position, end position and strand
		self.sourceContig, self.sourceStart = sourceContig, sourceStart
		self.sourceEnd, self.sourceStrand = sourceEnd, sourceStrand

		# target gene: symbol, preferred name, NCBI gene ID and NCBI taxon code
		self.symbol, self.prefName = symbol, prefName
		self.geneId, self.taxon = geneId, taxon

		# the clone itself: its name and the sequence targeted by the hairpin
		self.cloneName, self.targetSeq = cloneName, targetSeq

		# virus type: 0=EMPTY, -2=pgw, -1=control, 1=valid, gene-targeting hairpin
		self.vtype = vtype

# VpWell class definition: contains all relevant information about the source virus plate well
class VpWell:
	def __init__(self, virusId, virusName, sourceContig, sourceStart, sourceEnd, sourceStrand, symbol, prefName,
				 targetSeq, sourcePlateName, sourcePlateRow, sourcePlateCol, quad, geneId, cloneId, cloneName, taxon, vtype):
		self.virusId = virusId                # virus ID
		self.virusName = virusName            # virus name
		self.sourceContig = sourceContig      # genomic contig
		self.sourceStart = sourceStart        # genomic start position
		self.sourceEnd = sourceEnd            # genomic end position
		self.sourceStrand = sourceStrand      # genomic strand
		self.symbol = symbol                  # target gene symbol
		self.prefName = prefName              # preferred gene name
		self.targetSeq = targetSeq            # sequence targeted by the hairpin
		self.sourcePlateName = sourcePlateName    # source virus plate name (this plate may have been rearrayed from a different plate)
		self.sourcePlateRow = sourcePlateRow      # source virus plate row
		self.sourcePlateCol = sourcePlateCol      # source virus plate column
		self.quad = quad                      # virus plate quadrant
		self.geneId = geneId                  # NCBI gene ID
		self.cloneId = cloneId                # TRC clone ID
		self.cloneName = cloneName            # TRC clone name
		self.taxon = taxon                    # target gene taxon (by NCBI taxon code)
		self.type = vtype                     # virus type: 0=EMPTY, -2=pgwm -1=control, 1=valid, gene-targeting hairpin

	def printWell(self, row, col):
		# print out the well info (for debugging)
		print '%s%s:' % (row, col)
		print " type: ", self.type
		print " virusId: ", self.virusId
		print " virusName: ", self.virusName
		#print " sourceContig: ",self.sourceContig
		#print " sourceStart: ",self.sourceStart
		#print " sourceEnd: ",self.sourceEnd
		#print " sourceStrand: ",self.sourceStrand
		print " symbol: ", self.symbol
		print " prefName: ", self.prefName
		#print " targetSeq: ",self.targetSeq
		print " sourcePlateName: ", self.sourcePlateName
		#print " sourcePlateRow: ",self.sourcePlateRow
		#print " sourcePlateCol: ",self.sourcePlateCol
		#print " quad: ",self.quad
		print " geneId: ", self.geneId
		print " cloneId: ", self.cloneId
		print " cloneName: ", self.cloneName
		#print " taxon: ",self.taxon

# Hairpin class description: this contains all identifying information about a hairpin
# AD: this class does not appear to be used anywhere
class Hairpin:
	"""Identifying information and accumulated scores for a single hairpin.

	Every statistics dictionary (rawCount, normSum, ieList, ...) is nested
	three levels deep and is indexed as d[cond][selStat][batch].
	"""

	# per-key scalar fields, created as 0 the first time a key is seen
	_SCALAR_FIELDS = ('rawCount', 'rawSum', 'rawMean', 'rawStd',
		'normCount', 'normSum', 'normMean', 'normStd', 'normConf',
		'ieCount', 'ieSum', 'ieMean', 'ieStd')
	# per-key value lists, created as [] the first time a key is seen
	_LIST_FIELDS = ('rawList', 'normList', 'ieList')

	def __init__(self, hpName, virusId, vType, scoring, prefName, symbol, geneId, taxonId, cloneID,
				 hpSeq, transTgt, contig, start, end, strand):
		self.hpName = hpName      # hairpin name
		self.scoring = scoring
		self.virusId = virusId
		self.cloneID = cloneID
		self.vType = vType
		self.transTgt = transTgt
		self.hpSeq = hpSeq
		self.symbol = symbol
		self.prefName = prefName
		self.geneId = geneId
		self.taxonId = taxonId
		self.contig = contig
		self.start = start
		self.end = end
		self.strand = strand

		# accumulators, filled by updateHairpin()
		self.rawCount = {}
		self.rawSum = {}
		self.rawList = {}
		self.normCount = {}
		self.normSum = {}
		self.normList = {}
		self.ieCount = {}
		self.ieSum = {}
		self.ieList = {}

		# derived statistics, filled by getHpLine()
		self.rawMean = {}
		self.normMean = {}
		self.normMin = None
		self.normMax = None
		self.ieMean = {}
		self.rawStd = {}
		self.normStd = {}
		self.ieStd = {}
		self.normConf = {}
		self.confMin = None
		self.confMax = None

	def resetLimits(self):
		"""Forget the cached min/max of the score means and confidences."""
		self.normMin = None
		self.normMax = None
		self.confMin = None
		self.confMax = None

	def _accumulate(self, sums, lists, counts, cond, selStat, batch, value):
		# add one measurement to the sum/list/count entries of a key; a value
		# that cannot be converted to float (e.g. None) is skipped
		try:
			number = float(value)
		except (TypeError, ValueError):
			return
		sums[cond][selStat][batch] += number
		lists[cond][selStat][batch].append(number)
		counts[cond][selStat][batch] += 1

	def updateHairpin(self, cond, batch, selStat, raw, norm, ie, ieMask, hpName, cloneId):
		"""Record one measurement of this hairpin.

		The (cond, selStat, batch) key is created in every statistics
		dictionary even when the sample is masked.  When ieMask is 0 nothing
		else is recorded; otherwise raw, ie and norm are each added to their
		sum, list and count if they convert to float.  hpName and cloneId are
		accepted for the callers' benefit but not used.  The cached limits
		are always reset.
		"""
		self.resetLimits()
		for name in self._SCALAR_FIELDS:
			getattr(self, name).setdefault(cond, {}).setdefault(selStat, {}).setdefault(batch, 0)
		for name in self._LIST_FIELDS:
			getattr(self, name).setdefault(cond, {}).setdefault(selStat, {}).setdefault(batch, [])

		# ignore if ieMask indicates that this sample should be ignored:
		if ieMask == 0:
			return

		self._accumulate(self.rawSum, self.rawList, self.rawCount, cond, selStat, batch, raw)
		self._accumulate(self.ieSum, self.ieList, self.ieCount, cond, selStat, batch, ie)
		self._accumulate(self.normSum, self.normList, self.normCount, cond, selStat, batch, norm)

	def getHpHeader(self):
		"""Return the tab-separated header line matching getHpLine()."""
		header1 = 'condition\tbatch\thairpin name\tselection status\tcount'
		header2 = 'raw mean\tscore mean\tscore STD\tIE mean'
		header3 = 'virus ID\ttarget trans.\tsymbol\tpref. name\tgene ID\ttaxon ID\ttarget seq.\tcontig\tstart\tend\tstrand'
		return '%s\t%s\t%s\n' % (header1, header2, header3)

	def getHpScores(self):
		"""Return {cond: {selStat: {batch: [score mean, score STD]}}}.

		Both entries are None for a key that has no raw values or no
		normalized values.
		"""
		scores = {}
		for cond in self.rawCount.keys():
			scores[cond] = {}
			for selStat in self.rawCount[cond].keys():
				scores[cond][selStat] = {}
				for batch in self.rawCount[cond][selStat].keys():
					nRaw = self.rawCount[cond][selStat][batch]
					nNorm = self.normCount[cond][selStat][batch]
					# the mean divides by the score count, so that count has
					# to be checked too: a sample can have a raw value but no
					# usable score
					if nRaw > 0 and nNorm > 0:
						normMean = self.normSum[cond][selStat][batch] / nNorm
						normStd = myStdev(self.normList[cond][selStat][batch])
					else:
						normMean = None
						normStd = None
					scores[cond][selStat][batch] = [normMean, normStd]
		return scores

	def _storeMean(self, sums, counts, means, cond, selStat, batch):
		# store sum/count for a key in `means` (None when the key has no
		# values) and return the stored value
		count = counts[cond][selStat][batch]
		if count > 0:
			mean = sums[cond][selStat][batch] / count
		else:
			mean = None
		means[cond][selStat][batch] = mean
		return mean

	def _fmtScore(self, value):
		# text used for a statistic in the output table
		if value is None:
			return 'N/A'
		return '%6.3f' % value

	def getHpLine(self):
		"""Recompute the per-key statistics and return them as table rows.

		Updates rawMean, ieMean, normMean, normStd and normConf for every key
		and the normMin/normMax/confMin/confMax limits.  One tab-separated,
		newline-terminated row is produced for each key that has at least one
		raw value; the rows are returned concatenated.
		"""
		rows = []
		for cond in self.rawCount.keys():
			for selStat in self.rawCount[cond].keys():
				for batch in self.rawCount[cond][selStat].keys():
					rawMean = self._storeMean(self.rawSum, self.rawCount, self.rawMean,
											  cond, selStat, batch)
					ieMean = self._storeMean(self.ieSum, self.ieCount, self.ieMean,
											 cond, selStat, batch)
					normMean = self._storeMean(self.normSum, self.normCount, self.normMean,
											   cond, selStat, batch)

					if normMean is not None:
						# save min/max normMean:
						if self.normMin is None or normMean < self.normMin:
							self.normMin = normMean
						if self.normMax is None or normMean > self.normMax:
							self.normMax = normMean

						normStd = myStdev(self.normList[cond][selStat][batch])
						normConf = hpConf(self.normCount[cond][selStat][batch], normMean, normStd)

						# save min/max normConf:
						if self.confMin is None or normConf < self.confMin:
							self.confMin = normConf
						if self.confMax is None or normConf > self.confMax:
							self.confMax = normConf
					else:
						normStd = None
						normConf = None
					self.normStd[cond][selStat][batch] = normStd
					self.normConf[cond][selStat][batch] = normConf

					nRaw = self.rawCount[cond][selStat][batch]
					if nRaw > 0:
						outStr1 = '%s\t%s\t%s\t%s\t%d' % (cond, batch, self.hpName, selStat, nRaw)
						outStr2 = '%s\t%s\t%s\t%s' % (self._fmtScore(rawMean), self._fmtScore(normMean),
													  self._fmtScore(normStd), self._fmtScore(ieMean))
						outStr3 = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % \
								  (self.virusId, self.transTgt, self.symbol, self.prefName,
								   self.geneId, self.taxonId, self.hpSeq, self.contig, self.start,
								   self.end, self.strand)
						rows.append('%s\t%s\t%s\n' % (outStr1, outStr2, outStr3))
		return ''.join(rows)

	def _rigerCondName(self, cond, selStat, batch):
		# string representation of one experiment condition for RIGER output
		upperSel = selStat.upper()
		if upperSel.startswith('PURO'):
			# create the puro selection string:
			if upperSel.endswith('YES'):
				selStr = 'p+'
			elif upperSel.endswith('NO'):
				selStr = 'p-'
			else:
				selStr = 'p*'
		else:
			selStr = '%s' % selStat

		if cond == '':
			return '%s_%s' % (batch, selStr)
		return '%s_%s_%s' % (batch, cond, selStr)

	def getRigerCondIdList(self, condIdList, nCells):
		"""Add this hairpin's condition labels to condIdList (in place).

		nCells is incremented once per (cond, selStat, batch) key of this
		hairpin.  Returns the tuple (condIdList, nCells).
		"""
		for cond in self.rawCount.keys():
			for selStat in self.rawCount[cond].keys():
				for batch in self.rawCount[cond][selStat].keys():
					gseaCond = self._rigerCondName(cond, selStat, batch)
					nCells += 1
					# update list if necessary:
					if gseaCond not in condIdList:
						condIdList.append(gseaCond)
		return (condIdList, nCells)

	def getRigerLine(self, condIdList, hpName=None):
		"""Return this hairpin's tab-separated RIGER row (no trailing newline).

		The row is the virus ID, a '<hpName>_<geneId>_<prefName>' description
		and one score-mean column per entry of condIdList (empty when the
		hairpin has no score for that condition).  Returns '' when every
		score column would be empty.
		"""
		hpDesc = '%s_%s_%s' % (hpName, self.geneId, self.prefName)

		scoreMap = {}
		for cond in self.rawCount.keys():
			for selStat in self.rawCount[cond].keys():
				for batch in self.rawCount[cond][selStat].keys():
					gseaCond = self._rigerCondName(cond, selStat, batch)
					# make the string representation of the score mean:
					nNorm = self.normCount[cond][selStat][batch]
					if nNorm > 0:
						scoreMap[gseaCond] = '%6.3f' % (self.normSum[cond][selStat][batch] / nNorm)
					else:
						scoreMap[gseaCond] = ''

		scoreCols = [scoreMap.get(gseaCond, '') for gseaCond in condIdList]
		# if there are no results for any of the test conditions, return '':
		if not ''.join(scoreCols):
			return ''
		return '\t'.join(['%s\t%s' % (self.virusId, hpDesc)] + scoreCols)

def safeCharacterString(string):
	"""Return string with every character GENE-E/RIGER rejects replaced by '_'.

	The replaced characters are: @ # space % $ : * backslash and /.
	"""
	safename = string
	for badChar in ('@', '#', ' ', '%', '$', ':', '*', '\\', '/'):
		safename = safename.replace(badChar, '_')
	return safename

# another median computation function
# AD: this doesn't appear to be used
def myMedianscore(dList):
	"""Return the median of dList without modifying it.

	For an odd number of values this is the middle value of the sorted data;
	for an even number it is the mean of the two middle values (a float).
	An empty list raises IndexError.
	"""
	ordered = sorted(dList)
	# divmod keeps the middle index an integer even where '/' is true division
	lMid, isOdd = divmod(len(ordered), 2)
	if isOdd:
		return ordered[lMid]
	return (ordered[lMid - 1] + ordered[lMid]) / 2.0

# standard deviation function
def myStdev(dList):
	"""Return the population standard deviation (divisor N) of dList.

	An empty list raises ZeroDivisionError.
	"""
	nValues = len(dList)

	# mean of the values
	total = 0.0
	for value in dList:
		total += value
	mean = total / nValues

	# sum of squared deviations from the mean
	squaredDev = 0.0
	for value in dList:
		squaredDev += (value - mean) ** 2

	return math.sqrt(squaredDev / nValues)

# hairpin "confidence" algorithm
def hpConf(N, dMean, dStd):
	"""Return the "confidence" value of a hairpin score.

	A single measurement (N == 1) gives -10000.0, a mean of exactly 0.0 gives
	0.0, and anything else gives the standard deviation dStd unchanged.
	"""
	if N == 1:
		return -10000.0
	if dMean == 0.0:
		return 0.0
	return dStd

# Gene class definition: contains information needed for gene-level scoring
class Gene:
	"""Gene-level view over the hairpins that target one gene."""

	def __init__(self, modelHp):
		"""Copy the gene annotation from modelHp, a hairpin targeting this gene."""
		self.hpList = []                  # list of hairpins targeting this gene
		self.geneId = modelHp.geneId      # NCBI gene ID
		self.taxonId = modelHp.taxonId    # taxon ID
		self.symbol = modelHp.symbol      # gene symbol
		self.transTgt = modelHp.transTgt  # target transcription
		self.prefName = modelHp.prefName  # preferred gene name
		self.contig = modelHp.contig      # genomic contig ID
		self.start = modelHp.start        # genomic start position
		self.end = modelHp.end            # genomic end location
		self.strand = modelHp.strand      # genomic strand
		self.scoring = modelHp.scoring    # scoring/normalization method used
		self.minScore = None              # minimum score for a hairpin for this gene
		self.maxScore = None              # maximum score for a hairpin for this gene
		self.minConf = None               # minimum confidence for any hairpin for this gene
		self.maxConf = None               # maximum confidence for any hairpin for this gene

	def resetLimits(self):
		"""Reset all mins and maxes to None."""
		self.minScore = None
		self.maxScore = None
		self.minConf = None
		self.maxConf = None

	def addHairpin(self, hairpin):
		"""Add hairpin to this gene's hairpin list unless it is already there."""
		if hairpin not in self.hpList:
			self.hpList.append(hairpin)

	def getHpCounts(self, bCombine):
		"""Return a tab-separated summary of the score counts for this gene.

		The counts are the hairpins' normCount values summed over all
		hairpins of the gene, per condition and selection status; they are
		additionally summed over batches when bCombine is true and reported
		per batch otherwise.  Conditions are listed in sorted order.
		"""
		selCount = {}
		for hairpin in self.hpList:
			for cond in hairpin.normCount.keys():
				condCounts = selCount.setdefault(cond, {})
				for selStat in hairpin.normCount[cond].keys():
					batchCounts = hairpin.normCount[cond][selStat]
					if bCombine:
						# group counts for all batches together:
						condCounts.setdefault(selStat, 0)
						for batch in batchCounts.keys():
							condCounts[selStat] += batchCounts[batch]
					else:
						# separate counts by batch, still summing over hairpins
						perBatch = condCounts.setdefault(selStat, {})
						for batch in batchCounts.keys():
							perBatch[batch] = perBatch.get(batch, 0) + batchCounts[batch]

		# one (condition, description) pair per reported count
		entries = []
		for cond in sorted(selCount.keys()):
			for selStat in selCount[cond].keys():
				if bCombine:
					entries.append((cond, '%s: %d hairpins' % (selStat, selCount[cond][selStat])))
				else:
					# iterate the batches that were actually counted rather
					# than those of whichever hairpin was processed last
					for batch in selCount[cond][selStat].keys():
						entries.append((cond, '%s: %s: %d hairpins' %
										(batch, selStat, selCount[cond][selStat][batch])))

		# build the output string; the first entry is written 'cond: ...' and
		# every later one '<tab>cond : ...' (existing output format)
		outStr = ''
		for cond, description in entries:
			if outStr == '':
				outStr = '%s: %s' % (cond, description)
			else:
				outStr = '%s\t%s : %s' % (outStr, cond, description)
		return outStr

	def _lowest(self, current, candidate):
		# smaller of the two values, ignoring None
		if candidate is None:
			return current
		if current is None or candidate < current:
			return candidate
		return current

	def _highest(self, current, candidate):
		# larger of the two values, ignoring None
		if candidate is None:
			return current
		if current is None or candidate > current:
			return candidate
		return current

	def getGeneScoreRange(self):
		"""Compute the min/max scores and confidences over this gene's hairpins.

		Calls getHpLine() on every hairpin to refresh its limits first.
		Hairpins whose limits are None (no scores) are ignored.  Returns
		(minScore, maxScore, minConf, maxConf); each is None when no hairpin
		supplied a value.
		"""
		self.resetLimits()
		for hairpin in self.hpList:
			# force computation of hairpin norm min and max:
			hairpin.getHpLine()
			self.minScore = self._lowest(self.minScore, hairpin.normMin)
			self.maxScore = self._highest(self.maxScore, hairpin.normMax)
			self.minConf = self._lowest(self.minConf, hairpin.confMin)
			self.maxConf = self._highest(self.maxConf, hairpin.confMax)

		return (self.minScore, self.maxScore, self.minConf, self.maxConf)

	def getHairpinScores(self):
		"""Return {hairpin: {cond: {selStat: {batch: [hpName, score, conf]}}}}.

		score and conf are read from each hairpin's normMean and normConf.
		"""
		hpScores = {}
		for hairpin in self.hpList:
			byCond = hpScores.setdefault(hairpin, {})
			for cond in hairpin.normMean.keys():
				bySel = byCond.setdefault(cond, {})
				for selStat in hairpin.normMean[cond].keys():
					byBatch = bySel.setdefault(selStat, {})
					for batch in hairpin.normMean[cond][selStat].keys():
						byBatch[batch] = [hairpin.hpName,
										  hairpin.normMean[cond][selStat][batch],
										  hairpin.normConf[cond][selStat][batch]]
		return hpScores

def fieldSort(a, b):
	"""Comparison function for sorting a list of two-element lists.

	Orders by the first element and, when those are equal, by the second.
	Returns -1, 1 or 0 in the manner of cmp().
	"""
	for position in (0, 1):
		if a[position] < b[position]:
			return -1
		if a[position] > b[position]:
			return 1
	# both the first and second elements are the same
	return 0

# PlateSet class definition: used to group a set of plates together (either screening plates or virus plates)
class PlateSet:
	def __init__(self, plateSetName):
		self.name = plateSetName  # the name of the PlateSet
		self.plate = {}           # dictionary of plaes in the set
		self.normMethod = None    # normalization method used for this plate set (if any)
		self.quad = 0             # whole-plate (0) or quadrant-based (1) normalization

        def getReadIds(self):
                readIds = []
                for plateId in self.getSortedPlateIdList():
                        plate = self.getPlate(plateId)
                        if not plate.use:
                                continue
                        for readId in plate.get_readIdList():
                                if not readId in readIds:
                                        readIds.append(readId)
                readIds.sort()
                return readIds

        def getAssayForReadId(self, readId):
                for plateId in self.getSortedPlateIdList():
                        plate = self.getPlate(plateId)
                        if not plate.use:
                                continue
                        for readIdInList in plate.get_readIdList():
                                if readIdInList == readId:
                                        return plate.getAssay(readId)
                return None

	def addPlate(self, plate):
		"""Add plate to the set, keyed by its plateId (replacing any previous entry)."""
		plateId = plate.plateId
		self.plate[plateId] = plate

	def getPlate(self, plateId):
		"""Return the plate stored under plateId.

		When there is no such plate, the known plate IDs are printed to
		stdout (debugging aid) and None is returned.
		"""
		try:
			return self.plate[plateId]
		except (KeyError, TypeError):
			# unknown (or unusable) plate ID: show what is available
			print(self.plate.keys())
			return None

	def getUseAssay(self, assayKey):
		"""Return the 'use' flag of the plate named in assayKey.

		assayKey is a '<plateID>:<assayID>' string; only the plate ID is
		needed to look the flag up.
		"""
		keyParts = assayKey.split(':')
		plateId, _readId = keyParts[0], keyParts[1]
		return self.plate[plateId].use

	def setUseAssay(self, assayKey, value):
		"""Set the 'use' flag of the plate named in assayKey to value.

		assayKey is a '<plateID>:<assayID>' string; the flag is stored on
		the plate, not on the individual assay.
		"""
		keyParts = assayKey.split(':')
		plateId, _readId = keyParts[0], keyParts[1]
		self.plate[plateId].use = value

	def getPlateIdList(self):
		"""Return the plate IDs in this plate set (in no particular order)."""
		return self.plate.keys()

	def getSortedPlateIdList(self):
		"""Return the plate IDs in this plate set as a new, sorted list."""
		# the keys of self.plate are the plate IDs themselves
		return sorted(self.plate.keys())

	def getPlateIdListSortedByVplate(self):
		"""Return the plate IDs sorted by the virus plate used.

		Plates are ordered by virus plate ID, then screen ID, batch ID and
		selection status, and finally by plate ID, so plates that share a
		virus plate / screen / batch / selection status stay together and
		the result does not depend on dictionary ordering.
		"""
		def groupKey(plateId):
			# sort key: the grouping hierarchy followed by the plate ID itself
			plate = self.getPlate(plateId)
			return (plate.virusPlateId, plate.screenId, plate.batchId,
					plate.selectionStatus, plateId)

		return sorted(self.plate.keys(), key=groupKey)

	def getRawData(self, plateId):
		"""Return the result of getRaw() on the plate stored under plateId."""
		plate = self.plate[plateId]
		return plate.getRaw()

	def addKeyValuePair(self, key, value):
		"""Store value under key in the set's general key:value table (self.kvp)."""
		self.kvp[key] = value

	def getValue(self, key):
		"""Return the value stored under key in self.kvp, or None if there is none."""
		try:
			return self.kvp[key]
		except (AttributeError, KeyError, TypeError):
			# no key:value table yet, unknown key or unusable key
			return None

	def setQuad(self, quadFlag):
		"""Store quadFlag as the set's quadrant/plate normalization flag."""
		self.quad = quadFlag

	def getQuad(self):
		"""Return the set's quadrant/plate normalization flag."""
		return self.quad

	def normalizePlates(self, method, lowThresh, highThresh):
		"""Normalize every plate in the set with the requested method.

		method selects the per-plate routine: 'b-score', 'z-score',
		'robust z-score' or 'PoC'; any other value calls plate.noNorm().
		Each plate's controlWells attribute is first set from the
		'controlWells' field of masterObject.
		NOTE(review): lowThresh and highThresh are not used in the part of
		this method visible here -- confirm whether they are still needed.
		"""
		# set how control wells are to be treated:
		controlWells = masterObject.getField('controlWells')

		# get the list of plate ID for this plate set
		idList = self.getPlateIdList()
		for plateId in idList:
			# get the plate
			plate = self.getPlate(plateId)
			# get the method for handling control wells
			plate.controlWells = controlWells

			# normalize the plate using the selected method
			# (self.quad is passed through as the whole-plate/quadrant flag)
			if method == 'b-score':
				plate.bScore(self.quad)
			elif method == 'z-score':
				plate.zScore(self.quad, 'normal')
			elif method == 'robust z-score':
				plate.zScore(self.quad, 'robust')
			elif method == 'PoC':
				plate.PoC(self.quad)
			else:
				plate.noNorm()

