/// <summary>
/// Builds a <see cref="TargetDatabase"/> of consensus targets from the supplied datasets.
/// </summary>
/// <remarks>
/// <para>
/// Pass 1: every dataset's evidences are filtered (observed NET window, target filter,
/// alignment filter); for the TOP_DOWN workflow the target aligner's regression is stored
/// on the dataset.
/// </para>
/// <para>
/// The surviving evidences are clustered into consensus targets, which are converted into
/// <c>UMCLight</c> mass tags (one per charge state) to serve as the LCMSWarp baseline.
/// </para>
/// <para>
/// Pass 2: every dataset whose tool is not MSAlign is aligned against the baseline with
/// LCMSWarp and the residuals are written back into its evidences. For workflows other than
/// TOP_DOWN the consensus statistics are then recalculated and the consensus targets and
/// proteins are added to the returned database.
/// </para>
/// <para>
/// Progress is reported through <c>UpdateProgress</c>; <c>AlignmentComplete</c> is raised
/// once after pass 2.
/// </para>
/// </remarks>
/// <param name="dataSets">Datasets to process. The sequence is materialised once and enumerated twice.</param>
/// <param name="bWorker">Worker whose <c>CancellationPending</c> flag is polled between datasets.</param>
/// <returns>
/// The target database. If cancellation or an abort is detected, the database is returned
/// as constructed so far (consensus targets are only added in the final step).
/// </returns>
public TargetDatabase Process(IEnumerable<LcmsDataSet> dataSets, BackgroundWorker bWorker)
{
    m_abortRequested = false;
    m_currentItem = 0;

    // Materialise once: the sequence is walked in both passes below.
    dataSets = dataSets.ToList();

    // Each dataset counts as one item per pass.
    m_totalItems = 2 * dataSets.Count();

    OnPercentProgressChanged(new PercentCompleteEventArgs(0));

    // Deal with DataSetId - Auto increments - Not in this class only
    var evidenceMap = new Dictionary<int, Evidence>();
    var targetDatabase = new TargetDatabase();
    var aligner = TargetAlignmentFactory.Create(ProcessorOptions);
    var clusterer = TargetClustererFactory.Create(ProcessorOptions.TargetFilterType);
    var epicTargets = new List<Evidence>();

    // ---- Pass 1: filter evidences per dataset ----
    foreach (var dataSet in dataSets)
    {
        float percentComplete = (float)m_currentItem / m_totalItems;
        UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Determining Consensus Targets");

        if (bWorker.CancellationPending || m_abortRequested)
        {
            return targetDatabase;
        }

        var targetFilter = TargetFilterFactory.Create(dataSet.Tool, ProcessorOptions);
        var alignmentFilter = AlignmentFilterFactory.Create(dataSet.Tool, ProcessorOptions);

        var filteredTargets = new List<Evidence>();
        var alignedTargets = new List<Evidence>();

        foreach (var t in dataSet.Evidences)
        {
            // Exclude carryover peptides.
            // Would be evidenced by a sizable difference between observed net and predicted net
            if (t.ObservedNet >= ProcessorOptions.MinimumObservedNet &&
                t.ObservedNet <= ProcessorOptions.MaximumObservedNet)
            {
                // To prevent filtration of evidences which have previously passed alignment,
                // previously analyzed datasets bypass the target filter.
                if (dataSet.PreviouslyAnalyzed || !targetFilter.ShouldFilter(t))
                {
                    filteredTargets.Add(t);

                    if (!alignmentFilter.ShouldFilter(t))
                    {
                        alignedTargets.Add(t);
                    }
                }
            }
        }

        epicTargets.AddRange(filteredTargets);

        if (ProcessorOptions.TargetFilterType == TargetWorkflowType.TOP_DOWN)
        {
            dataSet.RegressionResult = aligner.AlignTargets(filteredTargets, alignedTargets);
        }

        m_currentItem++;
    }

    // Create the database (the list of consensus targets)
    // Convert the list of targets into a list of MassTagLights for LCMS to use as baseline

    // Cluster initially to provide a baseline for LCMSWarp
    var newTargets = clusterer.Cluster(epicTargets);

    int i = 0, j = 0;
    var tempConsensusTargets = new List<ConsensusTarget>();
    var proteinDict = new Dictionary<string, ProteinInformation>();

    foreach (var consensusTarget in newTargets)
    {
        // Sequential ids: i over consensus targets, j over all clustered evidences.
        consensusTarget.Id = ++i;

        foreach (var target in consensusTarget.Evidences)
        {
            target.Id = ++j;
        }

        consensusTarget.CalculateStatistics();
        tempConsensusTargets.Add(consensusTarget);
    }

    var massTagLightTargets = new List<UMCLight>();
    foreach (var consensusTarget in tempConsensusTargets)
    {
        // Scan range spanned by the members of this consensus target.
        var driftStart = double.MaxValue;
        var driftEnd = double.MinValue;

        foreach (var member in consensusTarget.Evidences)
        {
            driftStart = Math.Min(member.Scan, driftStart);
            driftEnd = Math.Max(member.Scan, driftEnd);
        }

        // One mass tag per charge state. AddRange enumerates the projection immediately,
        // so capturing the loop locals in the lambda is safe.
        // NOTE(review): 1.00727649 is presumably the proton mass in Da - confirm.
        massTagLightTargets.AddRange(consensusTarget.Charges.Select(charge => new UMCLight
        {
            Net = consensusTarget.PredictedNet,
            ChargeState = charge,
            Mz = (consensusTarget.TheoreticalMonoIsotopicMass + (charge * 1.00727649)) / charge,
            MassMonoisotopic = consensusTarget.TheoreticalMonoIsotopicMass,
            Id = consensusTarget.Id,
            MassMonoisotopicAligned = consensusTarget.TheoreticalMonoIsotopicMass,
            DriftTime = driftEnd - driftStart,
            Scan = (int)((driftStart + driftEnd) / 2.0),
            ScanStart = (int)driftStart,
            ScanEnd = (int)driftEnd,
        }));
    }

    if (bWorker.CancellationPending || m_abortRequested)
    {
        return targetDatabase;
    }

    var alignmentData = new List<LcmsWarpAlignmentData>();
    var options = new LcmsWarpAlignmentOptions();
    var lcmsAligner = new LcmsWarpAdapter(options);

    // For performing net warping without mass correction
    // NOTE(review): the same options instance was already handed to lcmsAligner above; if the
    // adapter keeps a reference rather than a copy, both aligners run as NET_WARP - confirm.
    options.AlignType = PNNLOmics.Algorithms.Alignment.LcmsWarp.AlignmentType.NET_WARP;
    var lcmsNetAligner = new LcmsWarpAdapter(options);

    // ---- Pass 2: LCMSWarp alignment per dataset ----
    foreach (var dataSet in dataSets)
    {
        float percentComplete = (float)m_currentItem / m_totalItems;
        UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Performing LCMSWarp Alignment");

        if (bWorker.CancellationPending || m_abortRequested)
        {
            return targetDatabase;
        }

        if (dataSet.Tool == LcmsIdentificationTool.MSAlign)
        {
            // MSAlign datasets are not warped, but they were counted in m_totalItems,
            // so the progress counter must still advance.
            m_currentItem++;
            continue;
        }

        dataSet.Evidences.Sort((x, y) => x.Scan.CompareTo(y.Scan));

        // Only put evidences that pass the minimum observed net in these lists.
        var umcDataset = new List<UMCLight>();
        var evidenceAndUmc = new List<EvidenceUMCAssociation>();

        foreach (var evidence in dataSet.Evidences)
        {
            if (evidence.ObservedNet >= ProcessorOptions.MinimumObservedNet)
            {
                var umc = new UMCLight
                {
                    Net = evidence.ObservedNet,
                    ChargeState = evidence.Charge,
                    Mz = evidence.Mz,
                    Scan = evidence.Scan,
                    MassMonoisotopic = evidence.MonoisotopicMass,
                    MassMonoisotopicAligned = evidence.MonoisotopicMass,
                    Id = evidence.Id,
                    ScanStart = evidence.Scan,
                    ScanEnd = evidence.Scan,
                };

                umcDataset.Add(umc);
                evidenceAndUmc.Add(new EvidenceUMCAssociation(evidence, umc));
            }
        }

        umcDataset.Sort((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));

        // Best effort: try the first aligner, fall back to the NET-only aligner, and
        // treat the dataset as unaligned if both throw.
        LcmsWarpAlignmentData alignedData;
        try
        {
            alignedData = lcmsAligner.Align(massTagLightTargets, umcDataset);
        }
        catch
        {
            try
            {
                alignedData = lcmsNetAligner.Align(massTagLightTargets, umcDataset);
            }
            catch
            {
                alignedData = null;
            }
        }

        if (alignedData != null)
        {
            // Copy the residual data back into the evidences.
            // Skipped when alignment failed: the regression is reset to identity below, so
            // the evidences are left unwarped to match.
            // NOTE(review): assumes NetAligned is not meaningfully populated when Align throws - confirm.
            foreach (var pair in evidenceAndUmc)
            {
                pair.Evidence.MonoisotopicMass = pair.UMC.MassMonoisotopicAligned;

                var netShift = pair.UMC.NetAligned - pair.UMC.Net;
                pair.Evidence.NetShift = netShift;
                pair.Evidence.ObservedNet += netShift;
            }
        }

        // Register every evidence of this dataset by id; the first evidence seen for an id wins.
        foreach (var evidence in dataSet.Evidences)
        {
            if (!evidenceMap.ContainsKey(evidence.Id))
            {
                evidenceMap.Add(evidence.Id, evidence);
            }
        }

        if (alignedData != null)
        {
            dataSet.RegressionResult.Slope = alignedData.NetSlope;
            dataSet.RegressionResult.Intercept = alignedData.NetIntercept;
            dataSet.RegressionResult.RSquared = alignedData.NetRsquared;
            alignmentData.Add(alignedData);
        }
        else
        {
            // Alignment failed: record an identity regression.
            dataSet.RegressionResult.Slope = 1;
            dataSet.RegressionResult.Intercept = 0;
            dataSet.RegressionResult.RSquared = 0;
        }

        m_currentItem++;
    }

    if (AlignmentComplete != null)
    {
        AlignmentComplete(this, new AlignmentCompleteArgs(alignmentData));
    }

    if (ProcessorOptions.TargetFilterType != TargetWorkflowType.TOP_DOWN)
    {
        i = j = 0;

        foreach (var consensus in tempConsensusTargets)
        {
            // Swap each member for the evidence registered under its id during pass 2.
            // Throws KeyNotFoundException if an id was never registered.
            for (var evNum = 0; evNum < consensus.Evidences.Count; evNum++)
            {
                consensus.Evidences[evNum] = evidenceMap[consensus.Evidences[evNum].Id];
            }

            // Recalculate the target's data from the warped values
            consensus.Id = ++i;
            foreach (var target in consensus.Evidences)
            {
                target.Id = ++j;
            }

            consensus.CalculateStatistics();
            targetDatabase.AddConsensusTarget(consensus);

            foreach (var protein in consensus.Proteins)
            {
                ProteinInformation known;
                if (!proteinDict.TryGetValue(protein.ProteinName, out known))
                {
                    proteinDict.Add(protein.ProteinName, protein);

                    // Don't need to manually link the first consensus to the protein
                    continue;
                }

                known.Consensus.Add(consensus);
            }
        }

        targetDatabase.Proteins = proteinDict.Values.ToList();
    }

    return targetDatabase;
}
/// <summary>
/// Reads an AMT database stored in a Microsoft Access file into a <see cref="TargetDatabase"/>.
/// </summary>
/// <remarks>
/// Access is automated to export the <c>AMT</c>, <c>AMT_Proteins</c> and
/// <c>AMT_to_Protein_Map</c> tables as delimited text files next to the database. Those files
/// are parsed and then deleted, whether or not parsing succeeds.
/// <para>
/// Each AMT row becomes a <c>ConsensusTarget</c>. One synthetic <c>Evidence</c> is added per
/// recorded observation (column 4) so that statistics computed over the evidences reflect the
/// observation count.
/// </para>
/// <para>
/// NOTE(review): rows are split on ',' with no handling of quoted fields or embedded commas,
/// and numbers are parsed with the current culture - confirm this matches the export format.
/// </para>
/// </remarks>
/// <param name="path">Path of the Access database file.</param>
/// <returns>A database holding the consensus targets and proteins read from the file.</returns>
public TargetDatabase ReadDb(string path)
{
    // Use the framework's path handling instead of splitting on '\\' and dropping every piece
    // that contains a '.', which loses directories with dots in their names.
    var directory = Path.GetDirectoryName(path) ?? string.Empty;

    var amtFile = Path.Combine(directory, "outTempAMT.txt");
    var proteinFile = Path.Combine(directory, "outTempAMT_Proteins.txt");
    var mapFile = Path.Combine(directory, "outTempAMT_to_Protein_Map.txt");
    var tempFiles = new[] { amtFile, proteinFile, mapFile };

    var consensusTargets = new Dictionary<int, ConsensusTarget>();
    var proteins = new Dictionary<int, ProteinInformation>();

    try
    {
        // Export the three tables to text files via Access automation.
        var accApplication = new ACCESS.Application();
        try
        {
            accApplication.OpenCurrentDatabase(path);

            accApplication.DoCmd.TransferText(
                TransferType: ACCESS.AcTextTransferType.acExportDelim,
                TableName: "AMT",
                FileName: amtFile,
                HasFieldNames: true);
            accApplication.DoCmd.TransferText(
                TransferType: ACCESS.AcTextTransferType.acExportDelim,
                TableName: "AMT_Proteins",
                FileName: proteinFile,
                HasFieldNames: true);
            accApplication.DoCmd.TransferText(
                TransferType: ACCESS.AcTextTransferType.acExportDelim,
                TableName: "AMT_to_Protein_Map",
                FileName: mapFile,
                HasFieldNames: true);

            accApplication.CloseCurrentDatabase();
        }
        finally
        {
            // Always shut the automation instance down, even if the export failed.
            accApplication.Quit();
        }

        // AMT rows -> consensus targets.
        // Columns: 0 = id, 1 = monoisotopic mass, 2 = average NET, 3 = predicted NET,
        //          4 = observation count, 5 = normalized score, 6 = sequence with numeric mods.
        using (var reader = new StreamReader(amtFile))
        {
            reader.ReadLine(); // header

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                var pieces = line.Split(',');

                var target = new ConsensusTarget
                {
                    Id = Convert.ToInt32(pieces[0]),
                    TheoreticalMonoIsotopicMass = Convert.ToDouble(pieces[1]),
                    AverageNet = Convert.ToDouble(pieces[2]),
                    PredictedNet = Convert.ToDouble(pieces[3]),
                    EncodedNumericSequence = pieces[6]
                };

                var totalEvidences = Convert.ToInt32(pieces[4]);
                var normScore = Convert.ToDouble(pieces[5]);

                // One evidence per observation, all carrying the target's aggregate values.
                for (var evNum = 0; evNum < totalEvidences; evNum++)
                {
                    var evidence = new Evidence
                    {
                        ObservedNet = target.AverageNet,
                        ObservedMonoisotopicMass = target.TheoreticalMonoIsotopicMass,
                        PredictedNet = target.PredictedNet,
                        NormalizedScore = normScore,
                        SeqWithNumericMods = target.EncodedNumericSequence,
                        Parent = target
                    };
                    target.Evidences.Add(evidence);
                }

                consensusTargets.Add(target.Id, target);
            }
        }

        // Protein rows. Columns: 0 = id, 1 = protein name.
        using (var reader = new StreamReader(proteinFile))
        {
            reader.ReadLine(); // header

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                var pieces = line.Split(',');
                var protein = new ProteinInformation
                {
                    ProteinName = pieces[1]
                };
                proteins.Add(Convert.ToInt32(pieces[0]), protein);
            }
        }

        // Map rows link a consensus target (column 0) to a protein (column 1).
        using (var reader = new StreamReader(mapFile))
        {
            reader.ReadLine(); // header

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                var pieces = line.Split(',');
                consensusTargets[Convert.ToInt32(pieces[0])].AddProtein(proteins[Convert.ToInt32(pieces[1])]);
            }
        }
    }
    finally
    {
        // Remove the temporary exports on every path. File.Exists guards against masking the
        // original exception when a file (or its directory) was never created.
        foreach (var tempFile in tempFiles)
        {
            if (File.Exists(tempFile))
            {
                File.Delete(tempFile);
            }
        }
    }

    var database = new TargetDatabase();
    foreach (var target in consensusTargets.Values)
    {
        database.AddConsensusTarget(target);
    }
    database.Proteins = proteins.Values.ToList();

    return database;
}
/// <summary>
/// Loads a SQLite target database into <c>m_targetDb</c> and <c>m_lcmsDataDic</c>.
/// </summary>
/// <remarks>
/// All entity tables are read in a single stateless session, then the object graph is rebuilt
/// in memory: PTMs are attached to their consensus targets, proteins are attached to evidences
/// and consensus targets, and evidences are grouped into datasets keyed by dataset name.
/// The call is a no-op when <paramref name="path"/> equals the last successfully read file.
/// </remarks>
/// <param name="path">Path of the SQLite database file.</param>
private void ReadSqLite(string path)
{
    // Don't read again if we just read the file
    if (path == m_lastReadFile)
    {
        return;
    }

    // Reset the data. The cached file name is invalidated together with the data so that a
    // failed read cannot leave "already read" pointing at a file whose data was just cleared.
    m_lastReadFile = null;
    m_targetDb.ClearTargets();
    m_lcmsDataDic.Clear();

    DatabaseFactory.DatabaseFile = path;
    DatabaseFactory.ReadOrAppend = true;
    var sessionFactory = DatabaseFactory.CreateSessionFactory(DatabaseType.SQLite);

    var readConsensus = new List<ConsensusTarget>();
    var readPair = new List<ConsensusProteinPair>();
    var readProt = new List<ProteinInformation>();
    var readEvidence = new List<Evidence>();
    var readPtms = new List<PostTranslationalModification>();
    var readPtmPairs = new List<ConsensusPtmPair>();
    var readOptions = new List<Options>();

    var consensusDic = new Dictionary<int, ConsensusTarget>();
    var consensusProtDic = new Dictionary<int, List<ConsensusProteinPair>>();
    var consensusPtmDic = new Dictionary<int, List<ConsensusPtmPair>>();
    var protDic = new Dictionary<int, ProteinInformation>();
    var ptmDic = new Dictionary<int, PostTranslationalModification>();

    using (var session = sessionFactory.OpenStatelessSession())
    {
        using (var transact = session.BeginTransaction())
        {
            session.CreateCriteria<ProteinInformation>().List(readProt);
            session.CreateCriteria<ConsensusTarget>().List(readConsensus);
            session.CreateCriteria<PostTranslationalModification>().List(readPtms);
            session.CreateCriteria<Options>().List(readOptions);
            session.CreateCriteria<ConsensusProteinPair>().List(readPair);
            session.CreateCriteria<ConsensusPtmPair>().List(readPtmPairs);
            session.CreateCriteria<Evidence>().List(readEvidence);
            transact.Commit();
        }

        // Register the consensus targets with empty PTM/evidence collections; both are
        // repopulated from the pair and evidence tables below.
        foreach (var consensus in readConsensus)
        {
            consensus.Ptms.Clear();
            consensus.Evidences = new List<Evidence>();
            consensus.Sequence = consensus.CleanSequence;
            m_targetDb.AddConsensusTarget(consensus);
            consensusDic.Add(consensus.Id, consensus);
        }

        // Group the consensus/protein pairs by consensus id.
        foreach (var pair in readPair)
        {
            List<ConsensusProteinPair> pairsForConsensus;
            if (!consensusProtDic.TryGetValue(pair.Consensus.Id, out pairsForConsensus))
            {
                pairsForConsensus = new List<ConsensusProteinPair>();
                consensusProtDic.Add(pair.Consensus.Id, pairsForConsensus);
            }
            pairsForConsensus.Add(pair);
        }

        // Group the consensus/PTM pairs by consensus id.
        foreach (var pair in readPtmPairs)
        {
            List<ConsensusPtmPair> pairsForTarget;
            if (!consensusPtmDic.TryGetValue(pair.Target.Id, out pairsForTarget))
            {
                pairsForTarget = new List<ConsensusPtmPair>();
                consensusPtmDic.Add(pair.Target.Id, pairsForTarget);
            }
            pairsForTarget.Add(pair);
        }

        foreach (var prot in readProt)
        {
            protDic.Add(prot.Id, prot);
        }

        foreach (var ptm in readPtms)
        {
            ptmDic.Add(ptm.Id, ptm);
        }

        // Attach a located copy of each PTM definition to its consensus target.
        foreach (var ptmGroup in consensusPtmDic)
        {
            foreach (var pair in ptmGroup.Value)
            {
                var definition = ptmDic[pair.PostTranslationalModification.Id];
                var owner = consensusDic[pair.Target.Id];

                var ptm = new PostTranslationalModification
                {
                    Mass = definition.Mass,
                    Name = definition.Name,
                    Formula = definition.Formula,
                    Location = pair.Location,
                    Parent = owner
                };
                owner.Ptms.Add(ptm);
            }
        }

        foreach (var evidence in readEvidence)
        {
            var parent = consensusDic[evidence.Parent.Id];

            // A consensus target without any protein pair has no entry in consensusProtDic;
            // such evidences are loaded without protein links instead of aborting the read.
            List<ConsensusProteinPair> proteinPairs;
            if (consensusProtDic.TryGetValue(evidence.Parent.Id, out proteinPairs))
            {
                foreach (var pair in proteinPairs)
                {
                    // The ProteinInformation instance is shared across all pairs referencing
                    // the protein, so these fields hold the values of the last pair applied.
                    var prot = protDic[pair.Protein.Id];
                    prot.ResidueEnd = pair.ResidueEnd;
                    prot.ResidueStart = pair.ResidueStart;
                    prot.TerminusState = (clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants)pair.TerminusState;
                    prot.CleavageState = (clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants)pair.CleavageState;

                    evidence.AddProtein(prot);
                    parent.AddProtein(prot);
                }
            }

            evidence.MonoisotopicMass = parent.TheoreticalMonoIsotopicMass;
            evidence.Ptms = parent.Ptms;

            // Group evidences into datasets keyed by dataset name, creating each on first use.
            var dataSetName = evidence.DataSet.Name;
            if (!m_lcmsDataDic.ContainsKey(dataSetName))
            {
                var dataset = new LcmsDataSet(true);
                m_lcmsDataDic.Add(dataSetName, dataset);
                m_lcmsDataDic[dataSetName].Name = dataSetName;
                m_lcmsDataDic[dataSetName].Tool = evidence.DataSet.Tool;
            }

            m_lcmsDataDic[dataSetName].Evidences.Add(evidence);
            parent.AddEvidence(evidence);
        }
    }

    // Set the member variable to avoid double reads.
    m_lastReadFile = path;
}