// Runs LCMSWarp (NET + mass warp, both calibration types) on a baseline/alignee pair of
// delimited feature files and saves the heatmap, the NET/mass error histograms and the
// residual scatter plots as SVG files named "<name>_<plot>.svg" under the output path.
// The method only produces plot files; it makes no assertions about the alignment output.
//
// relativeBaselinePath: baseline feature file, resolved through GetPath.
// relativeAligneePath:  alignee feature file, resolved through GetPath.
// relativeOutput:       output directory, resolved through GetOutputPath; created if missing.
// name:                 file-name prefix shared by every plot written.
public void TestLcmsWarpPort(string relativeBaselinePath, string relativeAligneePath, string relativeOutput, string name)
{
    var options = new LcmsWarpAlignmentOptions
    {
        AlignType = AlignmentType.NET_MASS_WARP,
        CalibrationType = LcmsWarpCalibrationType.Both
    };
    var aligner = new LcmsWarpAdapter(options);

    var baseline = ReadLcmsWarpFeatures(GetPath(relativeBaselinePath));
    var features = ReadLcmsWarpFeatures(GetPath(relativeAligneePath));

    // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
    var outputPath = GetOutputPath(relativeOutput);
    Directory.CreateDirectory(outputPath);

    var outputData = aligner.Align(baseline, features);
    var residuals = outputData.ResidualData;

    var heatmap = HeatmapFactory.CreateAlignedHeatmap(outputData.HeatScores);
    var netHistogram = HistogramFactory.CreateHistogram(outputData.NetErrorHistogram, "NET Error", "NET Error");
    var massHistogram = HistogramFactory.CreateHistogram(outputData.MassErrorHistogram, "Mass Error", "Mass Error (ppm)");
    var netResidual = ScatterPlotFactory.CreateResidualPlot(residuals.Scan, residuals.LinearCustomNet, residuals.LinearNet, "NET Residuals", "Scans", "NET");
    var massMzResidual = ScatterPlotFactory.CreateResidualPlot(residuals.Mz, residuals.MzMassError, residuals.MzMassErrorCorrected, "Mass Residuals", "m/z", "Mass Errors");
    var massScanResidual = ScatterPlotFactory.CreateResidualPlot(residuals.Scan, residuals.MzMassError, residuals.MzMassErrorCorrected, "Mass Residuals", "Scan", "Mass Errors");

    // Not a directory: this is "<outputPath>/<name>", the common prefix of every plot file.
    var filePrefix = Path.Combine(outputPath, name);
    var encoder = new SvgEncoder();

    PlotImageUtility.SaveImage(heatmap, filePrefix + "_heatmap.svg", encoder);
    PlotImageUtility.SaveImage(netResidual, filePrefix + "_netResidual.svg", encoder);
    PlotImageUtility.SaveImage(massMzResidual, filePrefix + "_massMzResidual.svg", encoder);
    PlotImageUtility.SaveImage(massScanResidual, filePrefix + "_massScanResidual.svg", encoder);
    PlotImageUtility.SaveImage(netHistogram, filePrefix + "_netHistogram.svg", encoder);
    PlotImageUtility.SaveImage(massHistogram, filePrefix + "_massHistogram.svg", encoder);
}

// Reads a delimited feature file into UMCLight instances, one per non-blank line.
// Columns, in order: Net, ChargeState, Mz, Scan, MassMonoisotopic, MassMonoisotopicAligned,
// Id, ScanStart, ScanEnd, ScanAligned. Fields are split on TextDelimiter with empty
// entries removed.
private List<UMCLight> ReadLcmsWarpFeatures(string path)
{
    // Parse with the invariant culture so the fixtures load the same way regardless of the
    // machine's regional settings (e.g. cultures that use ',' as the decimal separator).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var delimiter = new[] { TextDelimiter };

    return (from line in File.ReadAllLines(path)
            where line.Trim().Length != 0 // skip blank and whitespace-only lines
            select line.Split(delimiter, StringSplitOptions.RemoveEmptyEntries)
            into fields
            select new UMCLight
            {
                Net = Convert.ToDouble(fields[0], invariant),
                ChargeState = Convert.ToInt32(fields[1], invariant),
                Mz = Convert.ToDouble(fields[2], invariant),
                Scan = Convert.ToInt32(fields[3], invariant),
                MassMonoisotopic = Convert.ToDouble(fields[4], invariant),
                MassMonoisotopicAligned = Convert.ToDouble(fields[5], invariant),
                Id = Convert.ToInt32(fields[6], invariant),
                ScanStart = Convert.ToInt32(fields[7], invariant),
                ScanEnd = Convert.ToInt32(fields[8], invariant),
                ScanAligned = Convert.ToInt32(fields[9], invariant)
            }).ToList();
}
/// <summary>
/// Builds a target database from the supplied datasets in two passes.
/// Pass 1 filters each dataset's evidences and collects the survivors, which are then clustered
/// into consensus targets. Pass 2 warps every non-MSAlign dataset against those consensus
/// targets with LCMSWarp and writes the aligned mass and NET back into the evidences.
/// For workflows other than TOP_DOWN the consensus statistics are then recalculated and the
/// targets and their proteins are added to the returned database.
/// </summary>
/// <param name="dataSets">
/// Datasets to process. The sequence is materialized once; the evidences and regression
/// results of the contained datasets are modified in place.
/// </param>
/// <param name="bWorker">
/// Worker polled for cancellation. May be null, in which case only the internal abort flag
/// is consulted.
/// </param>
/// <returns>
/// The target database. When cancellation is detected, or when the workflow is TOP_DOWN,
/// the database is returned without any consensus targets having been added by this method.
/// </returns>
public TargetDatabase Process(IEnumerable<LcmsDataSet> dataSets, BackgroundWorker bWorker)
{
    m_abortRequested = false;
    m_currentItem = 0;

    // Materialize once: the datasets are walked twice, and each walk is one progress
    // tick per dataset, hence the factor of two.
    var dataSetList = dataSets.ToList();
    m_totalItems = 2 * dataSetList.Count;
    OnPercentProgressChanged(new PercentCompleteEventArgs(0));

    // Evidences keyed by Id; used after warping to re-link cluster members.
    var evidenceMap = new Dictionary<int, Evidence>();
    var targetDatabase = new TargetDatabase();
    var aligner = TargetAlignmentFactory.Create(ProcessorOptions);
    var clusterer = TargetClustererFactory.Create(ProcessorOptions.TargetFilterType);
    var epicTargets = new List<Evidence>();

    // ---- Pass 1: filter evidences per dataset ----
    foreach (var dataSet in dataSetList)
    {
        float percentComplete = (float)m_currentItem / m_totalItems;
        UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Determining Consensus Targets");
        if (IsCancellationRequested(bWorker))
        {
            return targetDatabase;
        }

        var targetFilter = TargetFilterFactory.Create(dataSet.Tool, ProcessorOptions);
        var alignmentFilter = AlignmentFilterFactory.Create(dataSet.Tool, ProcessorOptions);

        var filteredTargets = new List<Evidence>();
        var alignedTargets = new List<Evidence>();

        foreach (var t in dataSet.Evidences)
        {
            // Keep only evidences whose observed NET lies inside the configured range
            // (original note: meant to exclude carryover peptides).
            if (t.ObservedNet < ProcessorOptions.MinimumObservedNet ||
                t.ObservedNet > ProcessorOptions.MaximumObservedNet)
            {
                continue;
            }

            // Previously analyzed datasets bypass the target filter so that evidences
            // which already passed alignment are not filtered out again.
            if (!dataSet.PreviouslyAnalyzed && targetFilter.ShouldFilter(t))
            {
                continue;
            }

            filteredTargets.Add(t);
            if (!alignmentFilter.ShouldFilter(t))
            {
                alignedTargets.Add(t);
            }
        }

        epicTargets.AddRange(filteredTargets);

        if (ProcessorOptions.TargetFilterType == TargetWorkflowType.TOP_DOWN)
        {
            dataSet.RegressionResult = aligner.AlignTargets(filteredTargets, alignedTargets);
        }

        m_currentItem++;
    }

    // ---- Cluster the surviving evidences to obtain a baseline for LCMSWarp ----
    var newTargets = clusterer.Cluster(epicTargets);
    int i = 0, j = 0;
    var tempConsensusTargets = new List<ConsensusTarget>();
    var proteinDict = new Dictionary<string, ProteinInformation>();

    foreach (var consensusTarget in newTargets)
    {
        // Sequential, 1-based ids for the clusters and for every member evidence.
        consensusTarget.Id = ++i;
        foreach (var target in consensusTarget.Evidences)
        {
            target.Id = ++j;
        }

        consensusTarget.CalculateStatistics();
        tempConsensusTargets.Add(consensusTarget);
    }

    var massTagLightTargets = CreateBaselineFeatures(tempConsensusTargets);

    if (IsCancellationRequested(bWorker))
    {
        return targetDatabase;
    }

    var alignmentData = new List<LcmsWarpAlignmentData>();

    // Each adapter gets its OWN options instance. The original shared one object and changed
    // its AlignType after constructing the first adapter; if the adapter keeps a reference to
    // its options, that would silently turn the primary aligner into a NET-only aligner too.
    var lcmsAligner = new LcmsWarpAdapter(new LcmsWarpAlignmentOptions());

    // Fallback aligner: NET warping without mass correction.
    var netOnlyOptions = new LcmsWarpAlignmentOptions
    {
        AlignType = PNNLOmics.Algorithms.Alignment.LcmsWarp.AlignmentType.NET_WARP
    };
    var lcmsNetAligner = new LcmsWarpAdapter(netOnlyOptions);

    // ---- Pass 2: warp each dataset against the consensus baseline ----
    foreach (var dataSet in dataSetList)
    {
        float percentComplete = (float)m_currentItem / m_totalItems;
        UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Performing LCMSWarp Alignment");
        if (IsCancellationRequested(bWorker))
        {
            return targetDatabase;
        }

        if (dataSet.Tool == LcmsIdentificationTool.MSAlign)
        {
            // MSAlign datasets are not warped here, but they still count as a processed
            // item; without this the progress counter could never reach the total.
            m_currentItem++;
            continue;
        }

        dataSet.Evidences.Sort((x, y) => x.Scan.CompareTo(y.Scan));

        // Only evidences at or above the minimum observed NET take part in the warp.
        var umcDataset = new List<UMCLight>();
        var evidenceAndUmc = new List<EvidenceUMCAssociation>();
        foreach (var evidence in dataSet.Evidences)
        {
            if (evidence.ObservedNet < ProcessorOptions.MinimumObservedNet)
            {
                continue;
            }

            var umc = new UMCLight
            {
                Net = evidence.ObservedNet,
                ChargeState = evidence.Charge,
                Mz = evidence.Mz,
                Scan = evidence.Scan,
                MassMonoisotopic = evidence.MonoisotopicMass,
                MassMonoisotopicAligned = evidence.MonoisotopicMass,
                Id = evidence.Id,
                ScanStart = evidence.Scan,
                ScanEnd = evidence.Scan,
            };
            umcDataset.Add(umc);
            evidenceAndUmc.Add(new EvidenceUMCAssociation(evidence, umc));
        }

        umcDataset.Sort((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));

        // Best effort: try the primary aligner, fall back to NET-only warping, and treat
        // the dataset as unaligned (null) if both fail.
        LcmsWarpAlignmentData alignedData;
        try
        {
            alignedData = lcmsAligner.Align(massTagLightTargets, umcDataset);
        }
        catch (Exception)
        {
            try
            {
                alignedData = lcmsNetAligner.Align(massTagLightTargets, umcDataset);
            }
            catch (Exception)
            {
                alignedData = null;
            }
        }

        foreach (var data in dataSet.Evidences)
        {
            // First evidence registered under an id wins.
            if (!evidenceMap.ContainsKey(data.Id))
            {
                evidenceMap.Add(data.Id, data);
            }
        }

        if (alignedData != null)
        {
            // Copy the warped mass and NET back into the evidences. This is done only after
            // a successful alignment: otherwise the UMCs' aligned values were never produced
            // and copying them would overwrite the observed NET while an identity regression
            // is recorded below.
            // NOTE(review): assumes a failed Align leaves NetAligned unset - confirm.
            foreach (var pair in evidenceAndUmc)
            {
                pair.Evidence.MonoisotopicMass = pair.UMC.MassMonoisotopicAligned;

                var netShift = pair.UMC.NetAligned - pair.UMC.Net;
                pair.Evidence.NetShift = netShift;
                pair.Evidence.ObservedNet += netShift;
            }

            dataSet.RegressionResult.Slope = alignedData.NetSlope;
            dataSet.RegressionResult.Intercept = alignedData.NetIntercept;
            dataSet.RegressionResult.RSquared = alignedData.NetRsquared;
            alignmentData.Add(alignedData);
        }
        else
        {
            // No alignment available: record an identity transform with zero fit quality.
            dataSet.RegressionResult.Slope = 1;
            dataSet.RegressionResult.Intercept = 0;
            dataSet.RegressionResult.RSquared = 0;
        }

        m_currentItem++;
    }

    // Copy the delegate first so a concurrent unsubscribe cannot null it between check and call.
    var alignmentComplete = AlignmentComplete;
    if (alignmentComplete != null)
    {
        alignmentComplete(this, new AlignmentCompleteArgs(alignmentData));
    }

    if (ProcessorOptions.TargetFilterType != TargetWorkflowType.TOP_DOWN)
    {
        i = j = 0;
        foreach (var consensus in tempConsensusTargets)
        {
            // Swap each member for the instance registered in the evidence map.
            for (var evNum = 0; evNum < consensus.Evidences.Count; evNum++)
            {
                consensus.Evidences[evNum] = evidenceMap[consensus.Evidences[evNum].Id];
            }

            // Recalculate the target's data from the warped values.
            consensus.Id = ++i;
            foreach (var target in consensus.Evidences)
            {
                target.Id = ++j;
            }

            consensus.CalculateStatistics();
            targetDatabase.AddConsensusTarget(consensus);

            foreach (var protein in consensus.Proteins)
            {
                ProteinInformation knownProtein;
                if (proteinDict.TryGetValue(protein.ProteinName, out knownProtein))
                {
                    knownProtein.Consensus.Add(consensus);
                }
                else
                {
                    // The first consensus seen for a protein does not need to be linked manually.
                    proteinDict.Add(protein.ProteinName, protein);
                }
            }
        }

        targetDatabase.Proteins = proteinDict.Values.ToList();
    }

    return targetDatabase;
}

// True when either the background worker (if one was supplied) or the internal abort flag
// signals that processing should stop.
private bool IsCancellationRequested(BackgroundWorker bWorker)
{
    return m_abortRequested || (bWorker != null && bWorker.CancellationPending);
}

// Converts consensus targets into the LCMSWarp baseline: one UMCLight per charge state of
// each target, carrying the target's predicted NET, theoretical monoisotopic mass and the
// scan range spanned by its member evidences.
private static List<UMCLight> CreateBaselineFeatures(IEnumerable<ConsensusTarget> consensusTargets)
{
    // Mass added per charge when converting monoisotopic mass to m/z
    // (presumably the proton mass in Da - confirm).
    const double chargeCarrierMass = 1.00727649;

    var baseline = new List<UMCLight>();
    foreach (var consensus in consensusTargets)
    {
        // Scan range covered by the cluster's members.
        var firstScan = double.MaxValue;
        var lastScan = double.MinValue;
        foreach (var member in consensus.Evidences)
        {
            firstScan = Math.Min(member.Scan, firstScan);
            lastScan = Math.Max(member.Scan, lastScan);
        }

        baseline.AddRange(consensus.Charges.Select(charge => new UMCLight
        {
            Net = consensus.PredictedNet,
            ChargeState = charge,
            Mz = (consensus.TheoreticalMonoIsotopicMass + (charge * chargeCarrierMass)) / charge,
            MassMonoisotopic = consensus.TheoreticalMonoIsotopicMass,
            Id = consensus.Id,
            MassMonoisotopicAligned = consensus.TheoreticalMonoIsotopicMass,
            DriftTime = lastScan - firstScan,
            Scan = (int)((firstScan + lastScan) / 2.0),
            ScanStart = (int)firstScan,
            ScanEnd = (int)lastScan,
        }));
    }

    return baseline;
}