コード例 #1
0
ファイル: LCMSWarpTest.cs プロジェクト: msdna/MultiAlign
        /// <summary>
        /// Aligns the features of one file against the features of a baseline file with LCMSWarp
        /// (NET + mass warp, both calibration types) and saves the resulting heatmap, histograms
        /// and residual plots as SVG files.
        /// </summary>
        /// <param name="relativeBaselinePath">Baseline feature file; resolved through GetPath.</param>
        /// <param name="relativeAligneePath">Feature file to align; resolved through GetPath.</param>
        /// <param name="relativeOutput">Output folder; resolved through GetOutputPath and created when missing.</param>
        /// <param name="name">Prefix used for every generated SVG file name.</param>
        public void TestLcmsWarpPort(string relativeBaselinePath, string relativeAligneePath, string relativeOutput, string name)
        {
            var baselinePath = GetPath(relativeBaselinePath);
            var aligneePath  = GetPath(relativeAligneePath);
            var options = new LcmsWarpAlignmentOptions
            {
                AlignType = AlignmentType.NET_MASS_WARP,
                CalibrationType = LcmsWarpCalibrationType.Both
            };
            var aligner = new LcmsWarpAdapter(options);

            var rawBaselineData = File.ReadAllLines(baselinePath);
            var rawFeaturesData = File.ReadAllLines(aligneePath);
            var outputPath      = GetOutputPath(relativeOutput);
            var delimiter       = new[] {TextDelimiter};

            // CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
            Directory.CreateDirectory(outputPath);

            // Both input files share the same ten-column layout, so one parser serves both.
            Func<string, UMCLight> parseLine =
                line => ParseUmcFields(line.Split(delimiter, StringSplitOptions.RemoveEmptyEntries));

            // Blank and whitespace-only lines carry no feature and are skipped.
            var baseline = rawBaselineData
                .Where(line => !string.IsNullOrWhiteSpace(line))
                .Select(parseLine)
                .ToList();

            var features = rawFeaturesData
                .Where(line => !string.IsNullOrWhiteSpace(line))
                .Select(parseLine)
                .ToList();

            var outputData = aligner.Align(baseline, features);
            var residuals  = outputData.ResidualData;

            var heatmap       = HeatmapFactory.CreateAlignedHeatmap(outputData.HeatScores);
            var netHistogram  = HistogramFactory.CreateHistogram(outputData.NetErrorHistogram, "NET Error", "NET Error");
            var massHistogram = HistogramFactory.CreateHistogram(outputData.MassErrorHistogram, "Mass Error", "Mass Error (ppm)");

            var netResidual = ScatterPlotFactory.CreateResidualPlot(residuals.Scan, residuals.LinearCustomNet,
                residuals.LinearNet, "NET Residuals", "Scans", "NET");
            var massMzResidual = ScatterPlotFactory.CreateResidualPlot(residuals.Mz, residuals.MzMassError,
                residuals.MzMassErrorCorrected, "Mass Residuals", "m/z", "Mass Errors");
            var massScanResidual = ScatterPlotFactory.CreateResidualPlot(residuals.Scan, residuals.MzMassError,
                residuals.MzMassErrorCorrected, "Mass Residuals", "Scan", "Mass Errors");

            // Not a folder: this is "<outputPath>/<name>", to which each plot's suffix is appended.
            var outputPrefix = Path.Combine(outputPath, name);

            var encoder = new SvgEncoder();
            PlotImageUtility.SaveImage(heatmap,          outputPrefix + "_heatmap.svg",          encoder);
            PlotImageUtility.SaveImage(netResidual,      outputPrefix + "_netResidual.svg",      encoder);
            PlotImageUtility.SaveImage(massMzResidual,   outputPrefix + "_massMzResidual.svg",   encoder);
            PlotImageUtility.SaveImage(massScanResidual, outputPrefix + "_massScanResidual.svg", encoder);
            PlotImageUtility.SaveImage(netHistogram,     outputPrefix + "_netHistogram.svg",     encoder);
            PlotImageUtility.SaveImage(massHistogram,    outputPrefix + "_massHistogram.svg",    encoder);
        }

        /// <summary>
        /// Builds a UMCLight from the first ten columns of one feature-file row.
        /// Column order: Net, ChargeState, Mz, Scan, MassMonoisotopic, MassMonoisotopicAligned,
        /// Id, ScanStart, ScanEnd, ScanAligned.
        /// </summary>
        /// <param name="fields">The already-split columns of a single row.</param>
        /// <returns>The feature described by the row.</returns>
        private static UMCLight ParseUmcFields(string[] fields)
        {
            // The files are machine-written data, so numbers are parsed with the invariant culture
            // instead of whatever locale the test machine happens to run under.
            var culture = System.Globalization.CultureInfo.InvariantCulture;

            return new UMCLight
            {
                Net                     = Convert.ToDouble(fields[0], culture),
                ChargeState             = Convert.ToInt32(fields[1], culture),
                Mz                      = Convert.ToDouble(fields[2], culture),
                Scan                    = Convert.ToInt32(fields[3], culture),
                MassMonoisotopic        = Convert.ToDouble(fields[4], culture),
                MassMonoisotopicAligned = Convert.ToDouble(fields[5], culture),
                Id                      = Convert.ToInt32(fields[6], culture),
                ScanStart               = Convert.ToInt32(fields[7], culture),
                ScanEnd                 = Convert.ToInt32(fields[8], culture),
                ScanAligned             = Convert.ToInt32(fields[9], culture)
            };
        }
コード例 #2
0
        /// <summary>
        /// Builds a <see cref="TargetDatabase"/> from the supplied data sets in three stages:
        /// (1) filter each data set's evidences and cluster the survivors into consensus targets,
        /// (2) warp every non-MSAlign data set against those targets with LCMSWarp and write the
        /// aligned mass / NET back into the evidences, and (3) for workflows other than TOP_DOWN,
        /// recompute each consensus target from the warped evidences and add it to the database.
        /// </summary>
        /// <param name="dataSets">Data sets to process. The sequence is enumerated once and cached;
        /// the evidences it contains are modified in place.</param>
        /// <param name="bWorker">Worker whose CancellationPending flag is polled between steps.</param>
        /// <returns>
        /// The target database. When cancellation or an abort request is observed, or when the
        /// workflow is TOP_DOWN, it is returned without any consensus targets added by this method.
        /// </returns>
        public TargetDatabase Process(IEnumerable<LcmsDataSet> dataSets, BackgroundWorker bWorker)
        {
            m_abortRequested = false;
            m_currentItem    = 0;

            // Materialize once: the data sets are walked twice and their number drives progress reporting.
            var dataSetList = dataSets.ToList();
            m_totalItems    = 2 * dataSetList.Count;

            OnPercentProgressChanged(new PercentCompleteEventArgs(0));

            // Deal with DataSetId - Auto increments - Not in this class only
            var evidenceMap    = new Dictionary<int, Evidence>();
            var targetDatabase = new TargetDatabase();
            var aligner        = TargetAlignmentFactory.Create(ProcessorOptions);
            var clusterer      = TargetClustererFactory.Create(ProcessorOptions.TargetFilterType);
            var epicTargets    = new List<Evidence>();

            // ---- Stage 1: per data set filtering (and, for TOP_DOWN, target alignment) ----
            foreach (var dataSet in dataSetList)
            {
                float percentComplete = (float)m_currentItem / m_totalItems;
                UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Determining Consensus Targets");
                if (bWorker.CancellationPending || m_abortRequested)
                {
                    return targetDatabase;
                }

                var targetFilter    = TargetFilterFactory.Create(dataSet.Tool, ProcessorOptions);
                var alignmentFilter = AlignmentFilterFactory.Create(dataSet.Tool, ProcessorOptions);

                var filteredTargets = new List<Evidence>();
                var alignedTargets  = new List<Evidence>();

                foreach (var t in dataSet.Evidences)
                {
                    // Exclude carryover peptides.
                    // Would be evidenced by a sizable difference between observed net and predicted net
                    if (t.ObservedNet >= ProcessorOptions.MinimumObservedNet &&
                        t.ObservedNet <= ProcessorOptions.MaximumObservedNet)
                    {
                        // Evidences of a previously analyzed data set bypass the target filter so that
                        // ones which already passed alignment are not filtered out again.
                        if (dataSet.PreviouslyAnalyzed || !targetFilter.ShouldFilter(t))
                        {
                            filteredTargets.Add(t);

                            if (!alignmentFilter.ShouldFilter(t))
                            {
                                alignedTargets.Add(t);
                            }
                        }
                    }
                }

                epicTargets.AddRange(filteredTargets);

                if (ProcessorOptions.TargetFilterType == TargetWorkflowType.TOP_DOWN)
                {
                    dataSet.RegressionResult = aligner.AlignTargets(filteredTargets, alignedTargets);
                }

                m_currentItem++;
            }

            // ---- Stage 2a: cluster to get the consensus targets that serve as the LCMSWarp baseline ----
            var newTargets = clusterer.Cluster(epicTargets);
            int i = 0, j = 0;
            var tempConsensusTargets = new List<ConsensusTarget>();

            foreach (var consensusTarget in newTargets)
            {
                // Consensus targets and their evidences get fresh, 1-based, sequential ids.
                consensusTarget.Id = ++i;

                foreach (var target in consensusTarget.Evidences)
                {
                    target.Id = ++j;
                }
                consensusTarget.CalculateStatistics();
                tempConsensusTargets.Add(consensusTarget);
            }

            // One baseline feature per (consensus target, charge state).
            var massTagLightTargets = new List<UMCLight>();

            foreach (var evidence in tempConsensusTargets)
            {
                // Smallest and largest scan among the members of this consensus target.
                var driftStart = double.MaxValue;
                var driftEnd   = double.MinValue;

                foreach (var member in evidence.Evidences)
                {
                    driftStart = Math.Min(member.Scan, driftStart);
                    driftEnd   = Math.Max(member.Scan, driftEnd);
                }

                massTagLightTargets.AddRange(evidence.Charges.Select(charge => new UMCLight
                {
                    Net                     = evidence.PredictedNet,
                    ChargeState             = charge,
                    // NOTE(review): 1.00727649 is presumably the proton mass in Da, i.e. m/z = (M + z*p) / z.
                    Mz                      = (evidence.TheoreticalMonoIsotopicMass + (charge * 1.00727649)) / charge,
                    MassMonoisotopic        = evidence.TheoreticalMonoIsotopicMass,
                    Id                      = evidence.Id,
                    MassMonoisotopicAligned = evidence.TheoreticalMonoIsotopicMass,
                    DriftTime               = driftEnd - driftStart,
                    Scan                    = (int)((driftStart + driftEnd) / 2.0),
                    ScanStart               = (int)driftStart,
                    ScanEnd                 = (int)driftEnd,
                }));
            }

            if (bWorker.CancellationPending || m_abortRequested)
            {
                return targetDatabase;
            }

            // ---- Stage 2b: LCMSWarp alignment of every data set against the baseline ----
            var alignmentData = new List<LcmsWarpAlignmentData>();

            // Each aligner gets its OWN options object. The original handed one shared instance to
            // both adapters and switched it to NET_WARP in between; if the adapter keeps a reference
            // to its options (not visible from here), that turns the primary aligner into a NET-only
            // aligner as well and makes the fallback below meaningless.
            var primaryOptions = new LcmsWarpAlignmentOptions();
            var lcmsAligner    = new LcmsWarpAdapter(primaryOptions);

            // Fallback: NET warping without mass correction.
            var netOnlyOptions = new LcmsWarpAlignmentOptions
            {
                AlignType = PNNLOmics.Algorithms.Alignment.LcmsWarp.AlignmentType.NET_WARP
            };
            var lcmsNetAligner = new LcmsWarpAdapter(netOnlyOptions);

            foreach (var dataSet in dataSetList)
            {
                float percentComplete = (float)m_currentItem / m_totalItems;
                UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Performing LCMSWarp Alignment");
                if (bWorker.CancellationPending || m_abortRequested)
                {
                    return targetDatabase;
                }

                if (dataSet.Tool == LcmsIdentificationTool.MSAlign)
                {
                    // MSAlign data sets are not warped, but they still count as handled;
                    // otherwise the progress counter can never reach m_totalItems.
                    m_currentItem++;
                    continue;
                }

                dataSet.Evidences.Sort((x, y) => x.Scan.CompareTo(y.Scan));

                // Only evidences that pass the minimum observed NET are aligned.
                var umcDataset     = new List<UMCLight>();
                var evidenceAndUmc = new List<EvidenceUMCAssociation>();
                foreach (var evidence in dataSet.Evidences)
                {
                    if (evidence.ObservedNet >= ProcessorOptions.MinimumObservedNet)
                    {
                        UMCLight umc = new UMCLight
                        {
                            Net                     = evidence.ObservedNet,
                            ChargeState             = evidence.Charge,
                            Mz                      = evidence.Mz,
                            Scan                    = evidence.Scan,
                            MassMonoisotopic        = evidence.MonoisotopicMass,
                            MassMonoisotopicAligned = evidence.MonoisotopicMass,
                            Id                      = evidence.Id,
                            ScanStart               = evidence.Scan,
                            ScanEnd                 = evidence.Scan,
                        };
                        umcDataset.Add(umc);
                        evidenceAndUmc.Add(new EvidenceUMCAssociation(evidence, umc));
                    }
                }
                umcDataset.Sort((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));

                // Best effort: try the primary aligner, fall back to NET-only warping, and give up
                // (alignedData == null) if that fails too. Failures are deliberately not propagated.
                LcmsWarpAlignmentData alignedData;
                try
                {
                    alignedData = lcmsAligner.Align(massTagLightTargets, umcDataset);
                }
                catch
                {
                    try
                    {
                        alignedData = lcmsNetAligner.Align(massTagLightTargets, umcDataset);
                    }
                    catch
                    {
                        alignedData = null;
                    }
                }

                // Copy the aligned values back into the evidences, but only when an alignment
                // actually succeeded. NetAligned is never assigned by this method, so after a failed
                // alignment "NetAligned - Net" is not a real shift and must not be applied.
                if (alignedData != null)
                {
                    foreach (var pair in evidenceAndUmc)
                    {
                        var netShift = pair.UMC.NetAligned - pair.UMC.Net;

                        pair.Evidence.MonoisotopicMass = pair.UMC.MassMonoisotopicAligned;
                        pair.Evidence.NetShift         = netShift;
                        pair.Evidence.ObservedNet     += netShift;
                    }
                }

                // Remember every evidence by id; the first evidence seen for an id wins.
                foreach (var data in dataSet.Evidences)
                {
                    if (!evidenceMap.ContainsKey(data.Id))
                    {
                        evidenceMap.Add(data.Id, data);
                    }
                }

                // NOTE(review): this method assigns RegressionResult only for TOP_DOWN; the code
                // below assumes LcmsDataSet supplies a non-null instance otherwise - confirm.
                if (alignedData != null)
                {
                    dataSet.RegressionResult.Slope     = alignedData.NetSlope;
                    dataSet.RegressionResult.Intercept = alignedData.NetIntercept;
                    dataSet.RegressionResult.RSquared  = alignedData.NetRsquared;
                    alignmentData.Add(alignedData);
                }
                else
                {
                    // No alignment available: record an identity regression.
                    dataSet.RegressionResult.Slope     = 1;
                    dataSet.RegressionResult.Intercept = 0;
                    dataSet.RegressionResult.RSquared  = 0;
                }
                m_currentItem++;
            }

            // Raise through a local copy so a handler unsubscribing concurrently cannot null the event
            // between the check and the call.
            var alignmentCompleteHandler = AlignmentComplete;
            if (alignmentCompleteHandler != null)
            {
                alignmentCompleteHandler(this, new AlignmentCompleteArgs(alignmentData));
            }

            // ---- Stage 3: rebuild the consensus targets from the warped evidences ----
            if (ProcessorOptions.TargetFilterType != TargetWorkflowType.TOP_DOWN)
            {
                var proteinDict = new Dictionary<string, ProteinInformation>();

                i = j = 0;
                foreach (var consensus in tempConsensusTargets)
                {
                    // Swap in the evidence registered under the same id during alignment.
                    for (var evNum = 0; evNum < consensus.Evidences.Count; evNum++)
                    {
                        consensus.Evidences[evNum] = evidenceMap[consensus.Evidences[evNum].Id];
                    }

                    // Recalculate the target's data from the warped values
                    consensus.Id = ++i;
                    foreach (var target in consensus.Evidences)
                    {
                        target.Id = ++j;
                    }
                    consensus.CalculateStatistics();
                    targetDatabase.AddConsensusTarget(consensus);

                    foreach (var protein in consensus.Proteins)
                    {
                        ProteinInformation knownProtein;
                        if (proteinDict.TryGetValue(protein.ProteinName, out knownProtein))
                        {
                            knownProtein.Consensus.Add(consensus);
                        }
                        else
                        {
                            // Don't need to manually link the first consensus to the protein
                            proteinDict.Add(protein.ProteinName, protein);
                        }
                    }
                }
                targetDatabase.Proteins = proteinDict.Values.ToList();
            }

            return targetDatabase;
        }