Ejemplo n.º 1
0
        public TargetDatabase Process(IEnumerable <LcmsDataSet> dataSets, BackgroundWorker bWorker)
        {
            m_abortRequested = false;
            m_currentItem    = 0;
            dataSets         = dataSets.ToList();
            m_totalItems     = 2 * dataSets.Count();

            OnPercentProgressChanged(new PercentCompleteEventArgs(0));

            // Deal with DataSetId - Auto increments - Not in this class only
            var evidenceMap    = new Dictionary <int, Evidence>();
            var targetDatabase = new TargetDatabase();
            var aligner        = TargetAlignmentFactory.Create(ProcessorOptions);
            var clusterer      = TargetClustererFactory.Create(ProcessorOptions.TargetFilterType);
            var epicTargets    = new List <Evidence>();

            foreach (var dataSet in dataSets)
            {
                float percentComplete = (float)m_currentItem / m_totalItems;
                UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Determining Consensus Targets");
                if (bWorker.CancellationPending || m_abortRequested)
                {
                    return(targetDatabase);
                }

                var targetFilter    = TargetFilterFactory.Create(dataSet.Tool, ProcessorOptions);
                var alignmentFilter = AlignmentFilterFactory.Create(dataSet.Tool, ProcessorOptions);

                var filteredTargets = new List <Evidence>();
                var alignedTargets  = new List <Evidence>();

                foreach (var t in dataSet.Evidences)
                {
                    // Exclude carryover peptides.
                    // Would be evidenced by a sizable difference between observed net and predicted net

                    if (t.ObservedNet >= ProcessorOptions.MinimumObservedNet &&
                        t.ObservedNet <= ProcessorOptions.MaximumObservedNet)
                    {
                        // To prevent filtration of evidences which have previously passed alignment,
                        if (dataSet.PreviouslyAnalyzed || !targetFilter.ShouldFilter(t))
                        {
                            filteredTargets.Add(t);

                            if (!alignmentFilter.ShouldFilter(t))
                            {
                                alignedTargets.Add(t);
                            }
                        }
                    }
                }

                epicTargets.AddRange(filteredTargets);

                if (ProcessorOptions.TargetFilterType == TargetWorkflowType.TOP_DOWN)
                {
                    dataSet.RegressionResult = aligner.AlignTargets(filteredTargets, alignedTargets);
                }

                m_currentItem++;
            }

            //Create the database (the list of consensus targets)
            //Convert the list of targets into a list of MassTagLights for LCMS to use as baseline

            // Cluster initially to provide a baseline for LCMSWarp
            var newTargets = clusterer.Cluster(epicTargets);
            int i = 0, j = 0;
            var tempConsensusTargets = new List <ConsensusTarget>();
            var proteinDict          = new Dictionary <string, ProteinInformation>();

            foreach (var consensusTarget in newTargets)
            {
                consensusTarget.Id = ++i;

                foreach (var target in consensusTarget.Evidences)
                {
                    target.Id = ++j;
                }
                consensusTarget.CalculateStatistics();
                tempConsensusTargets.Add(consensusTarget);
            }

            var massTagLightTargets = new List <UMCLight>();

            foreach (var evidence in tempConsensusTargets)
            {
                var driftStart = double.MaxValue;
                var driftEnd   = double.MinValue;

                foreach (var member in evidence.Evidences)
                {
                    driftStart = Math.Min(member.Scan, driftStart);
                    driftEnd   = Math.Max(member.Scan, driftEnd);
                }

                massTagLightTargets.AddRange(evidence.Charges.Select(charge => new UMCLight
                {
                    Net                     = evidence.PredictedNet,
                    ChargeState             = charge,
                    Mz                      = (evidence.TheoreticalMonoIsotopicMass + (charge * 1.00727649)) / charge,
                    MassMonoisotopic        = evidence.TheoreticalMonoIsotopicMass,
                    Id                      = evidence.Id,
                    MassMonoisotopicAligned = evidence.TheoreticalMonoIsotopicMass,
                    DriftTime               = driftEnd - driftStart,
                    Scan                    = (int)((driftStart + driftEnd) / 2.0),
                    ScanStart               = (int)driftStart,
                    ScanEnd                 = (int)driftEnd,
                }));
            }

            if (bWorker.CancellationPending || m_abortRequested)
            {
                return(targetDatabase);
            }

            var alignmentData = new List <LcmsWarpAlignmentData>();
            var options       = new LcmsWarpAlignmentOptions();
            var lcmsAligner   = new LcmsWarpAdapter(options);

            //For performing net warping without mass correction
            options.AlignType = PNNLOmics.Algorithms.Alignment.LcmsWarp.AlignmentType.NET_WARP;
            var lcmsNetAligner = new LcmsWarpAdapter(options);

            //Foreach dataset
            foreach (var dataSet in dataSets)
            {
                float percentComplete = (float)m_currentItem / m_totalItems;
                UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Performing LCMSWarp Alignment");
                if (bWorker.CancellationPending || m_abortRequested)
                {
                    return(targetDatabase);
                }

                var umcDataset = new List <UMCLight>();
                if (dataSet.Tool == LcmsIdentificationTool.MSAlign)
                {
                    continue;
                }

                dataSet.Evidences.Sort((x, y) => x.Scan.CompareTo(y.Scan));

                var evidenceAndUmc = new List <EvidenceUMCAssociation>(); // Only put evidences that pass the minimum observed net in this list.
                var backupDataset  = new List <UMCLight>();
                foreach (var evidence in dataSet.Evidences)
                {
                    if (evidence.ObservedNet >= ProcessorOptions.MinimumObservedNet)
                    {
                        UMCLight umc = new UMCLight
                        {
                            Net                     = evidence.ObservedNet,
                            ChargeState             = evidence.Charge,
                            Mz                      = evidence.Mz,
                            Scan                    = evidence.Scan,
                            MassMonoisotopic        = evidence.MonoisotopicMass,
                            MassMonoisotopicAligned = evidence.MonoisotopicMass,
                            Id                      = evidence.Id,
                            ScanStart               = evidence.Scan,
                            ScanEnd                 = evidence.Scan,
                        };
                        umcDataset.Add(umc);
                        backupDataset.Add(umc);
                        evidenceAndUmc.Add(new EvidenceUMCAssociation(evidence, umc));
                    }
                }
                umcDataset.Sort((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));
                LcmsWarpAlignmentData alignedData;
                try
                {
                    alignedData = lcmsAligner.Align(massTagLightTargets, umcDataset);
                }
                catch
                {
                    try
                    {
                        alignedData = lcmsNetAligner.Align(massTagLightTargets, umcDataset);
                    }
                    catch
                    {
                        alignedData = null;
                    }
                }

                var netDiffList = new List <double>();
                var numBins     = Math.Min(50, dataSet.Evidences.Count);
                var medNetDiff  = new double[numBins];
                var numPerBin   = (int)Math.Ceiling((double)dataSet.Evidences.Count / numBins);
                var binNum      = 0;

                //Copy the residual data back into the evidences
                foreach (var group in evidenceAndUmc)
                {
                    group.Evidence.MonoisotopicMass = group.UMC.MassMonoisotopicAligned;
                    var netShift = group.UMC.NetAligned - group.UMC.Net;
                    netDiffList.Add(netShift);
                    group.Evidence.NetShift     = netShift;
                    group.Evidence.ObservedNet += netShift;

                    if (netDiffList.Count % numPerBin == 0)
                    {
                        medNetDiff[binNum] = netDiffList.Median();
                        netDiffList.Clear();
                        binNum++;
                    }
                }
                if (netDiffList.Count != 0)
                {
                    medNetDiff[binNum] = netDiffList.Median();
                    netDiffList.Clear();
                }


                foreach (var data in dataSet.Evidences.Where(data => !evidenceMap.ContainsKey(data.Id)))
                {
                    evidenceMap.Add(data.Id, data);
                }
                if (alignedData != null)
                {
                    dataSet.RegressionResult.Slope     = alignedData.NetSlope;
                    dataSet.RegressionResult.Intercept = alignedData.NetIntercept;
                    dataSet.RegressionResult.RSquared  = alignedData.NetRsquared;
                    alignmentData.Add(alignedData);
                }
                else
                {
                    dataSet.RegressionResult.Slope     = 1;
                    dataSet.RegressionResult.Intercept = 0;
                    dataSet.RegressionResult.RSquared  = 0;
                }
                m_currentItem++;
            }

            if (AlignmentComplete != null)
            {
                AlignmentComplete(this, new AlignmentCompleteArgs(alignmentData));
            }
            if (ProcessorOptions.TargetFilterType != TargetWorkflowType.TOP_DOWN)
            {
                i = j = 0;
                foreach (var consensus in tempConsensusTargets)
                {
                    for (var evNum = 0; evNum < consensus.Evidences.Count; evNum++)
                    {
                        consensus.Evidences[evNum] = evidenceMap[consensus.Evidences[evNum].Id];
                    }
                    //Recalculate the target's data from the warped values
                    consensus.Id = ++i;
                    foreach (var target in consensus.Evidences)
                    {
                        target.Id = ++j;
                    }
                    consensus.CalculateStatistics();
                    targetDatabase.AddConsensusTarget(consensus);
                    foreach (var protein in consensus.Proteins)
                    {
                        if (!proteinDict.ContainsKey(protein.ProteinName))
                        {
                            proteinDict.Add(protein.ProteinName, protein);
                            // Don't need to manually link the first consensus to the protein
                            continue;
                        }
                        proteinDict[protein.ProteinName].Consensus.Add(consensus);
                    }
                }
                targetDatabase.Proteins = proteinDict.Values.ToList();
            }
            return(targetDatabase);
        }
Ejemplo n.º 2
0
        private void ProcessAnalysisJobInternal(Analysis analysis, Options options, IRetentionTimePredictor predictor)
        {
            analysis.ProcessedState = ProcessingState.Processing;
            if (this.ProcessingAnalysis != null)
            {
                ProcessingAnalysis(this, new AnalysisCompletedEventArgs(1, 1, analysis));
            }

            IAnalysisReader reader = SequenceAnalysisToolsFactory.CreateReader(analysis.Tool);
            ITargetFilter   filter = TargetFilterFactory.CreateFilters(analysis.Tool, options);

            IRegressionAlgorithm regressor = RegressorFactory.CreateRegressor(RegressorType.LcmsRegressor, options.Regression);
            Analysis             results   = reader.Read(analysis.FilePath, analysis.Name);


            List <Target> targets = new List <Target>();
            Dictionary <string, Protein> proteins = new Dictionary <string, Protein>();

            foreach (var target in results.Targets)
            {
                if (options.ShouldFilter(target))
                {
                    continue;
                }

                targets.Add(target);

                foreach (var protein in target.Proteins)
                {
                    if (!proteins.ContainsKey(protein.Reference))
                    {
                        analysis.Proteins.Add(protein);
                        proteins.Add(protein.Reference, protein);
                    }
                }
            }
            analysis.AddTargets(targets);

            // Calculate the regression based on the target data
            List <float> predicted = new List <float>();
            List <float> scans     = new List <float>();

            int maxScan = 0;
            int minScan = 0;



            foreach (Target target in analysis.Targets)
            {
                // Filter the target here...based on use for alignment.
                if (filter.ShouldFilter(target))
                {
                    continue;
                }

                maxScan = Math.Max(maxScan, target.Scan);
                minScan = Math.Min(minScan, target.Scan);

                target.IsPredicted  = true;
                target.NetPredicted = predictor.GetElutionTime(target.CleanSequence);
                scans.Add(target.Scan);
                predicted.Add(Convert.ToSingle(target.NetPredicted));
            }
            analysis.RegressionResults = regressor.CalculateRegression(scans, predicted);
            //analysis.RegressionResults.Regressor = regressor;

            analysis.RegressionResults.MinScan = minScan;
            analysis.RegressionResults.MaxScan = maxScan;

            // Make the regression line data.
            int scan  = 0;
            int delta = 5;

            while (scan <= maxScan)
            {
                double y = regressor.GetTransformedNET(scan);
                scan += delta;

                analysis.RegressionResults.XValues.Add(Convert.ToDouble(scan));
                analysis.RegressionResults.YValues.Add(y);
            }


            // Then make sure we align all against the predicted self
            regressor.ApplyTransformation(analysis.Targets);

            analysis.ProcessedState = ProcessingState.Processed;
        }