예제 #1
0
        /// <summary>
        /// Builds a <see cref="TargetDatabase"/> from the supplied datasets in three stages:
        /// (1) filter each dataset's evidences and pool the survivors, (2) cluster the pool into
        /// consensus targets that serve as the LCMSWarp baseline, (3) warp every non-MSAlign
        /// dataset against that baseline and, for non-TOP_DOWN workflows, recompute the consensus
        /// statistics and protein links from the warped evidences.
        /// </summary>
        /// <param name="dataSets">Datasets to process. Enumerated once and cached, because two passes are made.</param>
        /// <param name="bWorker">Worker whose <c>CancellationPending</c> flag is polled between steps.</param>
        /// <returns>
        /// The target database. When cancellation or an abort is observed, or when the workflow
        /// type is TOP_DOWN, no consensus target has been added to it by this method.
        /// </returns>
        public TargetDatabase Process(IEnumerable<LcmsDataSet> dataSets, BackgroundWorker bWorker)
        {
            m_abortRequested = false;
            m_currentItem    = 0;

            // Materialise once: the sequence is walked twice and counted below.
            var dataSetList = dataSets.ToList();
            // Each dataset is visited once per pass, hence two progress items per dataset.
            m_totalItems = 2 * dataSetList.Count;

            OnPercentProgressChanged(new PercentCompleteEventArgs(0));

            // Original author's note: Deal with DataSetId - Auto increments - Not in this class only
            var evidenceMap    = new Dictionary<int, Evidence>();
            var targetDatabase = new TargetDatabase();
            var aligner        = TargetAlignmentFactory.Create(ProcessorOptions);
            var clusterer      = TargetClustererFactory.Create(ProcessorOptions.TargetFilterType);
            var epicTargets    = new List<Evidence>();

            // ---- Pass 1: filter every dataset and pool the evidences that survive ----
            foreach (var dataSet in dataSetList)
            {
                float percentComplete = (float)m_currentItem / m_totalItems;
                UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Determining Consensus Targets");
                if (bWorker.CancellationPending || m_abortRequested)
                {
                    return targetDatabase;
                }

                var targetFilter    = TargetFilterFactory.Create(dataSet.Tool, ProcessorOptions);
                var alignmentFilter = AlignmentFilterFactory.Create(dataSet.Tool, ProcessorOptions);

                var filteredTargets = new List<Evidence>();
                var alignedTargets  = new List<Evidence>();

                foreach (var t in dataSet.Evidences)
                {
                    // Exclude carryover peptides: only evidences whose observed NET lies inside
                    // the configured [MinimumObservedNet, MaximumObservedNet] window are kept.
                    if (t.ObservedNet < ProcessorOptions.MinimumObservedNet ||
                        t.ObservedNet > ProcessorOptions.MaximumObservedNet)
                    {
                        continue;
                    }

                    // Evidences of a previously analyzed dataset bypass the target filter so
                    // that anything which already passed alignment is not filtered out again.
                    if (!dataSet.PreviouslyAnalyzed && targetFilter.ShouldFilter(t))
                    {
                        continue;
                    }

                    filteredTargets.Add(t);

                    if (!alignmentFilter.ShouldFilter(t))
                    {
                        alignedTargets.Add(t);
                    }
                }

                epicTargets.AddRange(filteredTargets);

                if (ProcessorOptions.TargetFilterType == TargetWorkflowType.TOP_DOWN)
                {
                    dataSet.RegressionResult = aligner.AlignTargets(filteredTargets, alignedTargets);
                }

                m_currentItem++;
            }

            // ---- Cluster once to obtain the baseline consensus targets for LCMSWarp ----
            var newTargets = clusterer.Cluster(epicTargets);
            int i = 0, j = 0;
            var tempConsensusTargets = new List<ConsensusTarget>();
            var proteinDict          = new Dictionary<string, ProteinInformation>();

            foreach (var consensusTarget in newTargets)
            {
                // Ids are renumbered sequentially; evidence ids are unique across all clusters.
                consensusTarget.Id = ++i;

                foreach (var target in consensusTarget.Evidences)
                {
                    target.Id = ++j;
                }
                consensusTarget.CalculateStatistics();
                tempConsensusTargets.Add(consensusTarget);
            }

            // Convert the consensus targets into UMCLight features (one per charge state)
            // that LCMSWarp uses as its baseline.
            var massTagLightTargets = new List<UMCLight>();

            foreach (var baselineTarget in tempConsensusTargets)
            {
                // Scan range spanned by the member evidences of this consensus target.
                var driftStart = double.MaxValue;
                var driftEnd   = double.MinValue;

                foreach (var member in baselineTarget.Evidences)
                {
                    driftStart = Math.Min(member.Scan, driftStart);
                    driftEnd   = Math.Max(member.Scan, driftEnd);
                }

                massTagLightTargets.AddRange(baselineTarget.Charges.Select(charge => new UMCLight
                {
                    Net                     = baselineTarget.PredictedNet,
                    ChargeState             = charge,
                    // m/z from the monoisotopic mass plus one 1.00727649 mass unit per charge.
                    Mz                      = (baselineTarget.TheoreticalMonoIsotopicMass + (charge * 1.00727649)) / charge,
                    MassMonoisotopic        = baselineTarget.TheoreticalMonoIsotopicMass,
                    Id                      = baselineTarget.Id,
                    MassMonoisotopicAligned = baselineTarget.TheoreticalMonoIsotopicMass,
                    DriftTime               = driftEnd - driftStart,
                    Scan                    = (int)((driftStart + driftEnd) / 2.0),
                    ScanStart               = (int)driftStart,
                    ScanEnd                 = (int)driftEnd,
                }));
            }

            if (bWorker.CancellationPending || m_abortRequested)
            {
                return targetDatabase;
            }

            var alignmentData = new List<LcmsWarpAlignmentData>();
            var options       = new LcmsWarpAlignmentOptions();
            var lcmsAligner   = new LcmsWarpAdapter(options);

            // For performing net warping without mass correction (used as the fallback aligner).
            // NOTE(review): the same options instance was already handed to lcmsAligner above; if
            // LcmsWarpAdapter keeps the reference rather than copying it, both adapters end up
            // with NET_WARP - confirm against the adapter implementation.
            options.AlignType = PNNLOmics.Algorithms.Alignment.LcmsWarp.AlignmentType.NET_WARP;
            var lcmsNetAligner = new LcmsWarpAdapter(options);

            // ---- Pass 2: warp every dataset against the baseline ----
            foreach (var dataSet in dataSetList)
            {
                float percentComplete = (float)m_currentItem / m_totalItems;
                UpdateProgress(m_currentItem, m_totalItems, percentComplete, "Performing LCMSWarp Alignment");
                if (bWorker.CancellationPending || m_abortRequested)
                {
                    return targetDatabase;
                }

                if (dataSet.Tool == LcmsIdentificationTool.MSAlign)
                {
                    // MSAlign datasets are not warped, but they still count as a completed
                    // item; otherwise the progress counter can never reach m_totalItems.
                    m_currentItem++;
                    continue;
                }

                dataSet.Evidences.Sort((x, y) => x.Scan.CompareTo(y.Scan));

                // Only evidences that pass the minimum observed NET are handed to the aligner.
                var umcDataset     = new List<UMCLight>();
                var evidenceAndUmc = new List<EvidenceUMCAssociation>();
                foreach (var evidence in dataSet.Evidences)
                {
                    if (evidence.ObservedNet < ProcessorOptions.MinimumObservedNet)
                    {
                        continue;
                    }

                    var umc = new UMCLight
                    {
                        Net                     = evidence.ObservedNet,
                        ChargeState             = evidence.Charge,
                        Mz                      = evidence.Mz,
                        Scan                    = evidence.Scan,
                        MassMonoisotopic        = evidence.MonoisotopicMass,
                        MassMonoisotopicAligned = evidence.MonoisotopicMass,
                        Id                      = evidence.Id,
                        ScanStart               = evidence.Scan,
                        ScanEnd                 = evidence.Scan,
                    };
                    umcDataset.Add(umc);
                    evidenceAndUmc.Add(new EvidenceUMCAssociation(evidence, umc));
                }
                umcDataset.Sort((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));

                // Best effort: try the primary aligner, fall back to the NET-only aligner, and
                // give up (null) if both fail. Failures are deliberately not propagated.
                LcmsWarpAlignmentData alignedData;
                try
                {
                    alignedData = lcmsAligner.Align(massTagLightTargets, umcDataset);
                }
                catch (Exception)
                {
                    try
                    {
                        alignedData = lcmsNetAligner.Align(massTagLightTargets, umcDataset);
                    }
                    catch (Exception)
                    {
                        alignedData = null;
                    }
                }

                // Copy the residual data back into the evidences.
                // NOTE(review): this also runs when both alignments failed; NetAligned then holds
                // whatever UMCLight / the aligner left in it - confirm that is intended.
                foreach (var pairing in evidenceAndUmc)
                {
                    pairing.Evidence.MonoisotopicMass = pairing.UMC.MassMonoisotopicAligned;
                    var netShift = pairing.UMC.NetAligned - pairing.UMC.Net;
                    pairing.Evidence.NetShift     = netShift;
                    pairing.Evidence.ObservedNet += netShift;
                }

                // Remember every evidence of this dataset by id; the first one seen wins.
                foreach (var data in dataSet.Evidences)
                {
                    if (!evidenceMap.ContainsKey(data.Id))
                    {
                        evidenceMap.Add(data.Id, data);
                    }
                }

                // NOTE(review): RegressionResult is only assigned in this method for TOP_DOWN
                // workflows; presumably LcmsDataSet initialises it otherwise - confirm.
                if (alignedData != null)
                {
                    dataSet.RegressionResult.Slope     = alignedData.NetSlope;
                    dataSet.RegressionResult.Intercept = alignedData.NetIntercept;
                    dataSet.RegressionResult.RSquared  = alignedData.NetRsquared;
                    alignmentData.Add(alignedData);
                }
                else
                {
                    // No alignment available: record an identity regression with zero fit.
                    dataSet.RegressionResult.Slope     = 1;
                    dataSet.RegressionResult.Intercept = 0;
                    dataSet.RegressionResult.RSquared  = 0;
                }
                m_currentItem++;
            }

            // Copy the delegate so a concurrent unsubscribe cannot null it between check and call.
            var alignmentComplete = AlignmentComplete;
            if (alignmentComplete != null)
            {
                alignmentComplete(this, new AlignmentCompleteArgs(alignmentData));
            }

            if (ProcessorOptions.TargetFilterType != TargetWorkflowType.TOP_DOWN)
            {
                i = j = 0;
                foreach (var consensus in tempConsensusTargets)
                {
                    for (var evNum = 0; evNum < consensus.Evidences.Count; evNum++)
                    {
                        // Evidences from datasets that were skipped in pass 2 (MSAlign) are not
                        // in the map; they keep their current instance instead of throwing.
                        Evidence warped;
                        if (evidenceMap.TryGetValue(consensus.Evidences[evNum].Id, out warped))
                        {
                            consensus.Evidences[evNum] = warped;
                        }
                    }

                    // Recalculate the target's data from the warped values.
                    consensus.Id = ++i;
                    foreach (var target in consensus.Evidences)
                    {
                        target.Id = ++j;
                    }
                    consensus.CalculateStatistics();
                    targetDatabase.AddConsensusTarget(consensus);

                    foreach (var protein in consensus.Proteins)
                    {
                        ProteinInformation knownProtein;
                        if (proteinDict.TryGetValue(protein.ProteinName, out knownProtein))
                        {
                            knownProtein.Consensus.Add(consensus);
                        }
                        else
                        {
                            // Don't need to manually link the first consensus to the protein.
                            proteinDict.Add(protein.ProteinName, protein);
                        }
                    }
                }
                targetDatabase.Proteins = proteinDict.Values.ToList();
            }

            return targetDatabase;
        }
예제 #2
0
        /// <summary>
        /// Reads an Access mass tag database by exporting its <c>AMT</c>, <c>AMT_Proteins</c> and
        /// <c>AMT_to_Protein_Map</c> tables to temporary delimited text files next to the database,
        /// parsing those files, and deleting them again.
        /// </summary>
        /// <remarks>
        /// Every AMT row becomes a <see cref="ConsensusTarget"/>. One synthetic <see cref="Evidence"/>
        /// is added per observation (column 4) so that statistics computed from the evidences
        /// reflect the observation count. Rows are split on ',' only.
        /// NOTE(review): fields containing commas or text qualifiers are not handled - confirm
        /// against the Access export format.
        /// </remarks>
        /// <param name="path">Path of the Access database file.</param>
        /// <returns>A database holding the consensus targets and proteins that were read.</returns>
        public TargetDatabase ReadDb(string path)
        {
            // Path.GetDirectoryName copes with dotted folder/server names and extension-less
            // file names, which the previous "skip every piece containing a dot" logic did not.
            var directory   = Path.GetDirectoryName(path) ?? string.Empty;
            var amtFile     = Path.Combine(directory, "outTempAMT.txt");
            var proteinFile = Path.Combine(directory, "outTempAMT_Proteins.txt");
            var mapFile     = Path.Combine(directory, "outTempAMT_to_Protein_Map.txt");

            try
            {
                // Export the three tables with Access itself; always close and quit Access,
                // even if one of the exports fails.
                var accApplication = new ACCESS.Application();
                try
                {
                    accApplication.OpenCurrentDatabase(path);
                    try
                    {
                        accApplication.DoCmd.TransferText(TransferType: ACCESS.AcTextTransferType.acExportDelim,
                                                          TableName: "AMT", FileName: amtFile, HasFieldNames: true);
                        accApplication.DoCmd.TransferText(TransferType: ACCESS.AcTextTransferType.acExportDelim,
                                                          TableName: "AMT_Proteins", FileName: proteinFile, HasFieldNames: true);
                        accApplication.DoCmd.TransferText(TransferType: ACCESS.AcTextTransferType.acExportDelim,
                                                          TableName: "AMT_to_Protein_Map", FileName: mapFile, HasFieldNames: true);
                    }
                    finally
                    {
                        accApplication.CloseCurrentDatabase();
                    }
                }
                finally
                {
                    accApplication.Quit();
                }

                var consensusTargets = new Dictionary<int, ConsensusTarget>();
                var proteins         = new Dictionary<int, ProteinInformation>();

                using (var ctReader = new StreamReader(amtFile))
                using (var protReader = new StreamReader(proteinFile))
                using (var mapReader = new StreamReader(mapFile))
                {
                    string line;

                    // AMT rows: id, monoisotopic mass, average NET, predicted NET,
                    // observation count, normalized score, encoded sequence.
                    ctReader.ReadLine(); // skip the header row
                    while ((line = ctReader.ReadLine()) != null)
                    {
                        var pieces = line.Split(',');

                        var target = new ConsensusTarget
                        {
                            Id = Convert.ToInt32(pieces[0]),
                            TheoreticalMonoIsotopicMass = Convert.ToDouble(pieces[1]),
                            AverageNet             = Convert.ToDouble(pieces[2]),
                            PredictedNet           = Convert.ToDouble(pieces[3]),
                            EncodedNumericSequence = pieces[6]
                        };
                        var totalEvidences = Convert.ToInt32(pieces[4]);
                        var normScore      = Convert.ToDouble(pieces[5]);

                        // One identical evidence per recorded observation of this peptide.
                        for (var evNum = 0; evNum < totalEvidences; evNum++)
                        {
                            target.Evidences.Add(new Evidence
                            {
                                ObservedNet = target.AverageNet,
                                ObservedMonoisotopicMass = target.TheoreticalMonoIsotopicMass,
                                PredictedNet             = target.PredictedNet,
                                NormalizedScore          = normScore,
                                SeqWithNumericMods       = target.EncodedNumericSequence,
                                Parent = target
                            });
                        }
                        consensusTargets.Add(target.Id, target);
                    }

                    // Protein rows: id, protein name.
                    protReader.ReadLine(); // skip the header row
                    while ((line = protReader.ReadLine()) != null)
                    {
                        var pieces = line.Split(',');

                        var protein = new ProteinInformation
                        {
                            ProteinName = pieces[1]
                        };
                        proteins.Add(Convert.ToInt32(pieces[0]), protein);
                    }

                    // Map rows: consensus target id, protein id.
                    mapReader.ReadLine(); // skip the header row
                    while ((line = mapReader.ReadLine()) != null)
                    {
                        var pieces = line.Split(',');

                        consensusTargets[Convert.ToInt32(pieces[0])].AddProtein(proteins[Convert.ToInt32(pieces[1])]);
                    }
                }

                var database = new TargetDatabase();

                foreach (var target in consensusTargets)
                {
                    database.AddConsensusTarget(target.Value);
                }
                database.Proteins = proteins.Values.ToList();

                return database;
            }
            finally
            {
                // Remove whatever temporary exports exist, on success and on failure alike.
                foreach (var tempFile in new[] { amtFile, proteinFile, mapFile })
                {
                    if (File.Exists(tempFile))
                    {
                        File.Delete(tempFile);
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Loads the SQLite database at <paramref name="path"/> into <c>m_targetDb</c> and
        /// <c>m_lcmsDataDic</c>, re-linking consensus targets with their PTMs, proteins and
        /// evidences. Does nothing when <paramref name="path"/> equals the last file read.
        /// </summary>
        /// <param name="path">Path of the SQLite database file.</param>
        private void ReadSqLite(string path)
        {
            // Don't read again if we just read the file.
            if (path == m_lastReadFile)
            {
                return;
            }

            // Reset the data.
            m_targetDb.ClearTargets();
            m_lcmsDataDic.Clear();

            DatabaseFactory.DatabaseFile = path;
            DatabaseFactory.ReadOrAppend = true;
            var sessionFactory = DatabaseFactory.CreateSessionFactory(DatabaseType.SQLite);

            var readConsensus = new List<ConsensusTarget>();
            var readPair      = new List<ConsensusProteinPair>();
            var readProt      = new List<ProteinInformation>();
            var readEvidence  = new List<Evidence>();
            var readPtms      = new List<PostTranslationalModification>();
            var readPtmPairs  = new List<ConsensusPtmPair>();
            var readOptions   = new List<Options>();

            var consensusDic     = new Dictionary<int, ConsensusTarget>();
            var consensusProtDic = new Dictionary<int, List<ConsensusProteinPair>>();
            var protDic          = new Dictionary<int, ProteinInformation>();
            var ptmDic           = new Dictionary<int, PostTranslationalModification>();

            // All linking below stays inside the session scope, as in the original code.
            // NOTE(review): presumably some associations are resolved lazily - confirm before
            // moving any of this outside the using block.
            using (var session = sessionFactory.OpenStatelessSession())
            {
                using (var transact = session.BeginTransaction())
                {
                    session.CreateCriteria<ProteinInformation>().List(readProt);
                    session.CreateCriteria<ConsensusTarget>().List(readConsensus);
                    session.CreateCriteria<PostTranslationalModification>().List(readPtms);
                    // NOTE(review): readOptions is not used anywhere in this method - confirm
                    // whether this query can be dropped.
                    session.CreateCriteria<Options>().List(readOptions);
                    session.CreateCriteria<ConsensusProteinPair>().List(readPair);
                    session.CreateCriteria<ConsensusPtmPair>().List(readPtmPairs);
                    session.CreateCriteria<Evidence>().List(readEvidence);
                    transact.Commit();
                }

                // Register every consensus target with empty PTM / evidence collections; they
                // are repopulated from the pair and evidence tables below.
                foreach (var consensus in readConsensus)
                {
                    consensus.Ptms.Clear();
                    consensus.Evidences = new List<Evidence>();
                    consensus.Sequence  = consensus.CleanSequence;
                    m_targetDb.AddConsensusTarget(consensus);
                    consensusDic.Add(consensus.Id, consensus);
                }

                // Group the consensus-protein pairs by consensus id.
                foreach (var pair in readPair)
                {
                    List<ConsensusProteinPair> pairsOfConsensus;
                    if (!consensusProtDic.TryGetValue(pair.Consensus.Id, out pairsOfConsensus))
                    {
                        pairsOfConsensus = new List<ConsensusProteinPair>();
                        consensusProtDic.Add(pair.Consensus.Id, pairsOfConsensus);
                    }
                    pairsOfConsensus.Add(pair);
                }

                foreach (var prot in readProt)
                {
                    protDic.Add(prot.Id, prot);
                }

                foreach (var ptm in readPtms)
                {
                    ptmDic.Add(ptm.Id, ptm);
                }

                // Give each consensus target its own PTM instances: the modification data comes
                // from the PTM table, the location and parent from the pair.
                foreach (var pair in readPtmPairs)
                {
                    var template = ptmDic[pair.PostTranslationalModification.Id];
                    var owner    = consensusDic[pair.Target.Id];

                    owner.Ptms.Add(new PostTranslationalModification
                    {
                        Mass     = template.Mass,
                        Name     = template.Name,
                        Formula  = template.Formula,
                        Location = pair.Location,
                        Parent   = owner
                    });
                }

                foreach (var evidence in readEvidence)
                {
                    var parent = consensusDic[evidence.Parent.Id];

                    // A consensus target without any protein pair simply contributes no
                    // proteins; previously this lookup threw KeyNotFoundException.
                    List<ConsensusProteinPair> proteinPairs;
                    if (consensusProtDic.TryGetValue(evidence.Parent.Id, out proteinPairs))
                    {
                        foreach (var pair in proteinPairs)
                        {
                            // NOTE(review): the ProteinInformation instance is shared by every
                            // pair that references it, so the values written here are
                            // overwritten by later pairs - confirm that is acceptable.
                            var prot = protDic[pair.Protein.Id];
                            prot.ResidueEnd    = pair.ResidueEnd;
                            prot.ResidueStart  = pair.ResidueStart;
                            prot.TerminusState = (clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants)pair.TerminusState;
                            prot.CleavageState = (clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants)pair.CleavageState;
                            evidence.AddProtein(prot);
                            parent.AddProtein(prot);
                        }
                    }

                    // Evidences inherit the mass and the PTM list of their consensus target.
                    evidence.MonoisotopicMass = parent.TheoreticalMonoIsotopicMass;
                    evidence.Ptms             = parent.Ptms;

                    // Create the dataset entry on first sight of its name.
                    var dataSetName = evidence.DataSet.Name;
                    if (!m_lcmsDataDic.ContainsKey(dataSetName))
                    {
                        var dataset = new LcmsDataSet(true)
                        {
                            Name = dataSetName,
                            Tool = evidence.DataSet.Tool
                        };
                        m_lcmsDataDic.Add(dataSetName, dataset);
                    }
                    m_lcmsDataDic[dataSetName].Evidences.Add(evidence);
                    parent.AddEvidence(evidence);
                }
            }

            // Set the member variable to avoid double reads.
            m_lastReadFile = path;
        }