Ejemplo n.º 1
0
        /// <summary>
        /// Copies the current PSM of <paramref name="reader"/> into <paramref name="result"/>:
        /// scalar fields, peptide info, protein info (via StoreProteinInfo), the list of
        /// modifications, and an encoded sequence with the modification tags embedded.
        /// </summary>
        /// <param name="result">Evidence object to populate</param>
        /// <param name="reader">Reader whose CurrentPSM supplies the data</param>
        /// <param name="specProb">Value stored in result.SpecProb</param>
        protected void StorePsmData(Evidence result, clsPHRPReader reader, double specProb)
        {
            var psm = reader.CurrentPSM;

            result.Charge                   = psm.Charge;
            result.CleanPeptide             = psm.PeptideCleanSequence;
            result.SeqWithNumericMods       = psm.PeptideWithNumericMods;
            result.MonoisotopicMass         = psm.PeptideMonoisotopicMass;
            result.ObservedMonoisotopicMass = psm.PrecursorNeutralMass;
            result.MultiProteinCount        = (short)psm.Proteins.Count;
            result.Scan                     = psm.ScanNumber;
            result.Sequence                 = psm.Peptide;

            // m/z is derived from the neutral precursor mass (charge 0) at the PSM's charge
            result.Mz = clsPeptideMassCalculator.ConvoluteMass(psm.PrecursorNeutralMass, 0, psm.Charge);

            result.SpecProb          = specProb;
            result.DelM              = Convert.ToDouble(psm.MassErrorDa);
            result.ModificationCount = (short)psm.ModifiedResidues.Count;

            result.PeptideInfo = new TargetPeptideInfo
            {
                Peptide                = result.Sequence,
                CleanPeptide           = result.CleanPeptide,
                PeptideWithNumericMods = result.SeqWithNumericMods
            };

            // The PPM mass error is optional; a null or empty value leaves DelMPpm untouched
            if (!string.IsNullOrEmpty(psm.MassErrorPPM))
            {
                result.DelMPpm = Convert.ToDouble(psm.MassErrorPPM);
            }

            result.SeqInfoMonoisotopicMass = result.MonoisotopicMass;

            StoreProteinInfo(reader, result);

            if (result.ModificationCount == 0)
            {
                // Nothing to encode: the plain sequence doubles as the encoded one
                result.EncodedNonNumericSequence = result.Sequence;
                return;
            }

            foreach (var info in reader.CurrentPSM.ModifiedResidues)
            {
                var massCorrectionTag = info.ModDefinition.MassCorrectionTag;

                result.ModificationDescription += massCorrectionTag + ":" + info.ResidueLocInPeptide + " ";
                result.Ptms.Add(new PostTranslationalModification
                {
                    Location = info.ResidueLocInPeptide,
                    Mass     = info.ModDefinition.ModificationMass,
                    Formula  = massCorrectionTag,
                    Name     = massCorrectionTag
                });
            }

            // Build "<first char of Sequence>.<residues with [+/-tag] after each modified residue>.<last char of Sequence>".
            // The walk below only moves forward through CleanPeptide, so the PTMs must be visited in
            // ascending location order; OrderBy is a stable sort, so an already ordered list is unaffected.
            var encodedSeq   = result.Sequence[0] + ".";
            var residueIndex = 0;
            foreach (var ptm in result.Ptms.OrderBy(p => p.Location))
            {
                // Copy residues up to and including the modified one (Location is used as a count)
                for (; residueIndex < ptm.Location; residueIndex++)
                {
                    encodedSeq += result.CleanPeptide[residueIndex];
                }

                encodedSeq += "[" + ((ptm.Mass > 0) ? "+" : "-") + ptm.Formula + "]";
            }

            // Copy whatever residues follow the last modification
            for (; residueIndex < result.CleanPeptide.Length; residueIndex++)
            {
                encodedSeq += result.CleanPeptide[residueIndex];
            }

            encodedSeq += "." + result.Sequence.Last();
            result.EncodedNonNumericSequence = encodedSeq;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads the temporary text exports (tempModInfo, tempMassTags, tempMassTagsNet, tempProteins,
        /// tempMassTagToProteins and tempPeptides) and fills the lookup dictionaries of this class with
        /// the modifications, consensus targets, proteins, target/protein pairs and evidences they describe.
        /// Each file has one header line followed by rows whose columns are split with m_separator.
        /// </summary>
        /// <param name="directory">
        /// Prefix of the files to read. The file names are appended to it verbatim, so it has to end
        /// with a directory separator.
        /// </param>
        private void RetrieveDataFromTextFiles(string directory)
        {
            // Modification tag -> (mass, formula)
            foreach (var row in ReadTempFileRows(directory + "tempModInfo.txt"))
            {
                var rowPieces = row.Split(m_separator);
                m_modTagsToModMass.Add(rowPieces[0],
                                       new Tuple<double, string>(Convert.ToDouble(rowPieces[1]), rowPieces[2]));
            }

            var ptmId     = 1;
            var targetId  = 1;
            var ctToPtmId = 1;

            // Mass tags -> consensus targets, with their modifications
            foreach (var row in ReadTempFileRows(directory + "tempMassTags.txt"))
            {
                var rowPieces = row.Split(m_separator);
                var target    = new ConsensusTarget();
                target.Id = targetId++;

                var sequence = rowPieces[1];

                // Column 14 is a quoted, comma separated list of "tag:location" entries;
                // doubled quotes are dropped before the surrounding quotes are stripped.
                var unescapedPiece = rowPieces[14].Replace("\"\"", "");

                // (location, mass) of every modification found on this target
                var modInsertions = new List<Tuple<int, double>>();

                if (unescapedPiece != "")
                {
                    // The length must come from the unescaped text: using the raw column length
                    // overruns the string whenever doubled quotes were removed.
                    var unquotedPiece = unescapedPiece.Substring(1, unescapedPiece.Length - 2);

                    foreach (var mod in unquotedPiece.Split(','))
                    {
                        var modPieces = mod.Split(':');
                        var modName   = modPieces[0];
                        var modInfo   = m_modTagsToModMass[modName];

                        var ptm = new PostTranslationalModification();
                        ptm.Name = modName;
                        if (!m_ptmDictionary.ContainsKey(modName))
                        {
                            ptm.Mass    = modInfo.Item1;
                            ptm.Id      = ptmId++;
                            ptm.Formula = modInfo.Item2;
                            m_ptmDictionary.Add(modName, ptm);
                        }

                        // NOTE(review): target.Ptms receives the instance shared by every target using
                        // this tag, so its Location is the one of the first target that used the tag.
                        // The per-target location is kept in the ConsensusPtmPair and in modInsertions.
                        var sharedPtm = m_ptmDictionary[modName];
                        target.Ptms.Add(sharedPtm);
                        ptm.Location = Convert.ToInt32(modPieces[1]);

                        var ctToPtm = new ConsensusPtmPair
                        {
                            ConsensusId = target.Id,
                            PtmId       = sharedPtm.Id,
                            Location    = ptm.Location,
                            Id          = ctToPtmId++
                        };

                        m_consensusTargetToPtmDict[ctToPtm.Id] = ctToPtm;
                        modInsertions.Add(Tuple.Create(ptm.Location, sharedPtm.Mass));
                    }
                    target.ModificationDescription = unquotedPiece;
                }

                // Insert each modification mass after its residue. Working from the highest location
                // down keeps the lower insertion points valid, and every insertion is applied to the
                // running string so that all modifications end up in the encoded sequence.
                var encodedSequence = sequence;
                foreach (var insertion in modInsertions.OrderByDescending(x => x.Item1))
                {
                    encodedSequence = encodedSequence.Insert(insertion.Item1, insertion.Item2.ToString());
                }

                target.EncodedNumericSequence      = encodedSequence;
                target.Sequence                    = sequence;
                target.TheoreticalMonoIsotopicMass = Convert.ToDouble(rowPieces[2]);
                target.MultiProteinCount           = Convert.ToInt16(rowPieces[4]);
                target.ModificationCount           = Convert.ToInt16(rowPieces[13]);

                var massTagId = Convert.ToInt32(rowPieces[0]);
                m_idToMassTagDict.Add(massTagId, row);
                m_idToConensusTargetDict.Add(massTagId, target);
            }

            // NET statistics of each mass tag
            foreach (var row in ReadTempFileRows(directory + "tempMassTagsNet.txt"))
            {
                var rowPieces = row.Split(m_separator);
                var target    = m_idToConensusTargetDict[Convert.ToInt32(rowPieces[0])];

                target.PredictedNet = Convert.ToDouble(rowPieces[7]);
                target.StdevNet     = Convert.ToDouble(rowPieces[5]);
                target.AverageNet   = Convert.ToDouble(rowPieces[3]);
            }

            var proteinId = 1;

            // Proteins; the name column is stored with one surrounding character on each side
            foreach (var row in ReadTempFileRows(directory + "tempProteins.txt"))
            {
                var rowPieces     = row.Split(m_separator);
                var unquotedPiece = rowPieces[1].Substring(1, rowPieces[1].Length - 2);
                var prot          = new ProteinInformation
                {
                    Id          = proteinId++,
                    ProteinName = unquotedPiece
                };

                m_idToProteinDict[Convert.ToInt32(rowPieces[0])] = prot;
            }

            var cppId = 1;

            // Mass tag <-> protein links
            foreach (var row in ReadTempFileRows(directory + "tempMassTagToProteins.txt"))
            {
                var rowPieces = row.Split(m_separator);
                var massTagId = Convert.ToInt32(rowPieces[0]);
                var protId    = Convert.ToInt32(rowPieces[2]);

                var ctToProt = new ConsensusProteinPair();
                ctToProt.CleavageState = Convert.ToInt16(rowPieces[3]);
                ctToProt.ResidueStart  = Convert.ToInt32(rowPieces[6]);
                ctToProt.ResidueEnd    = Convert.ToInt32(rowPieces[7]);
                ctToProt.TerminusState = Convert.ToInt16(rowPieces[9]);
                ctToProt.ConsensusId   = m_idToConensusTargetDict[massTagId].Id;
                ctToProt.ProteinId     = m_idToProteinDict[protId].Id;

                m_ctToProtDict[cppId] = ctToProt;
                cppId++;
            }

            var evId = 1;

            // Peptide observations -> evidences attached to their consensus target
            foreach (var row in ReadTempFileRows(directory + "tempPeptides.txt"))
            {
                var rowPieces = row.Split(m_separator);
                var id        = Convert.ToInt32(rowPieces[0]);
                var peptide   = rowPieces[1];
                var charge    = Convert.ToInt16(rowPieces[2]);

                if (!m_idToChargeAndPeptide.ContainsKey(id))
                {
                    // First observation of this mass tag: remember the peptide and wrap the target
                    // sequence with the first and last characters of the observed peptide.
                    m_idToChargeAndPeptide[id] = new Tuple<string, List<short>>(peptide, new List<short>());
                    m_idToConensusTargetDict[id].Sequence = peptide[0] + "." +
                                                            m_idToConensusTargetDict[id].Sequence + "." +
                                                            peptide[peptide.Length - 1];
                    m_idToConensusTargetDict[id].CleanSequence = m_idToConensusTargetDict[id].Sequence;
                }

                // Record each charge state once per mass tag
                var knownCharges = m_idToChargeAndPeptide[id].Item2;
                if (!knownCharges.Contains(charge))
                {
                    knownCharges.Add(charge);
                    m_idToConensusTargetDict[id].Charges.Add(charge);
                }

                var target     = m_idToConensusTargetDict[id];
                var ctId       = target.Id;
                var evidenceId = evId++;

                var ev = new Evidence();
                ev.Id          = evidenceId;
                ev.Charge      = charge;
                ev.Sequence    = target.CleanSequence;
                ev.Scan        = Convert.ToInt32(rowPieces[3]);
                ev.DelMPpm     = Convert.ToDouble(rowPieces[4]);
                ev.ObservedNet = Convert.ToDouble(rowPieces[5]);
                ev.ObservedMonoisotopicMass = Convert.ToDouble(rowPieces[6]);
                // NOTE(review): m/z is taken as mass / charge and DelM as ppm / 1e6, with no
                // charge-carrier mass and no scaling by the observed mass - confirm this is intended.
                ev.Mz       = ev.ObservedMonoisotopicMass / ev.Charge;
                ev.NetShift = 0;
                ev.DelM     = ev.DelMPpm / 1000000;
                ev.Parent   = target;

                // Key both lookups by the evidence's own id (the counter has already moved on)
                m_evidenceDict[evidenceId] = ev;
                if (!m_ctToEvidenceMap.ContainsKey(ctId))
                {
                    m_ctToEvidenceMap[ctId] = new List<int>();
                }
                m_ctToEvidenceMap[ctId].Add(evidenceId);
            }
        }

        /// <summary>
        /// Enumerates the data rows of a text file: the first line is skipped as a header and
        /// reading stops at the first empty line or at the end of the file.
        /// </summary>
        /// <param name="path">Path of the file to read</param>
        /// <returns>The rows following the header, in file order</returns>
        private static IEnumerable<string> ReadTempFileRows(string path)
        {
            using (var reader = new StreamReader(path))
            {
                reader.ReadLine();

                string row;
                while (!string.IsNullOrEmpty(row = reader.ReadLine()))
                {
                    yield return row;
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Converts the spectrum identifications held in m_specItems into MSGF+ results.
        /// Items rejected by the import filter, or without any peptide evidence, are skipped.
        /// </summary>
        /// <param name="results">List that receives one result per accepted identification</param>
        /// <param name="path">Path to the MZIdentML file; forwarded to StoreDatasetInfo</param>
        private void MapToMsgf(List <MsgfPlusResult> results, string path)
        {
            var filter = new MsgfPlusTargetFilter(ReaderOptions);

            var cleavageStateCalculator = new clsPeptideCleavageStateCalculator();

            var i     = 0;
            var total = m_specItems.Count;

            // Go through each Spectrum ID and map it to an MSGF+ result
            foreach (var item in m_specItems)
            {
                i++;
                if (i % 500 == 0)
                {
                    UpdateProgress((100 * ((float)i / total)));
                }

                var spectrum = item.Value;

                // Skip this PSM if it doesn't pass the import filters
                // Note that qValue is basically FDR
                double qValue   = spectrum.QValue;
                double specProb = spectrum.SpecEv;

                if (filter.ShouldFilter(qValue, specProb))
                {
                    continue;
                }

                if (spectrum.PepEvidence.Count == 0)
                {
                    continue;
                }

                // The first peptide evidence supplies the flanking residues and the reference protein
                var evidence        = spectrum.PepEvidence[0];
                var peptideSequence = spectrum.Peptide.Sequence;

                var result = new MsgfPlusResult
                {
                    AnalysisId               = i,
                    Charge                   = Convert.ToInt16(spectrum.Charge),
                    CleanPeptide             = peptideSequence,
                    SeqWithNumericMods       = null,
                    MonoisotopicMass         = clsPeptideMassCalculator.ConvoluteMass(spectrum.CalMz, spectrum.Charge, 0),
                    ObservedMonoisotopicMass = clsPeptideMassCalculator.ConvoluteMass(spectrum.ExperimentalMz, spectrum.Charge, 0),
                    MultiProteinCount        = Convert.ToInt16(spectrum.PepEvCount),
                    Scan              = spectrum.ScanNum,
                    Sequence          = evidence.Pre + "." + peptideSequence + "." + evidence.Post,
                    Mz                = 0,
                    SpecProb          = specProb,
                    DelM              = 0,
                    ModificationCount = Convert.ToInt16(spectrum.Peptide.Mods.Count)
                };

                // Populate some mass related items
                result.DelM    = result.ObservedMonoisotopicMass - result.MonoisotopicMass;
                result.DelMPpm = clsPeptideMassCalculator.MassToPPM(result.DelM, result.ObservedMonoisotopicMass);
                // We could compute m/z:
                //     Mz = clsPeptideMassCalculator.ConvoluteMass(result.ObservedMonoisotopicMass, 0, result.Charge);
                // But it's stored in the mzid file, so we'll use that
                result.Mz = spectrum.ExperimentalMz;

                StoreDatasetInfo(result, path);

                result.DataSet.Tool = LcmsIdentificationTool.MZIdentML;

                // Populate items specific to the MSGF+ results (stored as mzid)

                result.Reference = evidence.DbSeq.Accession;

                var eCleavageState = cleavageStateCalculator.ComputeCleavageState(peptideSequence, evidence.Pre, evidence.Post);
                result.NumTrypticEnds = clsPeptideCleavageStateCalculator.CleavageStateToShort(eCleavageState);

                result.DeNovoScore    = spectrum.DeNovoScore;
                result.MsgfScore      = spectrum.RawScore;
                result.SpecEValue     = spectrum.SpecEv;
                result.RankSpecEValue = spectrum.Rank;

                result.EValue            = spectrum.EValue;
                result.QValue            = qValue;
                result.DiscriminantValue = qValue;
                result.PepQValue         = spectrum.PepQValue;

                result.IsotopeError = spectrum.IsoError;

                if (result.ModificationCount > 0)
                {
                    // Two parallel encodings are built while walking the peptide:
                    //   numModSeq  - signed modification mass after each modified residue
                    //   encodedSeq - "[+formula]" / "[-formula]" after each modified residue
                    var j = 0;

                    var numModSeq  = evidence.Pre + ".";
                    var encodedSeq = numModSeq;
                    foreach (var mod in spectrum.Peptide.Mods)
                    {
                        var uniMod = UniModData.ModList[mod.Value.Tag];
                        var ptm    = new PostTranslationalModification
                        {
                            Location = mod.Key,
                            Mass     = mod.Value.Mass,
                            Formula  = uniMod.Formula.ToString(),
                            Name     = uniMod.Title
                        };
                        result.Ptms.Add(ptm);

                        // Copy the residues up to and including the modified one
                        for (; j < ptm.Location; j++)
                        {
                            numModSeq  += peptideSequence[j];
                            encodedSeq += peptideSequence[j];
                        }

                        var sign = (ptm.Mass > 0) ? "+" : "-";

                        // The sign is written explicitly, so append the magnitude only;
                        // appending the raw mass would emit "--" for a mass loss.
                        numModSeq  += sign + Math.Abs(ptm.Mass);
                        encodedSeq += "[" + sign + ptm.Formula + "]";
                    }

                    // Copy the residues that follow the last modification
                    for (; j < peptideSequence.Length; j++)
                    {
                        numModSeq  += peptideSequence[j];
                        encodedSeq += peptideSequence[j];
                    }

                    numModSeq  += "." + evidence.Post;
                    encodedSeq += "." + evidence.Post;
                    result.SeqWithNumericMods        = numModSeq;
                    result.EncodedNonNumericSequence = encodedSeq;
                }
                else
                {
                    result.SeqWithNumericMods        = result.Sequence;
                    result.EncodedNonNumericSequence = result.Sequence;
                }

                result.PeptideInfo = new TargetPeptideInfo
                {
                    Peptide                = result.Sequence,
                    CleanPeptide           = result.CleanPeptide,
                    PeptideWithNumericMods = result.SeqWithNumericMods
                };

                result.SeqInfoMonoisotopicMass = result.MonoisotopicMass;
                result.ModificationDescription = null;

                // One protein entry per peptide evidence
                foreach (var proteinEvidence in spectrum.PepEvidence)
                {
                    var protein = new ProteinInformation
                    {
                        ProteinName  = proteinEvidence.DbSeq.Accession,
                        ResidueStart = proteinEvidence.Start,
                        ResidueEnd   = proteinEvidence.End
                    };
                    // NOTE(review): the terminus state of every protein is computed from the first
                    // evidence item rather than from proteinEvidence - confirm this is intended.
                    ComputeTerminusState(evidence, result.NumTrypticEnds, protein);
                    result.Proteins.Add(protein);
                }

                if (result.ModificationCount > 0)
                {
                    foreach (var mod in spectrum.Peptide.Mods)
                    {
                        // TODO: Confirm that this is valid math (MEM thinks it may not be)
                        result.SeqInfoMonoisotopicMass += mod.Value.Mass;

                        result.ModificationDescription += mod.Value.Tag + ":" + mod.Key + "  ";
                    }
                }

                results.Add(result);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Loads a SQLite target database into m_targetDb and m_lcmsDataDic: consensus targets with
        /// their modifications, proteins and evidences, plus one dataset entry per evidence dataset name.
        /// A second call with the path that was read last returns immediately.
        /// </summary>
        /// <param name="path">Path of the SQLite database file</param>
        private void ReadSqLite(string path)
        {
            // Don't read again if we just read the file
            if (path == m_lastReadFile)
            {
                return;
            }

            // The cached data is about to be discarded, so forget which file it came from first.
            // Otherwise a failure below would leave the caches empty while m_lastReadFile still
            // names the previous file, and re-reading that file would be skipped.
            m_lastReadFile = null;

            // Reset the data
            m_targetDb.ClearTargets();
            m_lcmsDataDic.Clear();

            DatabaseFactory.DatabaseFile = path;
            DatabaseFactory.ReadOrAppend = true;
            var sessionFactory = DatabaseFactory.CreateSessionFactory(DatabaseType.SQLite);

            var readConsensus = new List<ConsensusTarget>();
            var readPair      = new List<ConsensusProteinPair>();
            var readProt      = new List<ProteinInformation>();
            var readEvidence  = new List<Evidence>();
            var readPtms      = new List<PostTranslationalModification>();
            var readPtmPairs  = new List<ConsensusPtmPair>();
            var readOptions   = new List<Options>();

            var consensusDic     = new Dictionary<int, ConsensusTarget>();
            var consensusProtDic = new Dictionary<int, List<ConsensusProteinPair>>();
            var protDic          = new Dictionary<int, ProteinInformation>();
            var ptmDic           = new Dictionary<int, PostTranslationalModification>();

            using (var session = sessionFactory.OpenStatelessSession())
            {
                // Pull every table into memory within a single transaction
                using (var transact = session.BeginTransaction())
                {
                    session.CreateCriteria<ProteinInformation>().List(readProt);
                    session.CreateCriteria<ConsensusTarget>().List(readConsensus);
                    session.CreateCriteria<PostTranslationalModification>().List(readPtms);
                    session.CreateCriteria<Options>().List(readOptions);
                    session.CreateCriteria<ConsensusProteinPair>().List(readPair);
                    session.CreateCriteria<ConsensusPtmPair>().List(readPtmPairs);
                    session.CreateCriteria<Evidence>().List(readEvidence);
                    transact.Commit();
                }

                // Register the targets; their PTM and evidence lists are rebuilt below
                foreach (var consensus in readConsensus)
                {
                    consensus.Ptms.Clear();
                    consensus.Evidences = new List<Evidence>();
                    consensus.Sequence  = consensus.CleanSequence;
                    m_targetDb.AddConsensusTarget(consensus);
                    consensusDic.Add(consensus.Id, consensus);
                }

                // Group the target/protein links by target id
                foreach (var pair in readPair)
                {
                    List<ConsensusProteinPair> pairsOfTarget;
                    if (!consensusProtDic.TryGetValue(pair.Consensus.Id, out pairsOfTarget))
                    {
                        pairsOfTarget = new List<ConsensusProteinPair>();
                        consensusProtDic.Add(pair.Consensus.Id, pairsOfTarget);
                    }
                    pairsOfTarget.Add(pair);
                }

                foreach (var prot in readProt)
                {
                    protDic.Add(prot.Id, prot);
                }

                foreach (var ptm in readPtms)
                {
                    ptmDic.Add(ptm.Id, ptm);
                }

                // Give every target its own PTM objects, located as recorded in the link table
                foreach (var pair in readPtmPairs)
                {
                    var storedPtm = ptmDic[pair.PostTranslationalModification.Id];
                    var target    = consensusDic[pair.Target.Id];

                    target.Ptms.Add(new PostTranslationalModification
                    {
                        Mass     = storedPtm.Mass,
                        Name     = storedPtm.Name,
                        Formula  = storedPtm.Formula,
                        Location = pair.Location,
                        Parent   = target
                    });
                }

                foreach (var evidence in readEvidence)
                {
                    var parent = consensusDic[evidence.Parent.Id];

                    // A target without protein links simply contributes no proteins
                    List<ConsensusProteinPair> proteinPairs;
                    if (consensusProtDic.TryGetValue(evidence.Parent.Id, out proteinPairs))
                    {
                        foreach (var pair in proteinPairs)
                        {
                            // NOTE(review): the ProteinInformation instance is shared by every target
                            // linked to it, so these fields hold the values of the last pair applied.
                            var prot = protDic[pair.Protein.Id];
                            prot.ResidueEnd    = pair.ResidueEnd;
                            prot.ResidueStart  = pair.ResidueStart;
                            prot.TerminusState = (clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants)pair.TerminusState;
                            prot.CleavageState = (clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants)pair.CleavageState;
                            evidence.AddProtein(prot);
                            parent.AddProtein(prot);
                        }
                    }

                    evidence.MonoisotopicMass = parent.TheoreticalMonoIsotopicMass;
                    evidence.Ptms             = parent.Ptms;

                    // Create the dataset entry the first time its name is seen
                    var datasetName = evidence.DataSet.Name;
                    if (!m_lcmsDataDic.ContainsKey(datasetName))
                    {
                        var dataset = new LcmsDataSet(true);
                        m_lcmsDataDic.Add(datasetName, dataset);
                        m_lcmsDataDic[datasetName].Name = datasetName;
                        m_lcmsDataDic[datasetName].Tool = evidence.DataSet.Tool;
                    }
                    m_lcmsDataDic[datasetName].Evidences.Add(evidence);
                    parent.AddEvidence(evidence);
                }
            }

            // Set the member variable to avoid double reads.
            m_lastReadFile = path;
        }