/// <summary>
/// Copy the values of the reader's current PSM into <paramref name="result"/>, then build
/// the PTM list, the modification description and the bracket-encoded sequence.
/// </summary>
/// <param name="result">Evidence object to populate</param>
/// <param name="reader">Reader whose <c>CurrentPSM</c> supplies the values</param>
/// <param name="specProb">Value stored in <c>result.SpecProb</c></param>
protected void StorePsmData(Evidence result, clsPHRPReader reader, double specProb)
{
    var psm = reader.CurrentPSM;

    result.Charge = psm.Charge;
    result.CleanPeptide = psm.PeptideCleanSequence;
    result.SeqWithNumericMods = psm.PeptideWithNumericMods;
    result.MonoisotopicMass = psm.PeptideMonoisotopicMass;
    result.ObservedMonoisotopicMass = psm.PrecursorNeutralMass;
    result.MultiProteinCount = (short)psm.Proteins.Count;
    result.Scan = psm.ScanNumber;
    result.Sequence = psm.Peptide;
    result.Mz = clsPeptideMassCalculator.ConvoluteMass(psm.PrecursorNeutralMass, 0, psm.Charge);
    result.SpecProb = specProb;
    result.ModificationCount = (short)psm.ModifiedResidues.Count;

    // The mass errors arrive as text; only parse them when a value is present.
    // (Previously only the PPM value was guarded, so an empty Da value threw FormatException.)
    if (!string.IsNullOrEmpty(psm.MassErrorDa))
    {
        result.DelM = Convert.ToDouble(psm.MassErrorDa);
    }

    if (!string.IsNullOrEmpty(psm.MassErrorPPM))
    {
        result.DelMPpm = Convert.ToDouble(psm.MassErrorPPM);
    }

    result.PeptideInfo = new TargetPeptideInfo
    {
        Peptide = result.Sequence,
        CleanPeptide = result.CleanPeptide,
        PeptideWithNumericMods = result.SeqWithNumericMods
    };

    result.SeqInfoMonoisotopicMass = result.MonoisotopicMass;

    StoreProteinInfo(reader, result);

    if (result.ModificationCount == 0)
    {
        // No modifications: the encoded sequence is simply the peptide sequence
        result.EncodedNonNumericSequence = result.Sequence;
        return;
    }

    foreach (var info in psm.ModifiedResidues)
    {
        result.ModificationDescription += info.ModDefinition.MassCorrectionTag + ":" + info.ResidueLocInPeptide + " ";

        var ptm = new PostTranslationalModification
        {
            Location = info.ResidueLocInPeptide,
            Mass = info.ModDefinition.ModificationMass,
            Formula = info.ModDefinition.MassCorrectionTag,
            Name = info.ModDefinition.MassCorrectionTag
        };
        result.Ptms.Add(ptm);
    }

    // Build "<first char of Sequence>.<residues with [+Tag]/[-Tag] after each modified residue>.<last char of Sequence>"
    var encodedSeq = result.Sequence[0] + ".";
    var residueIndex = 0;

    // Walk the PTMs in ascending location order so each tag lands after the
    // residue it belongs to regardless of the order they were stored in.
    foreach (var ptm in result.Ptms.OrderBy(p => p.Location))
    {
        // Copy residues up to and including the modified one; never read past the
        // end of the clean peptide, even if a location exceeds its length.
        for (; residueIndex < ptm.Location && residueIndex < result.CleanPeptide.Length; residueIndex++)
        {
            encodedSeq += result.CleanPeptide[residueIndex];
        }

        encodedSeq += "[" + ((ptm.Mass > 0) ? "+" : "-") + ptm.Formula + "]";
    }

    // Copy whatever residues follow the last modification
    for (; residueIndex < result.CleanPeptide.Length; residueIndex++)
    {
        encodedSeq += result.CleanPeptide[residueIndex];
    }

    encodedSeq += "." + result.Sequence.Last();
    result.EncodedNonNumericSequence = encodedSeq;
}
/// <summary>
/// Read the temporary text files (tempModInfo, tempMassTags, tempMassTagsNet, tempProteins,
/// tempMassTagToProteins, tempPeptides) and populate the member lookup dictionaries.
/// </summary>
/// <remarks>
/// For every file the first line is skipped (presumably a header row) and reading stops at
/// the first empty line or at end of file. Rows are split on <c>m_separator</c>.
/// </remarks>
/// <param name="directory">
/// Directory holding the files. The file names are appended directly to this string,
/// so it must already end with a directory separator.
/// </param>
private void RetrieveDataFromTextFiles(string directory)
{
    // --- Modification definitions: tag -> (mass, formula) ---
    using (var reader = new StreamReader(directory + "tempModInfo.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            m_modTagsToModMass.Add(rowPieces[0], new Tuple<double, string>(Convert.ToDouble(rowPieces[1]), rowPieces[2]));
            row = reader.ReadLine();
        }
    }

    // --- Mass tags: one consensus target per row ---
    var ptmId = 1;
    var targetId = 1;
    var ctToPtmId = 1;
    using (var reader = new StreamReader(directory + "tempMassTags.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var target = new ConsensusTarget();
            target.Id = targetId++;

            var sequence = rowPieces[1];

            // Column 14 holds the quoted modification list; an empty field is written as ""
            var unescapedPiece = rowPieces[14].Replace("\"\"", "");

            // (location, mass) of every modification on this target, kept locally because the
            // objects placed in target.Ptms are shared and do not carry a per-target location
            var modInsertions = new List<Tuple<int, double>>();

            if (unescapedPiece != "")
            {
                // Strip the surrounding quotes. The length must come from the unescaped text:
                // using the raw column length overran the string whenever a "" was removed.
                var unquotedPiece = unescapedPiece.Substring(1, unescapedPiece.Length - 2);
                var mods = unquotedPiece.Split(',');
                foreach (var mod in mods)
                {
                    // Each entry has the form "tag:location"
                    var modPieces = mod.Split(':');
                    var modInfo = m_modTagsToModMass[modPieces[0]];
                    var modMass = modInfo.Item1;

                    var ptm = new PostTranslationalModification();
                    ptm.Name = modPieces[0];
                    if (!m_ptmDictionary.ContainsKey(ptm.Name))
                    {
                        // First time this modification is seen: register it with a new id
                        ptm.Mass = modMass;
                        ptm.Id = ptmId++;
                        ptm.Formula = modInfo.Item2;
                        m_ptmDictionary.Add(ptm.Name, ptm);
                    }

                    // NOTE(review): target.Ptms receives the shared instance from m_ptmDictionary,
                    // so its Location is whatever the first occurrence of that modification set.
                    // Confirm whether per-target copies are wanted here.
                    var sharedPtm = m_ptmDictionary[ptm.Name];
                    target.Ptms.Add(sharedPtm);
                    ptm.Location = Convert.ToInt32(modPieces[1]);

                    modInsertions.Add(new Tuple<int, double>(ptm.Location, modMass));

                    var ctToPtm = new ConsensusPtmPair
                    {
                        ConsensusId = target.Id,
                        PtmId = sharedPtm.Id,
                        Location = ptm.Location,
                        Id = ctToPtmId++
                    };
                    m_consensusTargetToPtmDict[ctToPtm.Id] = ctToPtm;
                }

                target.ModificationDescription = unquotedPiece;
            }

            // Insert each modification mass after its residue. Working from the highest location
            // down means earlier insert positions are not shifted, and inserting into the
            // accumulated string keeps every modification (previously each insert started again
            // from the unmodified sequence, so only one survived).
            var encodedSequence = sequence;
            foreach (var insertion in modInsertions.OrderByDescending(x => x.Item1))
            {
                encodedSequence = encodedSequence.Insert(insertion.Item1, insertion.Item2.ToString());
            }

            target.EncodedNumericSequence = encodedSequence;
            target.Sequence = sequence;
            target.TheoreticalMonoIsotopicMass = Convert.ToDouble(rowPieces[2]);
            target.MultiProteinCount = Convert.ToInt16(rowPieces[4]);
            target.ModificationCount = Convert.ToInt16(rowPieces[13]);

            var massTagId = Convert.ToInt32(rowPieces[0]);
            m_idToMassTagDict.Add(massTagId, row);
            m_idToConensusTargetDict.Add(massTagId, target);

            row = reader.ReadLine();
        }
    }

    // --- NET values for each mass tag ---
    using (var reader = new StreamReader(directory + "tempMassTagsNet.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var id = Convert.ToInt32(rowPieces[0]);
            var target = m_idToConensusTargetDict[id];
            target.PredictedNet = Convert.ToDouble(rowPieces[7]);
            target.StdevNet = Convert.ToDouble(rowPieces[5]);
            target.AverageNet = Convert.ToDouble(rowPieces[3]);
            row = reader.ReadLine();
        }
    }

    // --- Proteins ---
    var proteinId = 1;
    using (var reader = new StreamReader(directory + "tempProteins.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);

            // Drop the first and last character of the name column (the surrounding quotes)
            var unquotedPiece = rowPieces[1].Substring(1, rowPieces[1].Length - 2);
            var prot = new ProteinInformation
            {
                Id = proteinId++,
                ProteinName = unquotedPiece
            };
            m_idToProteinDict[Convert.ToInt32(rowPieces[0])] = prot;
            row = reader.ReadLine();
        }
    }

    // --- Mass tag to protein mapping ---
    var cppId = 1;
    using (var reader = new StreamReader(directory + "tempMassTagToProteins.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var massTagId = Convert.ToInt32(rowPieces[0]);
            var proteinKey = Convert.ToInt32(rowPieces[2]);

            var ctToProt = new ConsensusProteinPair();
            ctToProt.CleavageState = Convert.ToInt16(rowPieces[3]);
            ctToProt.ResidueStart = Convert.ToInt32(rowPieces[6]);
            ctToProt.ResidueEnd = Convert.ToInt32(rowPieces[7]);
            ctToProt.TerminusState = Convert.ToInt16(rowPieces[9]);
            ctToProt.ConsensusId = m_idToConensusTargetDict[massTagId].Id;
            ctToProt.ProteinId = m_idToProteinDict[proteinKey].Id;

            m_ctToProtDict[cppId] = ctToProt;
            cppId++;
            row = reader.ReadLine();
        }
    }

    // --- Peptide observations: one Evidence per row ---
    var evId = 1;
    using (var reader = new StreamReader(directory + "tempPeptides.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var id = Convert.ToInt32(rowPieces[0]);
            var charge = Convert.ToInt16(rowPieces[2]);
            var target = m_idToConensusTargetDict[id];

            if (!m_idToChargeAndPeptide.ContainsKey(id))
            {
                // First observation of this mass tag: remember the peptide and wrap the
                // target sequence with the first and last character of the peptide column
                m_idToChargeAndPeptide[id] = new Tuple<string, List<short>>(rowPieces[1], new List<short>());
                m_idToChargeAndPeptide[id].Item2.Add(charge);
                target.Sequence = rowPieces[1][0] + "." + target.Sequence + "." + rowPieces[1][rowPieces[1].Length - 1];
                target.CleanSequence = target.Sequence;
                target.Charges.Add(charge);
            }

            if (!m_idToChargeAndPeptide[id].Item2.Contains(charge))
            {
                // New charge state for an already-known mass tag
                m_idToChargeAndPeptide[id].Item2.Add(charge);
                target.Charges.Add(charge);
            }

            var ctId = target.Id;

            // Capture the id once so the Evidence and both lookup structures agree on it
            // (previously the dictionaries were keyed with the already-incremented counter).
            var evidenceId = evId++;

            var ev = new Evidence();
            ev.Id = evidenceId;
            ev.Charge = charge;
            ev.Sequence = target.CleanSequence;
            ev.Scan = Convert.ToInt32(rowPieces[3]);
            ev.DelMPpm = Convert.ToDouble(rowPieces[4]);
            ev.ObservedNet = Convert.ToDouble(rowPieces[5]);
            ev.ObservedMonoisotopicMass = Convert.ToDouble(rowPieces[6]);

            // NOTE(review): this is plain mass / charge with no charge-carrier mass added,
            // and DelM is the ppm value divided by 1e6 without scaling by mass — confirm
            // that both are the intended definitions.
            ev.Mz = ev.ObservedMonoisotopicMass / ev.Charge;
            ev.NetShift = 0;
            ev.DelM = ev.DelMPpm / 1000000;
            ev.Parent = target;

            m_evidenceDict[evidenceId] = ev;

            if (!m_ctToEvidenceMap.ContainsKey(ctId))
            {
                m_ctToEvidenceMap[ctId] = new List<int>();
            }

            m_ctToEvidenceMap[ctId].Add(evidenceId);
            row = reader.ReadLine();
        }
    }
}
/// <summary>
/// Convert the spectrum identifications held in <c>m_specItems</c> (read from an MZIdentML
/// file) into MSGF+ results, skipping identifications rejected by the import filter or
/// lacking peptide evidence.
/// </summary>
/// <param name="results">List that receives one result per accepted identification</param>
/// <param name="path">Path to the MZIdentML file; passed on to <c>StoreDatasetInfo</c></param>
private void MapToMsgf(List<MsgfPlusResult> results, string path)
{
    var filter = new MsgfPlusTargetFilter(ReaderOptions);
    var cleavageStateCalculator = new clsPeptideCleavageStateCalculator();
    var i = 0;
    var total = m_specItems.Count;

    // Go through each Spectrum ID and map it to an MSGF+ result
    foreach (var item in m_specItems)
    {
        i++;
        if (i % 500 == 0)
        {
            UpdateProgress((100 * ((float)i / total)));
        }

        var spectrum = item.Value;

        // Skip this PSM if it doesn't pass the import filters
        // Note that qValue is basically FDR
        double qValue = spectrum.QValue;
        double specProb = spectrum.SpecEv;
        if (filter.ShouldFilter(qValue, specProb))
        {
            continue;
        }

        if (spectrum.PepEvidence.Count == 0)
        {
            continue;
        }

        // The first peptide evidence supplies the flanking residues and the reference protein
        var evidence = spectrum.PepEvidence[0];
        var cleanSequence = spectrum.Peptide.Sequence;

        var result = new MsgfPlusResult
        {
            AnalysisId = i,
            Charge = Convert.ToInt16(spectrum.Charge),
            CleanPeptide = cleanSequence,
            SeqWithNumericMods = null,
            MonoisotopicMass = clsPeptideMassCalculator.ConvoluteMass(spectrum.CalMz, spectrum.Charge, 0),
            ObservedMonoisotopicMass = clsPeptideMassCalculator.ConvoluteMass(spectrum.ExperimentalMz, spectrum.Charge, 0),
            MultiProteinCount = Convert.ToInt16(spectrum.PepEvCount),
            Scan = spectrum.ScanNum,
            Sequence = evidence.Pre + "." + cleanSequence + "." + evidence.Post,
            Mz = 0,
            SpecProb = specProb,
            DelM = 0,
            ModificationCount = Convert.ToInt16(spectrum.Peptide.Mods.Count)
        };

        // Populate some mass related items
        result.DelM = result.ObservedMonoisotopicMass - result.MonoisotopicMass;
        result.DelMPpm = clsPeptideMassCalculator.MassToPPM(result.DelM, result.ObservedMonoisotopicMass);

        // We could compute m/z:
        //     Mz = clsPeptideMassCalculator.ConvoluteMass(result.ObservedMonoisotopicMass, 0, result.Charge);
        // But it's stored in the mzid file, so we'll use that
        result.Mz = spectrum.ExperimentalMz;

        StoreDatasetInfo(result, path);
        result.DataSet.Tool = LcmsIdentificationTool.MZIdentML;

        // Populate items specific to the MSGF+ results (stored as mzid)
        result.Reference = evidence.DbSeq.Accession;

        var eCleavageState = cleavageStateCalculator.ComputeCleavageState(cleanSequence, evidence.Pre, evidence.Post);
        result.NumTrypticEnds = clsPeptideCleavageStateCalculator.CleavageStateToShort(eCleavageState);

        result.DeNovoScore = spectrum.DeNovoScore;
        result.MsgfScore = spectrum.RawScore;
        result.SpecEValue = spectrum.SpecEv;
        result.RankSpecEValue = spectrum.Rank;

        result.EValue = spectrum.EValue;
        result.QValue = qValue;
        result.DiscriminantValue = qValue;
        result.PepQValue = spectrum.PepQValue;

        result.IsotopeError = spectrum.IsoError;

        if (result.ModificationCount > 0)
        {
            // Create the PTM objects in the order the modifications were read
            var ptms = new List<PostTranslationalModification>();
            foreach (var mod in spectrum.Peptide.Mods)
            {
                var uniMod = UniModData.ModList[mod.Value.Tag];
                var ptm = new PostTranslationalModification
                {
                    Location = mod.Key,
                    Mass = mod.Value.Mass,
                    Formula = uniMod.Formula.ToString(),
                    Name = uniMod.Title
                };
                ptms.Add(ptm);
                result.Ptms.Add(ptm);
            }

            var j = 0;
            var numModSeq = evidence.Pre + ".";
            var encodedSeq = numModSeq;

            // Encode in ascending location order so each tag follows the residue it modifies,
            // whatever order the modifications were enumerated in.
            foreach (var ptm in ptms.OrderBy(p => p.Location))
            {
                // Copy residues up to and including the modified one. The extra length check
                // covers C-terminal modifications, which mzIdentML places at length + 1.
                for (; j < ptm.Location && j < cleanSequence.Length; j++)
                {
                    numModSeq = numModSeq + cleanSequence[j];
                    encodedSeq = encodedSeq + cleanSequence[j];
                }

                // Write an explicit sign followed by the magnitude; appending the raw mass
                // after "-" produced a doubled minus sign for negative masses.
                numModSeq += (ptm.Mass > 0) ? "+" : "-";
                numModSeq = numModSeq + Math.Abs(ptm.Mass);

                encodedSeq += "[" + ((ptm.Mass > 0) ? "+" : "-") + ptm.Formula + "]";
            }

            // Copy the residues after the last modification
            for (; j < cleanSequence.Length; j++)
            {
                numModSeq = numModSeq + cleanSequence[j];
                encodedSeq += cleanSequence[j];
            }

            numModSeq = numModSeq + "." + evidence.Post;
            encodedSeq += "." + evidence.Post;

            result.SeqWithNumericMods = numModSeq;
            result.EncodedNonNumericSequence = encodedSeq;
        }
        else
        {
            result.SeqWithNumericMods = result.Sequence;
            result.EncodedNonNumericSequence = result.Sequence;
        }

        result.PeptideInfo = new TargetPeptideInfo
        {
            Peptide = result.Sequence,
            CleanPeptide = result.CleanPeptide,
            PeptideWithNumericMods = result.SeqWithNumericMods
        };

        result.SeqInfoMonoisotopicMass = result.MonoisotopicMass;
        result.ModificationDescription = null;

        foreach (var thing in spectrum.PepEvidence)
        {
            var protein = new ProteinInformation
            {
                ProteinName = thing.DbSeq.Accession,
                ResidueStart = thing.Start,
                ResidueEnd = thing.End
            };

            // NOTE(review): the terminus state is computed from the first peptide evidence
            // for every protein, not from the evidence being iterated — confirm intended.
            ComputeTerminusState(evidence, result.NumTrypticEnds, protein);
            result.Proteins.Add(protein);
        }

        if (result.ModificationCount > 0)
        {
            foreach (var mod in spectrum.Peptide.Mods)
            {
                // TODO: Confirm that this is valid math (MEM thinks it may not be)
                result.SeqInfoMonoisotopicMass += mod.Value.Mass;

                result.ModificationDescription += mod.Value.Tag + ":" + mod.Key + "  ";
            }
        }

        results.Add(result);
    }
}
/// <summary>
/// Load a SQLite target database into <c>m_targetDb</c> and <c>m_lcmsDataDic</c>.
/// Does nothing when <paramref name="path"/> equals the last file that was read.
/// </summary>
/// <param name="path">Path of the SQLite database file</param>
private void ReadSqLite(string path)
{
    // The previous call already loaded this file
    if (path == m_lastReadFile)
    {
        return;
    }

    // Discard whatever an earlier read left behind
    m_targetDb.ClearTargets();
    m_lcmsDataDic.Clear();

    DatabaseFactory.DatabaseFile = path;
    DatabaseFactory.ReadOrAppend = true;
    var sessionFactory = DatabaseFactory.CreateSessionFactory(DatabaseType.SQLite);

    // Raw rows, one list per mapped type
    var targets = new List<ConsensusTarget>();
    var proteinPairs = new List<ConsensusProteinPair>();
    var proteins = new List<ProteinInformation>();
    var evidences = new List<Evidence>();
    var ptms = new List<PostTranslationalModification>();
    var ptmPairs = new List<ConsensusPtmPair>();
    var options = new List<Options>();

    // Lookups built from the raw rows
    var targetsById = new Dictionary<int, ConsensusTarget>();
    var proteinPairsByTarget = new Dictionary<int, List<ConsensusProteinPair>>();
    var ptmPairsByTarget = new Dictionary<int, List<ConsensusPtmPair>>();
    var proteinsById = new Dictionary<int, ProteinInformation>();
    var ptmsById = new Dictionary<int, PostTranslationalModification>();

    using (var session = sessionFactory.OpenStatelessSession())
    {
        // Pull every table in a single transaction
        using (var transact = session.BeginTransaction())
        {
            session.CreateCriteria<ProteinInformation>().List(proteins);
            session.CreateCriteria<ConsensusTarget>().List(targets);
            session.CreateCriteria<PostTranslationalModification>().List(ptms);
            session.CreateCriteria<Options>().List(options);
            session.CreateCriteria<ConsensusProteinPair>().List(proteinPairs);
            session.CreateCriteria<ConsensusPtmPair>().List(ptmPairs);
            session.CreateCriteria<Evidence>().List(evidences);
            transact.Commit();
        }

        // Register each target with fresh PTM/evidence collections
        foreach (var target in targets)
        {
            target.Ptms.Clear();
            target.Evidences = new List<Evidence>();
            target.Sequence = target.CleanSequence;
            m_targetDb.AddConsensusTarget(target);
            targetsById.Add(target.Id, target);
        }

        // Group the target/protein pairs by target id
        foreach (var pair in proteinPairs)
        {
            var targetId = pair.Consensus.Id;
            List<ConsensusProteinPair> group;
            if (!proteinPairsByTarget.TryGetValue(targetId, out group))
            {
                group = new List<ConsensusProteinPair>();
                proteinPairsByTarget.Add(targetId, group);
            }

            group.Add(pair);
        }

        // Group the target/PTM pairs by target id
        foreach (var pair in ptmPairs)
        {
            var targetId = pair.Target.Id;
            List<ConsensusPtmPair> group;
            if (!ptmPairsByTarget.TryGetValue(targetId, out group))
            {
                group = new List<ConsensusPtmPair>();
                ptmPairsByTarget.Add(targetId, group);
            }

            group.Add(pair);
        }

        foreach (var protein in proteins)
        {
            proteinsById.Add(protein.Id, protein);
        }

        foreach (var ptm in ptms)
        {
            ptmsById.Add(ptm.Id, ptm);
        }

        // Give every target its own PTM objects, copied from the stored definitions
        foreach (var entry in ptmPairsByTarget)
        {
            foreach (var pair in entry.Value)
            {
                var definition = ptmsById[pair.PostTranslationalModification.Id];
                var owner = targetsById[pair.Target.Id];

                owner.Ptms.Add(new PostTranslationalModification
                {
                    Mass = definition.Mass,
                    Name = definition.Name,
                    Formula = definition.Formula,
                    Location = pair.Location,
                    Parent = owner
                });
            }
        }

        // Wire each evidence to its proteins, its parent target and its dataset
        foreach (var evidence in evidences)
        {
            var parentId = evidence.Parent.Id;

            foreach (var pair in proteinPairsByTarget[parentId])
            {
                // NOTE(review): the ProteinInformation instance comes from the shared lookup,
                // so these four values are overwritten by every pair that references the
                // same protein — confirm that is intended.
                var protein = proteinsById[pair.Protein.Id];
                protein.ResidueEnd = pair.ResidueEnd;
                protein.ResidueStart = pair.ResidueStart;
                protein.TerminusState = (clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants)pair.TerminusState;
                protein.CleavageState = (clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants)pair.CleavageState;

                evidence.AddProtein(protein);
                targetsById[parentId].AddProtein(protein);
            }

            var parent = targetsById[parentId];
            evidence.MonoisotopicMass = parent.TheoreticalMonoIsotopicMass;
            evidence.Ptms = parent.Ptms;

            // Create the dataset entry the first time its name is encountered
            var datasetName = evidence.DataSet.Name;
            if (!m_lcmsDataDic.ContainsKey(datasetName))
            {
                var dataset = new LcmsDataSet(true);
                m_lcmsDataDic.Add(datasetName, dataset);
                m_lcmsDataDic[datasetName].Name = datasetName;
                m_lcmsDataDic[datasetName].Tool = evidence.DataSet.Tool;
            }

            m_lcmsDataDic[datasetName].Evidences.Add(evidence);
            parent.AddEvidence(evidence);
        }
    }

    // Remember the file so an immediate repeat call is skipped
    m_lastReadFile = path;
}