// Builds a synthetic target database (numberConsensus targets, each holding numberProteins
// proteins that carry numberEvidences evidences apiece) and writes it with the Access writer.
// Any file already present at the resolved output path is deleted first.
public void TestWriteAccessDb(string path, int numberConsensus, int numberProteins, int numberEvidences)
{
    var accessWriter = new AccessTargetDatabaseWriter();
    var writerOptions = new Options();
    var targetDatabase = new TargetDatabase();

    // Running number shared by every evidence in the database; used for the evidence Id,
    // its scan number and the suffix of its peptide strings
    var nextEvidenceNumber = 1;

    for (var consensusId = 1; consensusId <= numberConsensus; consensusId++)
    {
        var consensus = new ConsensusTarget { Id = consensusId };

        for (var proteinNumber = 1; proteinNumber <= numberProteins; proteinNumber++)
        {
            var proteinInfo = new ProteinInformation();
            proteinInfo.ProteinName = "Access_Test" + proteinNumber;
            proteinInfo.CleavageState = clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants.Full;
            proteinInfo.TerminusState = clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants.ProteinNTerminus;
            proteinInfo.Consensus.Add(consensus);

            for (var analysisId = 1; analysisId <= numberEvidences; analysisId++)
            {
                var evidenceNumber = nextEvidenceNumber++;

                var evidence = new Evidence
                {
                    AnalysisId = analysisId,
                    Charge = 1,
                    Id = evidenceNumber,
                    CleanPeptide = "MIKEDEGAN" + evidenceNumber,
                    Sequence = "BIGBIRD" + evidenceNumber,
                    Mz = 405,
                    Scan = evidenceNumber,
                    PeptideInfo = new TargetPeptideInfo()
                };

                evidence.AddProtein(proteinInfo);
                consensus.AddEvidence(evidence);
            }

            consensus.AddProtein(proteinInfo);
            consensus.CalculateStatistics();
        }

        consensus.TheoreticalMonoIsotopicMass = 100.0;
        consensus.AverageNet = 0.6;
        consensus.PredictedNet = 0.7;

        targetDatabase.ConsensusTargets.Add(consensus);
    }

    var outputPath = GetTestSuiteDataPath(path);
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    accessWriter.Write(targetDatabase, writerOptions, outputPath);
}
/// <summary>
/// Copy the protein details of the reader's current PSM onto an evidence
/// </summary>
/// <param name="reader">Reader whose CurrentPSM.ProteinDetails entries are copied</param>
/// <param name="result">Evidence whose Proteins collection receives one new entry per protein detail</param>
protected static void StoreProteinInfo(clsPHRPReader reader, Evidence result)
{
    foreach (var entry in reader.CurrentPSM.ProteinDetails)
    {
        var details = entry.Value;

        var info = new ProteinInformation();
        info.ProteinName = details.ProteinName;
        info.CleavageState = details.CleavageState;
        info.TerminusState = details.TerminusState;
        info.ResidueStart = details.ResidueStart;
        info.ResidueEnd = details.ResidueEnd;

        result.Proteins.Add(info);
    }
}
// Builds a synthetic target database with numberOfTargets consensus targets, each linked to
// one protein and numberOfEvidences evidences, and writes it with the SQLite writer.
public void TestWriteDatabase(string path, int numberOfTargets, int numberOfEvidences)
{
    var sqliteWriter = new SqLiteTargetDatabaseWriter();
    var writerOptions = new Options();
    var targetDatabase = new TargetDatabase();

    // Running numbers shared across all targets
    var nextProteinNumber = 1;
    var nextEvidenceNumber = 1;

    for (var targetId = 1; targetId <= numberOfTargets; targetId++)
    {
        var consensus = new ConsensusTarget { Id = targetId };

        var proteinNumber = nextProteinNumber++;
        var proteinInfo = new ProteinInformation();
        proteinInfo.ProteinName = "SO_Test" + proteinNumber;
        proteinInfo.CleavageState = clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants.Full;
        proteinInfo.TerminusState = clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants.ProteinNTerminus;
        proteinInfo.Consensus.Add(consensus);

        for (var analysisId = 1; analysisId <= numberOfEvidences; analysisId++)
        {
            var evidenceNumber = nextEvidenceNumber++;

            var evidence = new Evidence
            {
                AnalysisId = analysisId,
                Charge = 1,
                Id = evidenceNumber,
                CleanPeptide = "MIKEDEGAN" + evidenceNumber,
                Sequence = "BIGBIRD" + evidenceNumber,
                Mz = 405,
                Scan = evidenceNumber,
                PeptideInfo = new TargetPeptideInfo()
            };

            evidence.AddProtein(proteinInfo);
            consensus.AddEvidence(evidence);
        }

        consensus.AddProtein(proteinInfo);
        consensus.CalculateStatistics();

        targetDatabase.ConsensusTargets.Add(consensus);
    }

    sqliteWriter.Write(targetDatabase, writerOptions, GetTestSuiteDataPath(path));
}
/// <summary>
/// Read an Access target database by exporting its AMT, AMT_Proteins and AMT_to_Protein_Map
/// tables to temporary delimited text files next to the database and parsing those files
/// </summary>
/// <param name="path">Path to the Access database file</param>
/// <returns>Target database holding the consensus targets and proteins that were read</returns>
public TargetDatabase ReadDb(string path)
{
    // The temporary export files live in the directory that holds the database.
    // Path.GetDirectoryName replaces the former "split on '\' and skip every piece that
    // contains a '.'" logic, which dropped directories with a dot in their name and
    // treated an extension-less file name as a directory.
    var directory = Path.GetDirectoryName(path) ?? string.Empty;
    var amtFile = Path.Combine(directory, "outTempAMT.txt");
    var proteinFile = Path.Combine(directory, "outTempAMT_Proteins.txt");
    var mapFile = Path.Combine(directory, "outTempAMT_to_Protein_Map.txt");

    var consensusTargets = new Dictionary<int, ConsensusTarget>();
    var proteins = new Dictionary<int, ProteinInformation>();

    try
    {
        // Export the three tables (with a header row) to text files
        var accApplication = new ACCESS.Application();
        try
        {
            accApplication.OpenCurrentDatabase(path);
            accApplication.DoCmd.TransferText(TransferType: ACCESS.AcTextTransferType.acExportDelim,
                                              TableName: "AMT",
                                              FileName: amtFile,
                                              HasFieldNames: true);
            accApplication.DoCmd.TransferText(TransferType: ACCESS.AcTextTransferType.acExportDelim,
                                              TableName: "AMT_Proteins",
                                              FileName: proteinFile,
                                              HasFieldNames: true);
            accApplication.DoCmd.TransferText(TransferType: ACCESS.AcTextTransferType.acExportDelim,
                                              TableName: "AMT_to_Protein_Map",
                                              FileName: mapFile,
                                              HasFieldNames: true);
            accApplication.CloseCurrentDatabase();
        }
        finally
        {
            // Always shut the Access instance down, even when the export failed
            accApplication.Quit();
        }

        // AMT rows become consensus targets.
        // Columns read: 0 = ID, 1 = monoisotopic mass, 2 = NET, 3 = predicted NET,
        // 4 = number of evidences to create, 5 = normalized score, 6 = sequence with numeric mods
        using (var ctReader = new StreamReader(amtFile))
        {
            // Skip the header line
            ctReader.ReadLine();

            string ctLine;
            while ((ctLine = ctReader.ReadLine()) != null)
            {
                var pieces = ctLine.Split(',');

                var target = new ConsensusTarget
                {
                    Id = Convert.ToInt32(pieces[0]),
                    TheoreticalMonoIsotopicMass = Convert.ToDouble(pieces[1]),
                    AverageNet = Convert.ToDouble(pieces[2]),
                    PredictedNet = Convert.ToDouble(pieces[3]),
                    EncodedNumericSequence = pieces[6]
                };

                var totalEvidences = Convert.ToInt32(pieces[4]);
                var normScore = Convert.ToDouble(pieces[5]);

                // One evidence per recorded observation, each mirroring the target's values,
                // so that statistics computed over the evidences reflect the observation count
                for (var evNum = 0; evNum < totalEvidences; evNum++)
                {
                    var evidence = new Evidence
                    {
                        ObservedNet = target.AverageNet,
                        ObservedMonoisotopicMass = target.TheoreticalMonoIsotopicMass,
                        PredictedNet = target.PredictedNet,
                        NormalizedScore = normScore,
                        SeqWithNumericMods = target.EncodedNumericSequence,
                        Parent = target
                    };
                    target.Evidences.Add(evidence);
                }

                consensusTargets.Add(target.Id, target);
            }
        }

        // AMT_Proteins rows: column 0 = ID, column 1 = protein name
        using (var protReader = new StreamReader(proteinFile))
        {
            protReader.ReadLine();

            string protLine;
            while ((protLine = protReader.ReadLine()) != null)
            {
                var pieces = protLine.Split(',');
                var protein = new ProteinInformation { ProteinName = pieces[1] };
                proteins.Add(Convert.ToInt32(pieces[0]), protein);
            }
        }

        // AMT_to_Protein_Map rows link a consensus target (column 0) to a protein (column 1)
        using (var mapReader = new StreamReader(mapFile))
        {
            mapReader.ReadLine();

            string mapLine;
            while ((mapLine = mapReader.ReadLine()) != null)
            {
                var pieces = mapLine.Split(',');
                var targetId = Convert.ToInt32(pieces[0]);
                var proteinId = Convert.ToInt32(pieces[1]);
                consensusTargets[targetId].AddProtein(proteins[proteinId]);
            }
        }
    }
    finally
    {
        // Remove the temporary export files whether or not reading succeeded
        foreach (var tempFile in new[] { amtFile, proteinFile, mapFile })
        {
            if (File.Exists(tempFile))
            {
                File.Delete(tempFile);
            }
        }
    }

    var database = new TargetDatabase();
    foreach (var target in consensusTargets)
    {
        database.AddConsensusTarget(target.Value);
    }
    database.Proteins = proteins.Values.ToList();

    return database;
}
/// <summary>
/// Map the results of a MZIdentML read to MSGF+
/// </summary>
/// <param name="results">Object to populate with the results of the Mapping</param>
/// <param name="path">Path to MZIdentML file</param>
private void MapToMsgf(List<MsgfPlusResult> results, string path)
{
    var filter = new MsgfPlusTargetFilter(ReaderOptions);
    var cleavageStateCalculator = new clsPeptideCleavageStateCalculator();

    var i = 0;
    var total = m_specItems.Count;

    // Go through each Spectrum ID and map it to an MSGF+ result
    foreach (var item in m_specItems)
    {
        i++;
        if (i % 500 == 0)
        {
            UpdateProgress((100 * ((float)i / total)));
        }

        // Skip this PSM if it doesn't pass the import filters
        // Note that qValue is basically FDR
        double qValue = item.Value.QValue;
        double specProb = item.Value.SpecEv;

        if (filter.ShouldFilter(qValue, specProb))
        {
            continue;
        }

        if (item.Value.PepEvidence.Count == 0)
        {
            continue;
        }

        // The first peptide evidence supplies the flanking residues and the reference protein
        var evidence = item.Value.PepEvidence[0];
        var peptideSequence = item.Value.Peptide.Sequence;

        var result = new MsgfPlusResult
        {
            AnalysisId = i,
            Charge = Convert.ToInt16(item.Value.Charge),
            CleanPeptide = peptideSequence,
            SeqWithNumericMods = null,
            MonoisotopicMass = clsPeptideMassCalculator.ConvoluteMass(item.Value.CalMz, item.Value.Charge, 0),
            ObservedMonoisotopicMass = clsPeptideMassCalculator.ConvoluteMass(item.Value.ExperimentalMz, item.Value.Charge, 0),
            MultiProteinCount = Convert.ToInt16(item.Value.PepEvCount),
            Scan = item.Value.ScanNum,
            Sequence = evidence.Pre + "." + peptideSequence + "." + evidence.Post,
            Mz = 0,
            SpecProb = specProb,
            DelM = 0,
            ModificationCount = Convert.ToInt16(item.Value.Peptide.Mods.Count)
        };

        // Populate some mass related items
        result.DelM = result.ObservedMonoisotopicMass - result.MonoisotopicMass;
        result.DelMPpm = clsPeptideMassCalculator.MassToPPM(result.DelM, result.ObservedMonoisotopicMass);

        // We could compute m/z:
        //     Mz = clsPeptideMassCalculator.ConvoluteMass(result.ObservedMonoisotopicMass, 0, result.Charge);
        // But it's stored in the mzid file, so we'll use that
        result.Mz = item.Value.ExperimentalMz;

        StoreDatasetInfo(result, path);
        result.DataSet.Tool = LcmsIdentificationTool.MZIdentML;

        // Populate items specific to the MSGF+ results (stored as mzid)
        result.Reference = evidence.DbSeq.Accession;

        var eCleavageState = cleavageStateCalculator.ComputeCleavageState(peptideSequence, evidence.Pre, evidence.Post);
        result.NumTrypticEnds = clsPeptideCleavageStateCalculator.CleavageStateToShort(eCleavageState);

        result.DeNovoScore = item.Value.DeNovoScore;
        result.MsgfScore = item.Value.RawScore;
        result.SpecEValue = item.Value.SpecEv;
        result.RankSpecEValue = item.Value.Rank;

        result.EValue = item.Value.EValue;
        result.QValue = qValue;
        result.DiscriminantValue = qValue;
        result.PepQValue = item.Value.PepQValue;

        result.IsotopeError = item.Value.IsoError;

        if (result.ModificationCount > 0)
        {
            // Build two annotated sequences while walking the residues once:
            //   numModSeq  - residues with signed modification masses inserted
            //   encodedSeq - residues with signed modification formulas in brackets
            // NOTE(review): this walk assumes Peptide.Mods enumerates in ascending location order - confirm
            var j = 0;
            var numModSeq = evidence.Pre + ".";
            var encodedSeq = numModSeq;

            foreach (var mod in item.Value.Peptide.Mods)
            {
                var uniMod = UniModData.ModList[mod.Value.Tag];

                var ptm = new PostTranslationalModification
                {
                    Location = mod.Key,
                    Mass = mod.Value.Mass,
                    Formula = uniMod.Formula.ToString(),
                    Name = uniMod.Title
                };
                result.Ptms.Add(ptm);

                // Copy the residues that precede this modification.
                // The length bound keeps a location beyond the last residue (mzIdentML reports
                // C-terminal modifications at peptide length + 1) from indexing past the end.
                for (; j < ptm.Location && j < peptideSequence.Length; j++)
                {
                    numModSeq = numModSeq + peptideSequence[j];
                    encodedSeq = encodedSeq + peptideSequence[j];
                }

                // Emit the sign explicitly and the magnitude separately; appending the signed
                // mass after an explicit "-" produced "--<mass>" for negative masses
                var sign = (ptm.Mass > 0) ? "+" : "-";
                numModSeq += sign + Math.Abs(ptm.Mass);
                encodedSeq += "[" + sign + ptm.Formula + "]";
            }

            // Copy the residues that follow the last modification
            for (; j < peptideSequence.Length; j++)
            {
                numModSeq = numModSeq + peptideSequence[j];
                encodedSeq += peptideSequence[j];
            }

            numModSeq = numModSeq + "." + evidence.Post;
            encodedSeq += "." + evidence.Post;

            result.SeqWithNumericMods = numModSeq;
            result.EncodedNonNumericSequence = encodedSeq;
        }
        else
        {
            result.SeqWithNumericMods = result.Sequence;
            result.EncodedNonNumericSequence = result.Sequence;
        }

        result.PeptideInfo = new TargetPeptideInfo
        {
            Peptide = result.Sequence,
            CleanPeptide = result.CleanPeptide,
            PeptideWithNumericMods = result.SeqWithNumericMods
        };

        result.SeqInfoMonoisotopicMass = result.MonoisotopicMass;
        result.ModificationDescription = null;

        // One protein entry per peptide evidence
        foreach (var thing in item.Value.PepEvidence)
        {
            var protein = new ProteinInformation
            {
                ProteinName = thing.DbSeq.Accession,
                ResidueStart = thing.Start,
                ResidueEnd = thing.End
            };

            // NOTE(review): the terminus state of every protein is derived from the FIRST
            // peptide evidence rather than from 'thing' - confirm whether that is intended
            ComputeTerminusState(evidence, result.NumTrypticEnds, protein);
            result.Proteins.Add(protein);
        }

        if (result.ModificationCount > 0)
        {
            foreach (var mod in item.Value.Peptide.Mods)
            {
                // TODO: Confirm that this is valid math (MEM thinks it may not be)
                result.SeqInfoMonoisotopicMass += mod.Value.Mass;

                // Description entries take the form "Tag:Location " (space separated)
                result.ModificationDescription += mod.Value.Tag + ":" + mod.Key + " ";
            }
        }

        results.Add(result);
    }
}
/// <summary>
/// Set the terminus state and cleavage state of a protein entry
/// </summary>
/// <param name="evidence">Peptide evidence; a leading '-' in Pre or Post marks a protein terminus</param>
/// <param name="numTrypticEnds">0, 1 or 2; mapped to NonSpecific, Partial or Full cleavage. Other values leave the cleavage state as set by the terminus check</param>
/// <param name="protein">Protein entry to update</param>
private void ComputeTerminusState(PeptideEvidence evidence, short numTrypticEnds, ProteinInformation protein)
{
    // Both flanking residues are examined up front (Pre first, then Post)
    var precededByTerminus = evidence.Pre[0] == '-';
    var followedByTerminus = evidence.Post[0] == '-';

    if (precededByTerminus && followedByTerminus)
    {
        // Peptide spans the whole protein
        protein.TerminusState = clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants.ProteinNandCCTerminus;
        protein.CleavageState = clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants.Full;
    }
    else if (precededByTerminus)
    {
        protein.TerminusState = clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants.ProteinNTerminus;
    }
    else if (followedByTerminus)
    {
        protein.TerminusState = clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants.ProteinCTerminus;
    }
    else
    {
        protein.TerminusState = clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants.None;
    }

    // The tryptic-end count, when it is 0, 1 or 2, decides the cleavage state
    if (numTrypticEnds == 0)
    {
        protein.CleavageState = clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants.NonSpecific;
    }
    else if (numTrypticEnds == 1)
    {
        protein.CleavageState = clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants.Partial;
    }
    else if (numTrypticEnds == 2)
    {
        protein.CleavageState = clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants.Full;
    }
}
/// <summary>
/// Create a tree node for a protein, labelled with the protein name and the number of
/// entries in its Consensus collection
/// </summary>
/// <param name="protein">Protein represented by this node</param>
/// <param name="parent">Parent node, forwarded to the base constructor</param>
public ProteinTreeNodeViewModel(ProteinInformation protein, TreeNodeViewModel parent)
    : base(FormatProteinLabel(protein), true, parent)
{
    m_proteinInformation = protein;
}

/// <summary>
/// Build the node label, e.g. "ProteinName (3 peptides)"
/// </summary>
private static string FormatProteinLabel(ProteinInformation protein)
{
    var proteinName = protein.ProteinName;
    var peptideCount = protein.Consensus.Count;

    return String.Format("{0} ({1} peptides)", proteinName, peptideCount);
}
/// <summary>
/// Parse the temporary text files in a directory into the lookup dictionaries of this class.
/// Files read, in order: tempModInfo.txt, tempMassTags.txt, tempMassTagsNet.txt,
/// tempProteins.txt, tempMassTagToProteins.txt and tempPeptides.txt.
/// Each file's first line is skipped as a header and reading stops at the first empty line.
/// </summary>
/// <param name="directory">
/// Directory holding the files; it is concatenated directly with each file name,
/// so it must end with a directory separator
/// </param>
private void RetrieveDataFromTextFiles(string directory)
{
    // Modification info: column 0 = mod tag, 1 = mass, 2 = formula
    using (var reader = new StreamReader(directory + "tempModInfo.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            m_modTagsToModMass.Add(rowPieces[0],
                                   new Tuple<double, string>(Convert.ToDouble(rowPieces[1]), rowPieces[2]));
            row = reader.ReadLine();
        }
    }

    // Mass tags: one consensus target per row
    var ptmId = 1;
    var targetId = 1;
    var ctToPtmId = 1;
    using (var reader = new StreamReader(directory + "tempMassTags.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);

            var target = new ConsensusTarget();
            target.Id = targetId++;

            // Column 14 holds the quoted modification description ("tag:location,tag:location");
            // removing doubled quotes turns an empty quoted field into an empty string
            var unescapedPiece = rowPieces[14].Replace("\"\"", "");
            var sequence = rowPieces[1];

            // Location and mass text of every modification on THIS target, in file order
            var modSites = new List<Tuple<int, string>>();

            if (unescapedPiece != "")
            {
                // Strip the surrounding quotes. The length must come from the unescaped
                // string: using the raw field's length overran whenever Replace removed text.
                var unquotedPiece = unescapedPiece.Substring(1, unescapedPiece.Length - 2);
                var mods = unquotedPiece.Split(',');

                foreach (var mod in mods)
                {
                    var modPieces = mod.Split(':');
                    var modInfo = m_modTagsToModMass[modPieces[0]];

                    var ptm = new PostTranslationalModification();
                    ptm.Name = modPieces[0];

                    // The first time a modification name is seen, register it as the shared PTM
                    if (!m_ptmDictionary.ContainsKey(ptm.Name))
                    {
                        ptm.Mass = modInfo.Item1;
                        ptm.Id = ptmId++;
                        ptm.Formula = modInfo.Item2;
                        m_ptmDictionary.Add(ptm.Name, ptm);
                    }

                    var sharedPtm = m_ptmDictionary[ptm.Name];
                    target.Ptms.Add(sharedPtm);

                    var location = Convert.ToInt32(modPieces[1]);
                    ptm.Location = location;

                    // NOTE(review): sharedPtm is reused across targets, so its Location is the
                    // one from the row that first introduced it; the per-target location is
                    // carried by the ConsensusPtmPair and by modSites below
                    modSites.Add(Tuple.Create(location, sharedPtm.Mass.ToString()));

                    var ctToPtm = new ConsensusPtmPair
                    {
                        ConsensusId = target.Id,
                        PtmId = sharedPtm.Id,
                        Location = location,
                        Id = ctToPtmId++
                    };
                    m_consensusTargetToPtmDict[ctToPtm.Id] = ctToPtm;
                }

                target.ModificationDescription = unquotedPiece;
            }

            // Insert the modification masses from the back of the sequence to the front so
            // earlier insert positions stay valid. Each insert goes into the running string;
            // inserting into the unmodified sequence every time kept only one mass.
            var encodedSequence = sequence;
            foreach (var site in modSites.OrderByDescending(x => x.Item1))
            {
                encodedSequence = encodedSequence.Insert(site.Item1, site.Item2);
            }

            target.EncodedNumericSequence = encodedSequence;
            target.Sequence = sequence;
            target.TheoreticalMonoIsotopicMass = Convert.ToDouble(rowPieces[2]);
            target.MultiProteinCount = Convert.ToInt16(rowPieces[4]);
            target.ModificationCount = Convert.ToInt16(rowPieces[13]);

            // Both dictionaries are keyed by the ID in column 0 of the file,
            // not by the sequential target.Id assigned above
            m_idToMassTagDict.Add(Convert.ToInt32(rowPieces[0]), row);
            m_idToConensusTargetDict.Add(Convert.ToInt32(rowPieces[0]), target);

            row = reader.ReadLine();
        }
    }

    // NET values: column 0 = mass tag ID, 3 = average NET, 5 = NET standard deviation, 7 = predicted NET
    using (var reader = new StreamReader(directory + "tempMassTagsNet.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var target = m_idToConensusTargetDict[Convert.ToInt32(rowPieces[0])];

            target.PredictedNet = Convert.ToDouble(rowPieces[7]);
            target.StdevNet = Convert.ToDouble(rowPieces[5]);
            target.AverageNet = Convert.ToDouble(rowPieces[3]);

            row = reader.ReadLine();
        }
    }

    // Proteins: column 0 = ID in the file, 1 = protein name with one leading and one trailing character stripped
    var proteinId = 1;
    using (var reader = new StreamReader(directory + "tempProteins.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var unquotedPiece = rowPieces[1].Substring(1, rowPieces[1].Length - 2);

            var prot = new ProteinInformation
            {
                Id = proteinId++,
                ProteinName = unquotedPiece
            };
            m_idToProteinDict[Convert.ToInt32(rowPieces[0])] = prot;

            row = reader.ReadLine();
        }
    }

    // Mass tag to protein map: column 0 = mass tag ID, 2 = protein ID, 3 = cleavage state,
    // 6 = residue start, 7 = residue end, 9 = terminus state
    var cppId = 1;
    using (var reader = new StreamReader(directory + "tempMassTagToProteins.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var mt_id = Convert.ToInt32(rowPieces[0]);
            var prot_id = Convert.ToInt32(rowPieces[2]);

            var ctToProt = new ConsensusProteinPair();
            ctToProt.CleavageState = Convert.ToInt16(rowPieces[3]);
            ctToProt.ResidueStart = Convert.ToInt32(rowPieces[6]);
            ctToProt.ResidueEnd = Convert.ToInt32(rowPieces[7]);
            ctToProt.TerminusState = Convert.ToInt16(rowPieces[9]);
            ctToProt.ConsensusId = m_idToConensusTargetDict[mt_id].Id;
            ctToProt.ProteinId = m_idToProteinDict[prot_id].Id;

            m_ctToProtDict[cppId] = ctToProt;
            cppId++;

            row = reader.ReadLine();
        }
    }

    // Peptides: one evidence per row. Column 0 = mass tag ID, 1 = peptide with flanking
    // residues, 2 = charge, 3 = scan, 4 = mass error (ppm), 5 = observed NET, 6 = observed mass
    var evId = 1;
    using (var reader = new StreamReader(directory + "tempPeptides.txt"))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var id = Convert.ToInt32(rowPieces[0]);
            var peptide = rowPieces[1];
            var charge = Convert.ToInt16(rowPieces[2]);
            var target = m_idToConensusTargetDict[id];

            if (!m_idToChargeAndPeptide.ContainsKey(id))
            {
                // First peptide seen for this mass tag: record it and wrap the target's
                // sequence with the first and last characters of the peptide
                m_idToChargeAndPeptide[id] = new Tuple<string, List<short>>(peptide, new List<short>());
                m_idToChargeAndPeptide[id].Item2.Add(charge);

                target.Sequence = peptide[0] + "." + target.Sequence + "." + peptide[peptide.Length - 1];
                target.CleanSequence = target.Sequence;
                target.Charges.Add(charge);
            }

            // Track every distinct charge state observed for this mass tag
            if (!m_idToChargeAndPeptide[id].Item2.Contains(charge))
            {
                m_idToChargeAndPeptide[id].Item2.Add(charge);
                target.Charges.Add(charge);
            }

            var ctId = target.Id;
            var evidenceId = evId++;

            var ev = new Evidence();
            ev.Id = evidenceId;
            ev.Charge = charge;
            ev.Sequence = target.CleanSequence;
            ev.Scan = Convert.ToInt32(rowPieces[3]);
            ev.DelMPpm = Convert.ToDouble(rowPieces[4]);
            ev.ObservedNet = Convert.ToDouble(rowPieces[5]);
            ev.ObservedMonoisotopicMass = Convert.ToDouble(rowPieces[6]);
            // NOTE(review): divides the observed mass by the charge without a proton-mass term - confirm intended
            ev.Mz = ev.ObservedMonoisotopicMass / ev.Charge;
            ev.NetShift = 0;
            // NOTE(review): scales the ppm error by 1e-6 without multiplying by a mass - confirm intended
            ev.DelM = ev.DelMPpm / 1000000;
            ev.Parent = target;

            // Key both lookups by the evidence's own id; the counter had already been
            // advanced when it was used as the key, leaving every key one past Evidence.Id
            m_evidenceDict[evidenceId] = ev;

            if (!m_ctToEvidenceMap.ContainsKey(ctId))
            {
                m_ctToEvidenceMap[ctId] = new List<int>();
            }
            m_ctToEvidenceMap[ctId].Add(evidenceId);

            row = reader.ReadLine();
        }
    }
}