/// <summary>
/// Builds a synthetic target database and writes it with <c>AccessTargetDatabaseWriter</c>.
/// Every consensus target receives <paramref name="numberProteins"/> proteins, and for each
/// protein <paramref name="numberEvidences"/> evidences are attached to the target.
/// </summary>
/// <param name="path">Path handed to <c>GetTestSuiteDataPath</c> to obtain the output file.</param>
/// <param name="numberConsensus">Number of consensus targets to create.</param>
/// <param name="numberProteins">Number of proteins created per consensus target.</param>
/// <param name="numberEvidences">Number of evidences created per protein.</param>
public void TestWriteAccessDb(string path, int numberConsensus, int numberProteins, int numberEvidences)
{
    var accessWriter = new AccessTargetDatabaseWriter();
    var writeOptions = new Options();
    var targetDatabase = new TargetDatabase();

    // Running counter shared by all evidences; used for the id, the scan and the name suffixes.
    var nextEvidenceNumber = 1;

    for (var targetIndex = 1; targetIndex <= numberConsensus; targetIndex++)
    {
        var consensus = new ConsensusTarget();
        consensus.Id = targetIndex;

        for (var proteinIndex = 1; proteinIndex <= numberProteins; proteinIndex++)
        {
            var proteinInfo = new ProteinInformation();
            proteinInfo.ProteinName = "Access_Test" + proteinIndex;
            proteinInfo.CleavageState = clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants.Full;
            proteinInfo.TerminusState = clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants.ProteinNTerminus;
            proteinInfo.Consensus.Add(consensus);

            for (var analysisIndex = 1; analysisIndex <= numberEvidences; analysisIndex++)
            {
                var observation = new Evidence();
                observation.AnalysisId = analysisIndex;
                observation.Charge = 1;
                observation.Id = nextEvidenceNumber;
                observation.CleanPeptide = "MIKEDEGAN" + nextEvidenceNumber;
                observation.Sequence = "BIGBIRD" + nextEvidenceNumber;
                observation.Mz = 405;
                // The scan takes the current counter value, then the counter advances.
                observation.Scan = nextEvidenceNumber++;
                observation.PeptideInfo = new TargetPeptideInfo();

                observation.AddProtein(proteinInfo);
                consensus.AddEvidence(observation);
            }

            consensus.AddProtein(proteinInfo);
            consensus.CalculateStatistics();
        }

        // Fixed values assigned after the statistics calls above.
        consensus.TheoreticalMonoIsotopicMass = 100.0;
        consensus.AverageNet = .6;
        consensus.PredictedNet = .7;
        targetDatabase.ConsensusTargets.Add(consensus);
    }

    // Remove any output left over from a previous run before writing.
    if (File.Exists(GetTestSuiteDataPath(path)))
    {
        File.Delete(GetTestSuiteDataPath(path));
    }

    accessWriter.Write(targetDatabase, writeOptions, GetTestSuiteDataPath(path));
}
/// <summary>
/// Registers a consensus target with this protein, keyed by the target's id.
/// A target whose id is already registered is left untouched.
/// </summary>
/// <param name="target">Target to register.</param>
public void AddConsensusTarget(ConsensusTarget target)
{
    // Already known: keep the existing entry.
    if (m_targets.ContainsKey(target.Id))
    {
        return;
    }

    m_targets.Add(target.Id, target);
}
/// <summary>
/// Builds a synthetic target database and writes it with <c>SqLiteTargetDatabaseWriter</c>.
/// Each consensus target gets exactly one protein and <paramref name="numberOfEvidences"/> evidences.
/// </summary>
/// <param name="path">Path handed to <c>GetTestSuiteDataPath</c> to obtain the output file.</param>
/// <param name="numberOfTargets">Number of consensus targets to create.</param>
/// <param name="numberOfEvidences">Number of evidences created per consensus target.</param>
public void TestWriteDatabase(string path, int numberOfTargets, int numberOfEvidences)
{
    var writer = new SqLiteTargetDatabaseWriter();
    var writeOptions = new Options();
    var targetDatabase = new TargetDatabase();

    // Running counters used to give proteins and evidences distinct names and ids.
    var nextProteinNumber = 1;
    var nextEvidenceNumber = 1;

    for (var targetIndex = 1; targetIndex <= numberOfTargets; targetIndex++)
    {
        var consensus = new ConsensusTarget();
        consensus.Id = targetIndex;

        var proteinInfo = new ProteinInformation();
        proteinInfo.ProteinName = "SO_Test" + nextProteinNumber++;
        proteinInfo.CleavageState = clsPeptideCleavageStateCalculator.ePeptideCleavageStateConstants.Full;
        proteinInfo.TerminusState = clsPeptideCleavageStateCalculator.ePeptideTerminusStateConstants.ProteinNTerminus;
        proteinInfo.Consensus.Add(consensus);

        for (var analysisIndex = 1; analysisIndex <= numberOfEvidences; analysisIndex++)
        {
            var observation = new Evidence();
            observation.AnalysisId = analysisIndex;
            observation.Charge = 1;
            observation.Id = nextEvidenceNumber;
            observation.CleanPeptide = "MIKEDEGAN" + nextEvidenceNumber;
            observation.Sequence = "BIGBIRD" + nextEvidenceNumber;
            observation.Mz = 405;
            // The scan takes the current counter value, then the counter advances.
            observation.Scan = nextEvidenceNumber++;
            observation.PeptideInfo = new TargetPeptideInfo();

            observation.AddProtein(proteinInfo);
            consensus.AddEvidence(observation);
        }

        consensus.AddProtein(proteinInfo);
        consensus.CalculateStatistics();
        targetDatabase.ConsensusTargets.Add(consensus);
    }

    writer.Write(targetDatabase, writeOptions, GetTestSuiteDataPath(path));
}
/// <summary>
/// Reads an Access database into a <see cref="TargetDatabase"/>.
/// The tables "AMT", "AMT_Proteins" and "AMT_to_Protein_Map" are exported by Access to
/// temporary delimited text files in the database's folder, parsed, and then deleted.
/// </summary>
/// <param name="path">Path of the Access database file to open.</param>
/// <returns>
/// A database containing one consensus target per AMT row (with its linked proteins)
/// and the list of all proteins read.
/// </returns>
public TargetDatabase ReadDb(string path)
{
    // The temp files are written next to the database. Taking the parent folder from the
    // path API (instead of dropping every path piece that contains a '.') keeps this correct
    // for folders with dots in their names and for database files without an extension.
    var directory = System.IO.Path.GetDirectoryName(path) ?? string.Empty;
    var amtFile = System.IO.Path.Combine(directory, "outTempAMT.txt");
    var proteinFile = System.IO.Path.Combine(directory, "outTempAMT_Proteins.txt");
    var mapFile = System.IO.Path.Combine(directory, "outTempAMT_to_Protein_Map.txt");

    var consensusTargets = new Dictionary<int, ConsensusTarget>();
    var proteins = new Dictionary<int, ProteinInformation>();

    try
    {
        // Step 1: let Access export the three tables as delimited text with a header row.
        var accApplication = new ACCESS.Application();
        try
        {
            accApplication.OpenCurrentDatabase(path);
            accApplication.DoCmd.TransferText(TransferType: ACCESS.AcTextTransferType.acExportDelim, TableName: "AMT", FileName: amtFile, HasFieldNames: true);
            accApplication.DoCmd.TransferText(TransferType: ACCESS.AcTextTransferType.acExportDelim, TableName: "AMT_Proteins", FileName: proteinFile, HasFieldNames: true);
            accApplication.DoCmd.TransferText(TransferType: ACCESS.AcTextTransferType.acExportDelim, TableName: "AMT_to_Protein_Map", FileName: mapFile, HasFieldNames: true);
            accApplication.CloseCurrentDatabase();
        }
        finally
        {
            // Always shut the Access instance down, even when the export fails.
            accApplication.Quit();
        }

        // Step 2: AMT rows become consensus targets.
        // Fields used: 0 = id, 1 = monoisotopic mass, 2 = average NET, 3 = predicted NET,
        // 4 = number of evidences to create, 5 = normalized score, 6 = encoded sequence.
        // NOTE(review): rows are split on ',' only; a field that itself contains a comma
        // would shift the columns - confirm against the actual export format.
        using (var ctReader = new StreamReader(amtFile))
        {
            ctReader.ReadLine(); // skip the header row

            string ctLine;
            while ((ctLine = ctReader.ReadLine()) != null)
            {
                var pieces = ctLine.Split(',');
                var target = new ConsensusTarget
                {
                    Id = Convert.ToInt32(pieces[0]),
                    TheoreticalMonoIsotopicMass = Convert.ToDouble(pieces[1]),
                    AverageNet = Convert.ToDouble(pieces[2]),
                    PredictedNet = Convert.ToDouble(pieces[3]),
                    EncodedNumericSequence = pieces[6]
                };

                var totalEvidences = Convert.ToInt32(pieces[4]);
                var normScore = Convert.ToDouble(pieces[5]);

                // Only a count is available, so one evidence is created per count,
                // each carrying the target's own values.
                for (var evNum = 0; evNum < totalEvidences; evNum++)
                {
                    var evidence = new Evidence
                    {
                        ObservedNet = target.AverageNet,
                        ObservedMonoisotopicMass = target.TheoreticalMonoIsotopicMass,
                        PredictedNet = target.PredictedNet,
                        NormalizedScore = normScore,
                        SeqWithNumericMods = target.EncodedNumericSequence,
                        Parent = target
                    };
                    target.Evidences.Add(evidence);
                }

                consensusTargets.Add(target.Id, target);
            }
        }

        // Step 3: protein rows. Field 0 = protein id, field 1 = protein name.
        using (var protReader = new StreamReader(proteinFile))
        {
            protReader.ReadLine(); // skip the header row

            string protLine;
            while ((protLine = protReader.ReadLine()) != null)
            {
                var pieces = protLine.Split(',');
                var protein = new ProteinInformation { ProteinName = pieces[1] };
                proteins.Add(Convert.ToInt32(pieces[0]), protein);
            }
        }

        // Step 4: mapping rows link a consensus target (field 0) to a protein (field 1).
        using (var mapReader = new StreamReader(mapFile))
        {
            mapReader.ReadLine(); // skip the header row

            string mapLine;
            while ((mapLine = mapReader.ReadLine()) != null)
            {
                var pieces = mapLine.Split(',');
                consensusTargets[Convert.ToInt32(pieces[0])].AddProtein(proteins[Convert.ToInt32(pieces[1])]);
            }
        }
    }
    finally
    {
        // Remove whatever temp files were produced, whether or not reading succeeded.
        foreach (var tempFile in new[] { amtFile, proteinFile, mapFile })
        {
            if (File.Exists(tempFile))
            {
                File.Delete(tempFile);
            }
        }
    }

    var database = new TargetDatabase();
    foreach (var target in consensusTargets)
    {
        database.AddConsensusTarget(target.Value);
    }
    database.Proteins = proteins.Values.ToList();

    return database;
}
/// <summary>
/// Creates a tree node for a consensus target. The text passed to the base constructor is
/// the target's encoded numeric sequence followed by its evidence count in parentheses.
/// </summary>
/// <param name="ct">Consensus target represented by this node.</param>
/// <param name="parent">Parent node forwarded to the base constructor.</param>
public ConsensusTargetTreeNodeViewModel(ConsensusTarget ct, TreeNodeViewModel parent)
    : base(
        string.Format("{0} ({1} evidences)", ct.EncodedNumericSequence, ct.Evidences.Count),
        true,
        parent)
{
    this.m_consensusTarget = ct;
}
/// <summary>
/// Parses the temporary text files found under <paramref name="directory"/> and fills the
/// lookup dictionaries of this instance: modification info, consensus targets, NET values,
/// proteins, target-to-protein pairs and evidences. Each file starts with a header row and
/// is read until the first empty line or the end of the file.
/// </summary>
/// <param name="directory">
/// Prefix prepended verbatim to every file name, so it has to end with a directory separator.
/// </param>
private void RetrieveDataFromTextFiles(string directory)
{
    // --- Modification info: field 0 = tag, field 1 = mass, field 2 = formula ---
    using (var reader = new StreamReader(directory + "tempModInfo.txt"))
    {
        reader.ReadLine(); // skip the header row
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            m_modTagsToModMass.Add(rowPieces[0], new Tuple<double, string>(Convert.ToDouble(rowPieces[1]), rowPieces[2]));
            row = reader.ReadLine();
        }
    }

    // --- Mass tags: one consensus target per row ---
    var ptmId = 1;
    var targetId = 1;
    var ctToPtmId = 1;
    using (var reader = new StreamReader(directory + "tempMassTags.txt"))
    {
        reader.ReadLine(); // skip the header row
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var target = new ConsensusTarget();
            target.Id = targetId++;

            var sequence = rowPieces[1];

            // Field 14 holds the modification list. Removing every pair of consecutive
            // double quotes turns a field that is just "" into the empty string.
            var modField = rowPieces[14].Replace("\"\"", "");

            // Location and mass text of every modification of this target. Kept separately
            // because the PTM objects stored in target.Ptms are shared between targets and
            // only remember the location of their first occurrence.
            var placedMods = new List<Tuple<int, string>>();

            if (modField != "")
            {
                // Strip the surrounding quotes. The length has to come from the string that
                // is actually being cut; using the raw field's length overruns the string
                // whenever quote pairs were removed above.
                var modDescription = modField.Substring(1, modField.Length - 2);

                foreach (var mod in modDescription.Split(','))
                {
                    // Each entry has the form "name:location".
                    var modPieces = mod.Split(':');
                    var modName = modPieces[0];
                    var modInfo = m_modTagsToModMass[modName];
                    var location = Convert.ToInt32(modPieces[1]);

                    // The first time a modification name is seen it gets its own id.
                    if (!m_ptmDictionary.ContainsKey(modName))
                    {
                        var newPtm = new PostTranslationalModification();
                        newPtm.Name = modName;
                        newPtm.Mass = modInfo.Item1;
                        newPtm.Id = ptmId++;
                        newPtm.Formula = modInfo.Item2;
                        newPtm.Location = location;
                        m_ptmDictionary.Add(modName, newPtm);
                    }

                    var sharedPtm = m_ptmDictionary[modName];
                    target.Ptms.Add(sharedPtm);
                    placedMods.Add(new Tuple<int, string>(location, sharedPtm.Mass.ToString()));

                    var ctToPtm = new ConsensusPtmPair
                    {
                        ConsensusId = target.Id,
                        PtmId = sharedPtm.Id,
                        Location = location,
                        Id = ctToPtmId++
                    };
                    m_consensusTargetToPtmDict[ctToPtm.Id] = ctToPtm;
                }

                target.ModificationDescription = modDescription;
            }

            // Build the encoded sequence by inserting each modification mass at its location.
            // Working from the highest location down keeps the remaining locations valid, and
            // inserting into the growing string (not the bare sequence) keeps every mass.
            var encodedSequence = sequence;
            foreach (var placed in placedMods.OrderByDescending(x => x.Item1))
            {
                encodedSequence = encodedSequence.Insert(placed.Item1, placed.Item2);
            }

            target.EncodedNumericSequence = encodedSequence;
            target.Sequence = sequence;
            target.TheoreticalMonoIsotopicMass = Convert.ToDouble(rowPieces[2]);
            target.MultiProteinCount = Convert.ToInt16(rowPieces[4]);
            target.ModificationCount = Convert.ToInt16(rowPieces[13]);

            // Field 0 is the id used by the other files to refer to this row.
            var massTagId = Convert.ToInt32(rowPieces[0]);
            m_idToMassTagDict.Add(massTagId, row);
            m_idToConensusTargetDict.Add(massTagId, target);

            row = reader.ReadLine();
        }
    }

    // --- NET values: field 3 = average, field 5 = standard deviation, field 7 = predicted ---
    using (var reader = new StreamReader(directory + "tempMassTagsNet.txt"))
    {
        reader.ReadLine(); // skip the header row
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var target = m_idToConensusTargetDict[Convert.ToInt32(rowPieces[0])];
            target.PredictedNet = Convert.ToDouble(rowPieces[7]);
            target.StdevNet = Convert.ToDouble(rowPieces[5]);
            target.AverageNet = Convert.ToDouble(rowPieces[3]);
            row = reader.ReadLine();
        }
    }

    // --- Proteins: field 0 = id in the file, field 1 = name with one character stripped from each end ---
    var proteinId = 1;
    using (var reader = new StreamReader(directory + "tempProteins.txt"))
    {
        reader.ReadLine(); // skip the header row
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var proteinName = rowPieces[1].Substring(1, rowPieces[1].Length - 2);
            var prot = new ProteinInformation
            {
                Id = proteinId++,
                ProteinName = proteinName
            };
            m_idToProteinDict[Convert.ToInt32(rowPieces[0])] = prot;
            row = reader.ReadLine();
        }
    }

    // --- Target-to-protein pairs, translated from file ids to the ids assigned above ---
    var cppId = 1;
    using (var reader = new StreamReader(directory + "tempMassTagToProteins.txt"))
    {
        reader.ReadLine(); // skip the header row
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var massTagId = Convert.ToInt32(rowPieces[0]);
            var fileProteinId = Convert.ToInt32(rowPieces[2]);

            var ctToProt = new ConsensusProteinPair();
            ctToProt.CleavageState = Convert.ToInt16(rowPieces[3]);
            ctToProt.ResidueStart = Convert.ToInt32(rowPieces[6]);
            ctToProt.ResidueEnd = Convert.ToInt32(rowPieces[7]);
            ctToProt.TerminusState = Convert.ToInt16(rowPieces[9]);
            ctToProt.ConsensusId = m_idToConensusTargetDict[massTagId].Id;
            ctToProt.ProteinId = m_idToProteinDict[fileProteinId].Id;

            m_ctToProtDict[cppId] = ctToProt;
            cppId++;
            row = reader.ReadLine();
        }
    }

    // --- Peptides: one evidence per row ---
    var evId = 1;
    using (var reader = new StreamReader(directory + "tempPeptides.txt"))
    {
        reader.ReadLine(); // skip the header row
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var id = Convert.ToInt32(rowPieces[0]);
            var peptide = rowPieces[1];
            var charge = Convert.ToInt16(rowPieces[2]);
            var target = m_idToConensusTargetDict[id];

            if (!m_idToChargeAndPeptide.ContainsKey(id))
            {
                // First row for this target: remember the peptide and wrap the target's
                // sequence with the peptide's first and last characters, separated by dots.
                m_idToChargeAndPeptide[id] = new Tuple<string, List<short>>(peptide, new List<short>());
                m_idToChargeAndPeptide[id].Item2.Add(charge);
                target.Sequence = peptide[0] + "." + target.Sequence + "." + peptide[peptide.Length - 1];
                target.CleanSequence = target.Sequence;
                target.Charges.Add(charge);
            }

            // Record each distinct charge once per target.
            if (!m_idToChargeAndPeptide[id].Item2.Contains(charge))
            {
                m_idToChargeAndPeptide[id].Item2.Add(charge);
                target.Charges.Add(charge);
            }

            var ctId = target.Id;
            var evidenceId = evId++;

            var ev = new Evidence();
            ev.Id = evidenceId;
            ev.Charge = charge;
            ev.Sequence = target.CleanSequence;
            ev.Scan = Convert.ToInt32(rowPieces[3]);
            ev.DelMPpm = Convert.ToDouble(rowPieces[4]);
            ev.ObservedNet = Convert.ToDouble(rowPieces[5]);
            ev.ObservedMonoisotopicMass = Convert.ToDouble(rowPieces[6]);
            ev.Mz = ev.ObservedMonoisotopicMass / ev.Charge;
            ev.NetShift = 0;
            ev.DelM = ev.DelMPpm / 1000000;
            ev.Parent = target;

            // Key the evidence by its own id. Using the counter after it has been
            // incremented would store every evidence under id + 1.
            m_evidenceDict[evidenceId] = ev;
            if (!m_ctToEvidenceMap.ContainsKey(ctId))
            {
                m_ctToEvidenceMap[ctId] = new List<int>();
            }
            m_ctToEvidenceMap[ctId].Add(evidenceId);

            row = reader.ReadLine();
        }
    }
}
/// <summary>
/// Appends a consensus target to <c>ConsensusTargets</c>.
/// </summary>
/// <param name="target">Target to append; no duplicate check is made here.</param>
public void AddConsensusTarget(ConsensusTarget target)
{
    this.ConsensusTargets.Add(target);
}