/// <summary>
/// Writes the supplied target database to the SQLite file at <paramref name="path"/>.
/// For every distinct dataset name encountered, an alignment file named
/// "[dataset name]Alignment.tsv" is also written into the directory of the database file.
/// </summary>
/// <param name="database">Provides the ConsensusTargets that are inserted.</param>
/// <param name="options">Inserted before any target; its DatabaseType is passed to the session factory.</param>
/// <param name="path">Path of the database file; also determines the directory of the alignment files.</param>
public void Write(TargetDatabase database, Options options, string path)
{
    DatabaseFactory.DatabaseFile = path;

    // GetDirectoryName returns null for a root path; fall back to an empty
    // (relative) directory so that Path.Combine below does not throw.
    var databaseDirectory = Path.GetDirectoryName(path) ?? string.Empty;

    /**********************************************************************************************
     * TODO: Get the append capability working
     * Set to false to avoid problems. Setting it to true will append some items, but not others.
     **********************************************************************************************/
    DatabaseFactory.ReadOrAppend = false;
    var sessionFactory = DatabaseFactory.CreateSessionFactory(options.DatabaseType);

    try
    {
        using (var session = sessionFactory.OpenStatelessSession())
        using (var transaction = session.BeginTransaction())
        {
            // populate the database
            session.Insert(options);

            /* Each consensus target is broken up into its datasets, evidences, proteins and PTMs.
             * Datasets, proteins and PTMs are de-duplicated by name through the m_unique* dictionaries
             * and are inserted before the rows that refer to them (evidences and the pair objects),
             * because those rows need the referenced objects to already exist.
             */
            var currentTarget = 0;
            var currentEv = 0;
            var datasetCount = 0;
            var total = database.ConsensusTargets.Count;

            foreach (var consensusTarget in database.ConsensusTargets)
            {
                OnProgressChanged(new MtdbProgressChangedEventArgs(currentTarget, total, MtdbCreationProgressType.COMMIT.ToString()));

                consensusTarget.Id = ++currentTarget;
                foreach (var ptm in consensusTarget.Ptms)
                {
                    ptm.Id = 0;
                }

                // The target-level summary fields are copied from the first evidence,
                // so every consensus target is expected to carry at least one evidence.
                var firstEvidence = consensusTarget.Evidences[0];
                consensusTarget.Dataset = firstEvidence.DataSet;
                consensusTarget.ModificationCount = firstEvidence.ModificationCount;
                consensusTarget.ModificationDescription = firstEvidence.ModificationDescription;
                consensusTarget.MultiProteinCount = firstEvidence.MultiProteinCount;
                session.Insert(consensusTarget);

                foreach (var evidence in consensusTarget.Evidences)
                {
                    var datasetName = evidence.DataSet.Name;

                    if (!m_uniqueDataSets.ContainsKey(datasetName))
                    {
                        // First time this dataset is seen: number it, open its alignment file, insert it.
                        evidence.DataSet.Id = ++datasetCount;
                        m_uniqueDataSets.Add(datasetName, evidence.DataSet);

                        var alignmentPath = Path.Combine(databaseDirectory, datasetName + "Alignment.tsv");
                        var datasetWriter = new StreamWriter(alignmentPath);
                        // Register the writer right away so the finally block below closes it
                        // even if a later statement throws.
                        m_alignmentWriters.Add(datasetName, datasetWriter);
                        datasetWriter.WriteLine("GANET_Obs\tScan_Number");

                        session.Insert(evidence.DataSet);
                    }

                    Evidence writtenEvidence = new Evidence
                    {
                        Id = ++currentEv,
                        Charge = evidence.Charge,
                        ObservedNet = evidence.ObservedNet,
                        NetShift = evidence.NetShift,
                        Mz = evidence.Mz,
                        Scan = evidence.Scan,
                        DelM = evidence.DelM,
                        DelMPpm = evidence.DelMPpm,
                        DiscriminantValue = evidence.DiscriminantValue,
                        SpecProb = evidence.SpecProb,
                        DataSet = m_uniqueDataSets[datasetName],
                        Parent = consensusTarget
                    };

                    m_alignmentWriters[datasetName].WriteLine(string.Format("{0}\t{1}", writtenEvidence.ObservedNet, writtenEvidence.Scan));
                    session.Insert(writtenEvidence);
                }

                foreach (var protein in consensusTarget.Proteins)
                {
                    if (!m_uniqueProteins.ContainsKey(protein.ProteinName))
                    {
                        protein.Id = 0;
                        m_uniqueProteins.Add(protein.ProteinName, protein);
                        session.Insert(protein);
                    }

                    // Always link against the first protein object registered under this name.
                    var cProt = m_uniqueProteins[protein.ProteinName];
                    var cPPair = new ConsensusProteinPair
                    {
                        Consensus = consensusTarget,
                        Protein = cProt,
                        CleavageState = (short)cProt.CleavageState,
                        TerminusState = (short)cProt.TerminusState,
                        ResidueStart = (short)cProt.ResidueStart,
                        ResidueEnd = (short)cProt.ResidueEnd
                    };
                    session.Insert(cPPair);
                    consensusTarget.ConsensusProtein.Add(cPPair);
                }

                foreach (var ptm in consensusTarget.Ptms)
                {
                    if (!m_uniquePtms.ContainsKey(ptm.Name))
                    {
                        m_uniquePtms.Add(ptm.Name, ptm);
                        session.Insert(ptm);
                    }

                    var uniquePtm = m_uniquePtms[ptm.Name];
                    var cPtmPair = new ConsensusPtmPair
                    {
                        Location = ptm.Location,
                        PostTranslationalModification = uniquePtm,
                        PtmId = uniquePtm.Id,
                        Target = consensusTarget,
                        ConsensusId = consensusTarget.Id
                    };
                    session.Insert(cPtmPair);
                }
            }

            OnProgressChanged(new MtdbProgressChangedEventArgs(currentTarget, total, MtdbCreationProgressType.COMMIT.ToString()));
            transaction.Commit();
            session.Close();
        }
    }
    finally
    {
        // Close the alignment files on failure as well as on success so no file handle is leaked.
        foreach (var writer in m_alignmentWriters)
        {
            writer.Value.Close();
        }
    }
}
/// <summary>
/// Reads the temporary text files (tempModInfo, tempMassTags, tempMassTagsNet, tempProteins,
/// tempMassTagToProteins, tempPeptides) from <paramref name="directory"/> and fills the
/// in-memory lookup dictionaries of this instance with the parsed objects.
/// In every file the first line is discarded (presumably a header row) and reading stops
/// at the first empty line or at end of file.
/// </summary>
/// <param name="directory">Directory containing the temp*.txt files; a trailing separator is optional.</param>
private void RetrieveDataFromTextFiles(string directory)
{
    // Path.Combine yields the same path as plain concatenation when the directory already
    // ends with a separator, and inserts the separator when it does not.
    var baseDirectory = directory ?? string.Empty;

    // ---- tempModInfo.txt: modification tag -> (mass, formula) ----
    using (var reader = new StreamReader(Path.Combine(baseDirectory, "tempModInfo.txt")))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            m_modTagsToModMass.Add(rowPieces[0], new Tuple<double, string>(Convert.ToDouble(rowPieces[1]), rowPieces[2]));
            row = reader.ReadLine();
        }
    }

    // ---- tempMassTags.txt: one consensus target per row ----
    var ptmId = 1;
    var targetId = 1;
    var ctToPtmId = 1;
    using (var reader = new StreamReader(Path.Combine(baseDirectory, "tempMassTags.txt")))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var target = new ConsensusTarget();
            target.Id = targetId++;

            var sequence = rowPieces[1];

            // Removing doubled quotes turns a field that holds only "" into the empty string,
            // which is how a row without modifications is recognised.
            var unescapedPiece = rowPieces[14].Replace("\"\"", "");

            // (location, mass text) of every modification site parsed from THIS row. The PTM
            // objects in target.Ptms are shared by name across rows, so their Location
            // cannot be used to encode this row's sequence.
            var modSites = new List<Tuple<int, string>>();

            if (unescapedPiece != "")
            {
                // Strip the surrounding quotes. The length must come from the string being
                // sliced; using the pre-Replace length overruns whenever a quote pair was removed.
                var unquotedPiece = unescapedPiece.Substring(1, unescapedPiece.Length - 2);

                foreach (var mod in unquotedPiece.Split(','))
                {
                    // Each entry has the form "<tag>:<location>".
                    var modPieces = mod.Split(':');
                    var modMass = m_modTagsToModMass[modPieces[0]].Item1;

                    var ptm = new PostTranslationalModification();
                    ptm.Name = modPieces[0];
                    if (!m_ptmDictionary.ContainsKey(ptm.Name))
                    {
                        // First occurrence of this tag: this object becomes the shared PTM.
                        ptm.Mass = modMass;
                        ptm.Id = ptmId++;
                        ptm.Formula = m_modTagsToModMass[modPieces[0]].Item2;
                        m_ptmDictionary.Add(ptm.Name, ptm);
                    }

                    var sharedPtm = m_ptmDictionary[ptm.Name];
                    target.Ptms.Add(sharedPtm);

                    int location = Convert.ToInt32(modPieces[1]);
                    ptm.Location = location;
                    modSites.Add(Tuple.Create(location, sharedPtm.Mass.ToString()));

                    var ctToPtm = new ConsensusPtmPair
                    {
                        ConsensusId = target.Id,
                        PtmId = sharedPtm.Id,
                        Location = location,
                        Id = ctToPtmId++
                    };
                    m_consensusTargetToPtmDict[ctToPtm.Id] = ctToPtm;
                }

                target.ModificationDescription = unquotedPiece;
            }

            // Insert the masses from the back of the sequence to the front so that earlier
            // insert positions stay valid, accumulating into one string. String.Insert at an
            // index equal to the length appends, so no special case is needed for the end.
            var encodedSequence = sequence;
            foreach (var site in modSites.OrderByDescending(x => x.Item1))
            {
                encodedSequence = encodedSequence.Insert(site.Item1, site.Item2);
            }

            target.EncodedNumericSequence = encodedSequence;
            target.Sequence = sequence;
            target.TheoreticalMonoIsotopicMass = Convert.ToDouble(rowPieces[2]);
            target.MultiProteinCount = Convert.ToInt16(rowPieces[4]);
            target.ModificationCount = Convert.ToInt16(rowPieces[13]);

            var massTagId = Convert.ToInt32(rowPieces[0]);
            m_idToMassTagDict.Add(massTagId, row);
            m_idToConensusTargetDict.Add(massTagId, target);

            row = reader.ReadLine();
        }
    }

    // ---- tempMassTagsNet.txt: NET statistics for already-loaded targets ----
    using (var reader = new StreamReader(Path.Combine(baseDirectory, "tempMassTagsNet.txt")))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var target = m_idToConensusTargetDict[Convert.ToInt32(rowPieces[0])];
            target.PredictedNet = Convert.ToDouble(rowPieces[7]);
            target.StdevNet = Convert.ToDouble(rowPieces[5]);
            target.AverageNet = Convert.ToDouble(rowPieces[3]);
            row = reader.ReadLine();
        }
    }

    // ---- tempProteins.txt: file protein id -> ProteinInformation ----
    var proteinId = 1;
    using (var reader = new StreamReader(Path.Combine(baseDirectory, "tempProteins.txt")))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            // Drop the first and last character of the name field (presumably enclosing quotes).
            var proteinName = rowPieces[1].Substring(1, rowPieces[1].Length - 2);
            var prot = new ProteinInformation
            {
                Id = proteinId++,
                ProteinName = proteinName
            };
            m_idToProteinDict[Convert.ToInt32(rowPieces[0])] = prot;
            row = reader.ReadLine();
        }
    }

    // ---- tempMassTagToProteins.txt: target <-> protein links ----
    var cppId = 1;
    using (var reader = new StreamReader(Path.Combine(baseDirectory, "tempMassTagToProteins.txt")))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var massTagId = Convert.ToInt32(rowPieces[0]);
            var fileProteinId = Convert.ToInt32(rowPieces[2]);

            var ctToProt = new ConsensusProteinPair();
            ctToProt.CleavageState = Convert.ToInt16(rowPieces[3]);
            ctToProt.ResidueStart = Convert.ToInt32(rowPieces[6]);
            ctToProt.ResidueEnd = Convert.ToInt32(rowPieces[7]);
            ctToProt.TerminusState = Convert.ToInt16(rowPieces[9]);
            // The ids stored on the pair are the ones assigned in this method, not the file's ids.
            ctToProt.ConsensusId = m_idToConensusTargetDict[massTagId].Id;
            ctToProt.ProteinId = m_idToProteinDict[fileProteinId].Id;

            m_ctToProtDict[cppId] = ctToProt;
            cppId++;
            row = reader.ReadLine();
        }
    }

    // ---- tempPeptides.txt: one evidence per row ----
    var evId = 1;
    using (var reader = new StreamReader(Path.Combine(baseDirectory, "tempPeptides.txt")))
    {
        reader.ReadLine();
        var row = reader.ReadLine();
        while (!string.IsNullOrEmpty(row))
        {
            var rowPieces = row.Split(m_separator);
            var id = Convert.ToInt32(rowPieces[0]);
            var peptide = rowPieces[1];
            var charge = Convert.ToInt16(rowPieces[2]);
            var target = m_idToConensusTargetDict[id];

            Tuple<string, List<short>> chargeEntry;
            if (!m_idToChargeAndPeptide.TryGetValue(id, out chargeEntry))
            {
                // First peptide row for this target: remember the peptide and wrap the target's
                // sequence as "<first char>.<sequence>.<last char>" using the peptide's end characters.
                chargeEntry = new Tuple<string, List<short>>(peptide, new List<short>());
                m_idToChargeAndPeptide[id] = chargeEntry;
                target.Sequence = peptide[0] + "." + target.Sequence + "." + peptide[peptide.Length - 1];
                target.CleanSequence = target.Sequence;
            }

            // Record each distinct charge once per target.
            if (!chargeEntry.Item2.Contains(charge))
            {
                chargeEntry.Item2.Add(charge);
                target.Charges.Add(charge);
            }

            var ctId = target.Id;
            var ev = new Evidence();
            ev.Id = evId++;
            ev.Charge = charge;
            ev.Sequence = target.CleanSequence;
            ev.Scan = Convert.ToInt32(rowPieces[3]);
            ev.DelMPpm = Convert.ToDouble(rowPieces[4]);
            ev.ObservedNet = Convert.ToDouble(rowPieces[5]);
            ev.ObservedMonoisotopicMass = Convert.ToDouble(rowPieces[6]);
            ev.Mz = ev.ObservedMonoisotopicMass / ev.Charge;
            ev.NetShift = 0;
            ev.DelM = ev.DelMPpm / 1000000;
            ev.Parent = target;

            // NOTE(review): evId was already incremented above, so the key used here is
            // ev.Id + 1. Both maps use the same key and therefore stay consistent with each
            // other; the keys are left unchanged because code outside this method may rely on
            // them. Confirm whether keying by ev.Id was intended.
            m_evidenceDict[evId] = ev;
            if (!m_ctToEvidenceMap.ContainsKey(ctId))
            {
                m_ctToEvidenceMap[ctId] = new List<int>();
            }
            m_ctToEvidenceMap[ctId].Add(evId);

            row = reader.ReadLine();
        }
    }
}