/// <summary>
/// Bulk-inserts one test protein for each of the given sequences.
/// </summary>
/// <remarks>
/// Proteins are numbered from 1 in list order; protein N receives the accession
/// "PRO" + N and the description "Protein " + N.
/// </remarks>
/// <param name="session">Session whose connection the bulk inserter is created on.</param>
/// <param name="testProteinSequences">Sequences of the proteins to create, in id order.</param>
public static void CreateTestProteins(ISession session, IList<string> testProteinSequences)
{
    var inserter = new BulkInserter(session.Connection);

    int proteinId = 0;
    foreach (string sequence in testProteinSequences)
    {
        ++proteinId;
        string idText = proteinId.ToString();

        var protein = new TestProtein()
        {
            Id = proteinId,
            Accession = "PRO" + idText,
            Description = "Protein " + idText,
            Sequence = sequence
        };

        // Length is assigned through a PrivateObject accessor rather than the
        // initializer (presumably because its setter is not publicly accessible).
        var lengthAccessor = new PrivateObject(protein);
        lengthAccessor.SetProperty("Length", sequence.Length);

        inserter.Add(protein);
    }

    // flush everything that was queued, then reset the inserter
    inserter.Execute();
    inserter.Reset("");
}
/// <summary>
/// Finds every occurrence of a peptide's sequence in the proteins returned by the session,
/// queues a peptide instance for each occurrence and attaches the instances to the peptide.
/// </summary>
/// <remarks>
/// Overlapping occurrences within one protein are all reported (the search resumes one
/// residue after the previous hit). A terminus is flagged as specific when it coincides
/// with the protein terminus, or when the residue before the peptide (N-terminus) or the
/// peptide's last residue (C-terminus) is 'K' or 'R'.
/// </remarks>
/// <param name="session">Session used to enumerate the proteins to search.</param>
/// <param name="bulkInserter">Inserter that each created instance is added to.</param>
/// <param name="pep">Peptide to locate; its Instances list is replaced.</param>
/// <exception cref="ArgumentException">
/// The peptide sequence is empty, or it does not occur in any protein.
/// </exception>
private static void createTestPeptideInstances (NHibernate.ISession session, BulkInserter bulkInserter, Peptide pep)
{
    // store instances even though the association is inverse:
    // the PeptideModification.Offset property needs access to the protein sequence
    pep.Instances = new List<PeptideInstance>();

    string peptideSequence = pep.Sequence;
    if (String.IsNullOrEmpty(peptideSequence))
        throw new ArgumentException("peptide sequence must not be empty");

    int peptideLength = peptideSequence.Length;

    // K/R anywhere but the last residue counts as a missed cleavage; this depends
    // only on the peptide, so compute it once instead of once per occurrence
    int missedCleavages = peptideSequence.ToCharArray(0, peptideLength - 1).Count(o => o == 'K' || o == 'R');

    foreach (Protein pro in session.Query<Protein>())
    {
        string proteinSequence = pro.Sequence;

        // sequences are plain residue letters: search ordinally, not with culture rules
        int start = proteinSequence.IndexOf(peptideSequence, 0, StringComparison.Ordinal);
        while (start >= 0)
        {
            int end = start + peptideLength;

            bool nTerminusIsSpecific = start == 0 ||
                                       proteinSequence[start - 1] == 'K' ||
                                       proteinSequence[start - 1] == 'R';
            bool cTerminusIsSpecific = end == proteinSequence.Length ||
                                       proteinSequence[end - 1] == 'K' ||
                                       proteinSequence[end - 1] == 'R';

            var instance = new TestPI()
            {
                Peptide = pep,
                Protein = pro,
                Offset = start,
                Length = peptideLength,
                MissedCleavages = missedCleavages,
                NTerminusIsSpecific = nTerminusIsSpecific,
                CTerminusIsSpecific = cTerminusIsSpecific,
            };

            // SpecificTermini (0, 1 or 2) is assigned through a PrivateObject accessor
            var instanceAccessor = new PrivateObject(instance);
            instanceAccessor.SetProperty("SpecificTermini",
                                         (nTerminusIsSpecific ? 1 : 0) + (cTerminusIsSpecific ? 1 : 0));

            bulkInserter.Add(instance);
            pep.Instances.Add(instance);

            // resume one residue later so overlapping occurrences are found too
            start = proteinSequence.IndexOf(peptideSequence, start + 1, StringComparison.Ordinal);
        }
    }

    if (pep.Instances.Count == 0)
        throw new ArgumentException("peptide " + pep.Sequence + " does not occur in any proteins");
}
/// <summary>
/// Populates the database behind <paramref name="session"/> with spectrum source groups,
/// sources, spectra, analyses, peptides, peptide-spectrum matches and modifications
/// described by a list of test PSM summary rows.
/// </summary>
/// <remarks>
/// Each row's PeptideTuples string is a space-separated list of entries of the form
/// <c>SEQUENCE@charge/scoreDivider</c>. Entries are processed in ascending divider order;
/// the rank is advanced whenever the divider differs from the previous entry's divider.
/// The "previous" divider starts at 1 for every row, so a row whose smallest divider is
/// not 1 starts at rank 2. Only rank 1 matches receive the row's Q value.
/// New ids continue from the current maximum id found in each table.
/// </remarks>
/// <param name="session">Session used for lookups and whose connection receives the bulk inserts.</param>
/// <param name="testPsmSummary">Rows describing the PSMs to create.</param>
public static void CreateTestData (NHibernate.ISession session, IList<SpectrumTuple> testPsmSummary)
{
    // Cache the entities that already exist. The loop below relies on the Map indexer
    // handing back an entry for keys that are not cached yet (presumably a
    // default-constructed one); an empty Name/Sequence marks such an entry as new.
    var dbGroups = new Map<string, SpectrumSourceGroup>();
    foreach (var ssg in session.Query<SpectrumSourceGroup>())
        dbGroups[ssg.Name] = ssg;

    var dbSources = new Map<long, SpectrumSource>();
    foreach (var ss in session.Query<SpectrumSource>())
        dbSources[ss.Id.Value] = ss;

    var dbAnalyses = new Map<long, Analysis>();
    foreach (var a in session.Query<Analysis>())
        dbAnalyses[a.Id.Value] = a;

    var dbPeptides = new Map<string, Peptide>();
    foreach (var pep in session.Query<Peptide>())
        dbPeptides[pep.Sequence] = pep;

    var bulkInserter = new BulkInserter(session.Connection);

    // highest id currently in use per table (0 when the table is empty)
    long lastPsmId = session.CreateQuery("SELECT MAX(Id) FROM PeptideSpectrumMatch").UniqueResult<long?>().GetValueOrDefault();
    long lastModId = session.CreateQuery("SELECT MAX(Id) FROM Modification").UniqueResult<long?>().GetValueOrDefault();
    long lastPmId = session.CreateQuery("SELECT MAX(Id) FROM PeptideModification").UniqueResult<long?>().GetValueOrDefault();
    long lastGroupId = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSourceGroup").UniqueResult<long?>().GetValueOrDefault();
    long lastSourceId = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSource").UniqueResult<long?>().GetValueOrDefault();
    long lastSglId = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSourceGroupLink").UniqueResult<long?>().GetValueOrDefault();

    foreach (SpectrumTuple row in testPsmSummary)
    {
        string groupName = row.Group;
        string sourceName = "Source " + row.Source;
        string analysisId = "Engine " + row.Analysis;
        string peptideTuples = row.PeptideTuples;

        SpectrumSourceGroup group = dbGroups[groupName];
        if (String.IsNullOrEmpty(group.Name))
        {
            group.Id = ++lastGroupId;
            group.Name = groupName;
            bulkInserter.Add(group);
        }

        SpectrumSource source = dbSources[row.Source];
        if (String.IsNullOrEmpty(source.Name))
        {
            source.Id = ++lastSourceId;
            source.Name = sourceName;
            source.Group = group;
            source.Spectra = new List<Spectrum>();
            bulkInserter.Add(source);

            // add a source group link for the source's immediate group
            bulkInserter.Add(new SpectrumSourceGroupLink() { Id = ++lastSglId, Group = group, Source = source });

            #region add source group links for all of the immediate group's parent groups
            if (groupName != "/")
            {
                // '/' is searched with the char overload, which is an ordinal comparison
                string parentGroupName = groupName.Substring(0, groupName.LastIndexOf('/'));
                while (true)
                {
                    // stripping the last path element of a top-level group leaves "", i.e. the root
                    if (String.IsNullOrEmpty(parentGroupName))
                        parentGroupName = "/";

                    // add the parent group if it doesn't exist yet
                    SpectrumSourceGroup parentGroup = session.UniqueResult<SpectrumSourceGroup>(o => o.Name == parentGroupName);
                    if (parentGroup == null)
                    {
                        parentGroup = new SpectrumSourceGroup() { Id = ++lastGroupId, Name = parentGroupName };
                        bulkInserter.Add(parentGroup);
                    }

                    bulkInserter.Add(new SpectrumSourceGroupLink() { Id = ++lastSglId, Group = parentGroup, Source = source });

                    if (parentGroupName == "/")
                        break;

                    parentGroupName = parentGroupName.Substring(0, parentGroupName.LastIndexOf('/'));
                }
            }
            #endregion
        }

        // spectra are identified within a source by their 0-based index (row.Spectrum is 1-based)
        Spectrum spectrum = source.Spectra.SingleOrDefault(o => o.Source.Id == source.Id && o.Index == row.Spectrum - 1);
        if (spectrum == null)
        {
            spectrum = new Spectrum()
            {
                Id = source.Id * 10000 + row.Spectrum,
                Index = row.Spectrum - 1,
                NativeID = "scan=" + row.Spectrum,
                Source = source,
                PrecursorMZ = 42
            };

            source.Spectra.Add(spectrum);
            bulkInserter.Add(spectrum);
        }

        Analysis analysis = dbAnalyses[row.Analysis];
        if (String.IsNullOrEmpty(analysis.Name))
        {
            analysis.Id = dbAnalyses.Max(o => o.Value.Id).GetValueOrDefault() + 1;
            analysis.Name = analysisId + " 1.0";
            analysis.Software = new AnalysisSoftware() {Name = analysisId, Version = "1.0"};
            analysis.StartTime = DateTime.Today.AddHours(row.Analysis);
            analysis.Type = AnalysisType.DatabaseSearch;

            analysis.Parameters = new SortedSet<AnalysisParameter>()
            {
                new AnalysisParameter()
                {
                    Id = analysis.Id * 10000,
                    Analysis = analysis,
                    Name = "Parameter 1",
                    Value = "Value 1"
                }
            };

            bulkInserter.Add(analysis);
        }

        // make sure peptides are sorted by their score divider (which will determine rank)
        var peptideList = new SortedList<int, List<PeptideTuple>>();
        foreach (string tuple in peptideTuples.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
        {
            // entry layout: SEQUENCE@charge/scoreDivider; split it once and reuse the parts
            string[] tupleParts = tuple.Split('@', '/');

            var peptideTuple = new PeptideTuple()
            {
                Sequence = tupleParts[0],
                Charge = Convert.ToInt32(tupleParts[1]),
                ScoreDivider = Convert.ToInt32(tupleParts[2])
            };

            List<PeptideTuple> tuplesWithSameDivider;
            if (!peptideList.TryGetValue(peptideTuple.ScoreDivider, out tuplesWithSameDivider))
            {
                tuplesWithSameDivider = new List<PeptideTuple>();
                peptideList[peptideTuple.ScoreDivider] = tuplesWithSameDivider;
            }

            tuplesWithSameDivider.Add(peptideTuple);
        }

        int rank = 1;
        int lastDivider = 1;
        foreach (var peptideTupleList in peptideList.Values)
            foreach (var peptideTuple in peptideTupleList)
            {
                using (PwizPeptide pwizPeptide = new PwizPeptide(peptideTuple.Sequence,
                                                                 ModParsing.ModificationParsing_Auto,
                                                                 ModDelimiter.ModificationDelimiter_Brackets))
                {
                    Peptide peptide = dbPeptides[pwizPeptide.sequence];
                    if (String.IsNullOrEmpty(peptide.Sequence))
                    {
                        peptide = new TestPeptide(pwizPeptide.sequence);
                        peptide.Id = dbPeptides.Max(o => o.Value.Id).GetValueOrDefault() + 1;
                        peptide.MonoisotopicMass = pwizPeptide.monoisotopicMass(false);
                        peptide.MolecularWeight = pwizPeptide.molecularWeight(false);
                        dbPeptides[pwizPeptide.sequence] = peptide;
                        bulkInserter.Add(peptide);
                        createTestPeptideInstances(session, bulkInserter, peptide);
                    }

                    // neutral mass = m/z * z - z * proton mass
                    double neutralPrecursorMass = (spectrum.PrecursorMZ*peptideTuple.Charge) - (peptideTuple.Charge*Proton.Mass);

                    // a divider different from the previous entry's starts the next rank;
                    // this must happen before Rank and QValue are assigned below
                    if (peptideTuple.ScoreDivider != lastDivider)
                        ++rank;

                    var psm = new PeptideSpectrumMatch()
                    {
                        Id = ++lastPsmId,
                        Peptide = peptide,
                        Spectrum = spectrum,
                        Analysis = analysis,
                        ObservedNeutralMass = neutralPrecursorMass,
                        MonoisotopicMassError = neutralPrecursorMass - pwizPeptide.monoisotopicMass(),
                        MolecularWeightError = neutralPrecursorMass - pwizPeptide.molecularWeight(),
                        Charge = peptideTuple.Charge,
                        Rank = rank,
                        QValue = (rank == 1 ? row.QValue : PeptideSpectrumMatch.DefaultQValue),
                    };

                    if (row.Score != null)
                        psm.Scores = new Dictionary<string, double>()
                        {
                            {"score1", (double) row.Score/peptideTuple.ScoreDivider},
                            {"score2", 1/((double) row.Score/peptideTuple.ScoreDivider)}
                        };

                    bulkInserter.Add(psm);
                    lastDivider = peptideTuple.ScoreDivider;

                    // add PeptideModifications and Modifications
                    foreach (KeyValuePair<int, ModList> itr in pwizPeptide.modifications())
                    {
                        foreach (PwizMod pwizMod in itr.Value)
                        {
                            Modification mod = session.UniqueResult<Modification>(o => o.Formula == pwizMod.formula());
                            if (mod == null)
                            {
                                mod = new Modification()
                                {
                                    Id = ++lastModId,
                                    Formula = pwizMod.formula(),
                                    MonoMassDelta = pwizMod.monoisotopicDeltaMass(),
                                    AvgMassDelta = pwizMod.averageDeltaMass(),
                                    Name = pwizMod.formula()
                                };
                                bulkInserter.Add(mod);
                            }

                            // terminal modifications are stored with sentinel offsets
                            bulkInserter.Add(new PeptideModification()
                            {
                                Id = ++lastPmId,
                                PeptideSpectrumMatch = psm,
                                Modification = mod,
                                Offset = itr.Key == ModMap.NTerminus() ? int.MinValue
                                       : itr.Key == ModMap.CTerminus() ? int.MaxValue
                                       : itr.Key
                            });
                        }
                    }
                }
            }
    }

    // flush everything that was queued, then reset the inserter
    bulkInserter.Execute();
    bulkInserter.Reset("");
}