/// <summary>
/// Converts one incoming data row into a <see cref="DatabaseSearchSequence"/> and appends it
/// to the collected sequences.
/// </summary>
/// <remarks>
/// The row is silently skipped when it has no field array, has fewer than four fields, or when
/// the "Peptide", "ScanNum" or "XCorr" column is absent from the column mapping.
/// </remarks>
/// <param name="sender">Source of the event; not used.</param>
/// <param name="args">Event data whose <c>Fields</c> array holds the values of the row.</param>
/// <exception cref="NullReferenceException">Thrown when <paramref name="args"/> is null.</exception>
public void HandleDataRow(object sender, MageDataEventArgs args)
{
    if (args == null)
    {
        // The exception type is kept as-is so existing callers observe the same failure.
        throw new NullReferenceException("The factors are invalid.");
    }

    var fields = args.Fields;
    if (fields == null || fields.Length < 4)
    {
        // Rows without enough data are skipped rather than reported.
        return;
    }

    // Each column is looked up and converted in turn (peptide, scan, score) so that a
    // conversion failure surfaces at the same point as it always has.
    var sequence = new DatabaseSearchSequence();

    int peptideColumn;
    if (!m_columnMapping.TryGetValue("Peptide", out peptideColumn))
    {
        return;
    }
    sequence.Sequence = Convert.ToString(fields[peptideColumn]);

    int scanColumn;
    if (!m_columnMapping.TryGetValue("ScanNum", out scanColumn))
    {
        return;
    }
    sequence.Scan = Convert.ToInt32(fields[scanColumn]);

    int scoreColumn;
    if (!m_columnMapping.TryGetValue("XCorr", out scoreColumn))
    {
        return;
    }
    sequence.Score = Convert.ToDouble(fields[scoreColumn]);

    // The running counter is only advanced for rows that are actually stored.
    sequence.Id = m_count++;
    sequence.GroupId = DatasetID;
    m_sequences.Add(sequence);
}
/// <summary>
/// Writes a CSV file named <c>match-sequences-{similarity}.csv</c> into <paramref name="path"/>,
/// with one row per exported cluster listing each of its features and, when available, the
/// database search sequence found for the feature's first MS/MS scan and charge state.
/// </summary>
/// <remarks>
/// Every cluster is assigned a new id, but clusters holding more than two features are not
/// written. The features of an exported cluster are sorted in place by dataset, then by scan.
/// NOTE(review): the header names more per-feature columns than a matched feature writes
/// (eight values), so rows do not line up with the header - confirm the intended layout.
/// </remarks>
/// <param name="sequenceMap">Search sequences keyed by dataset id, then scan, then charge state.</param>
/// <param name="path">Directory the CSV file is created in.</param>
/// <param name="similarity">Value embedded in the output file name.</param>
/// <param name="clusters">Clusters to export; their ids and feature order are modified.</param>
/// <param name="id">First id to assign; incremented for each cluster.</param>
private void ExportMatches(
    Dictionary<int, Dictionary<int, Dictionary<int, DatabaseSearchSequence>>> sequenceMap,
    string path,
    double similarity,
    List<MsmsCluster> clusters,
    int id)
{
    var fileName = string.Format("match-sequences-{0}.csv", similarity);

    using (TextWriter xwriter = File.CreateText(Path.Combine(path, fileName)))
    {
        xwriter.WriteLine(
            "cluster id, cluster mean score, dataset id, scan, mz, charge, mass most abundant, find scan, pep seq, score, mass, charge, tryptic ends");

        foreach (var cluster in clusters)
        {
            // Ids are handed out to every cluster, including the ones skipped below.
            cluster.Id = id++;

            // This is temporary: clusters with more than two features are not exported.
            if (cluster.Features.Count > 2)
            {
                continue;
            }

            // Organize the spectra by the dataset they came from, then by scan within a dataset.
            cluster.Features.Sort((MSFeatureLight x, MSFeatureLight y) =>
                x.GroupId == y.GroupId
                    ? x.Scan.CompareTo(y.Scan)
                    : x.GroupId.CompareTo(y.GroupId));

            var line = string.Format("{0},{1},", cluster.Id, cluster.MeanScore);

            // True when the text appended last did not end with a field separator. Without this
            // the next feature's dataset id would be fused onto the previous feature's score.
            var needsSeparator = false;

            foreach (var feature in cluster.Features)
            {
                if (needsSeparator)
                {
                    line += ",";
                    needsSeparator = false;
                }

                var findScan = feature.MSnSpectra[0].Scan;

                // Walk dataset -> scan. A dataset or scan without search results yields a
                // placeholder entry holding only the dataset id and the feature scan.
                Dictionary<int, Dictionary<int, DatabaseSearchSequence>> scansOfDataset;
                Dictionary<int, DatabaseSearchSequence> sequencesOfScan = null;
                var scanKnown = sequenceMap.TryGetValue(feature.GroupId, out scansOfDataset)
                                && scansOfDataset != null
                                && scansOfDataset.TryGetValue(findScan, out sequencesOfScan);

                if (!scanKnown)
                {
                    line += string.Format("{0},{1},,,,,,,,,", feature.GroupId, feature.Scan);
                    continue;
                }

                // A missing charge state is not an error: the sequence columns are then written
                // with an empty peptide and a score of zero.
                DatabaseSearchSequence sequence = null;
                if (sequencesOfScan != null)
                {
                    sequencesOfScan.TryGetValue(feature.ChargeState, out sequence);
                }

                var pepsequence = "";
                double score = 0;
                if (sequence != null)
                {
                    pepsequence = sequence.Sequence;
                    score = sequence.Score;
                }

                line += string.Format("{0},{1},{2},{3},{4},{5},{6},{7}",
                    feature.GroupId,
                    feature.Scan,
                    feature.Mz,
                    feature.ChargeState,
                    feature.MassMonoisotopicMostAbundant,
                    findScan,
                    pepsequence,
                    score);
                needsSeparator = true;
            }

            xwriter.WriteLine(line);
        }
    }
}
/// <summary>
/// Stores the given LC-MS features, together with the peptide sequences attached to their
/// MS/MS spectra and the sequence-to-spectrum maps, in the data providers' caches.
/// </summary>
/// <remarks>
/// As a side effect each spectrum and peptide is stamped with the dataset id of its LC-MS
/// feature, and each feature receives its <c>MsMsCount</c> and <c>IdentifiedSpectraCount</c>.
/// Existing cached features of the first feature's dataset are deleted before the new ones
/// are added.
/// </remarks>
/// <param name="features">LC-MS features to cache.</param>
/// <param name="progress">Optional receiver of progress reports.</param>
public void CacheFeatures(IList<UMCLight> features, IProgress<ProgressData> progress = null)
{
    // Ids of the MS/MS spectra handled so far, so that the peptides of a spectrum that is
    // referenced more than once are only mapped the first time it is seen.
    var seenSpectrumIds = new HashSet<int>();
    var mappedPeptides = new List<DatabaseSearchSequence>();
    var sequenceMaps = new List<SequenceToMsnFeature>();
    var peptideId = 0;

    foreach (var feature in features)
    {
        var totalMsMs = 0;
        var totalIdentified = 0;
        var datasetId = feature.GroupId;

        foreach (var msFeature in feature.MsFeatures)
        {
            // Every spectrum counts here, even one already seen on another feature.
            totalMsMs += msFeature.MSnSpectra.Count;

            foreach (var spectrum in msFeature.MSnSpectra)
            {
                spectrum.GroupId = datasetId;

                if (!seenSpectrumIds.Add(spectrum.Id))
                {
                    continue;
                }

                // We are prepping the sequences that we found from peptides that were
                // matched only, not all of the results. These maps are made to help
                // establish database search results to MS/MS spectra.
                foreach (var peptide in spectrum.Peptides)
                {
                    peptide.GroupId = datasetId;

                    mappedPeptides.Add(new DatabaseSearchSequence(peptide, feature.Id)
                    {
                        GroupId = datasetId,
                        Id = peptideId++
                    });

                    // NOTE(review): SequenceId is the source peptide's Id, not the Id given to
                    // the cached copy above - confirm this is intended.
                    sequenceMaps.Add(new SequenceToMsnFeature
                    {
                        UmcFeatureId = feature.Id,
                        DatasetId = msFeature.GroupId,
                        MsnFeatureId = spectrum.Id,
                        SequenceId = peptide.Id
                    });
                }

                // Only peptides of first-seen spectra contribute to the identified count.
                totalIdentified += spectrum.Peptides.Count;
            }
        }

        feature.MsMsCount = totalMsMs;
        feature.IdentifiedSpectraCount = totalIdentified;
    }

    var count = 0;
    //TODO: Fix!!! make sure sequence maps are unique
    sequenceMaps.ForEach(x => x.Id = count++);

    var progData = new ProgressData(progress);
    var internalProgress = new Progress<ProgressData>(pd => progData.Report(pd.Percent));

    // Only the sequence maps, the mapped peptides and the LC-MS features are written to the
    // caches here; MS/MS spectra, MS-to-MS/MS maps and MS features are not (step ranges 1, 2
    // and 99 belonged to those writes).
    if (sequenceMaps.Count > 0)
    {
        progData.StepRange(3);
        Providers.SequenceMsnMapCache.AddAll(sequenceMaps, internalProgress);
    }

    if (mappedPeptides.Count > 0)
    {
        progData.StepRange(4);
        Providers.DatabaseSequenceCache.AddAll(mappedPeptides, internalProgress);
    }

    if (features.Count > 0)
    {
        progData.StepRange(100);
        // The dataset to clear is taken from the first feature.
        Providers.FeatureCache.DeleteByDataset(features[0].GroupId);
        Providers.FeatureCache.AddAllStateless(features, internalProgress);
    }
}