/// <summary>
/// Rebuilds <c>Peptide</c> from <paramref name="sequence"/>, applies the fixed
/// modifications, stores the raw variable-modification text in
/// <c>Modificationstring</c>, and then attaches every variable modification
/// parsed from that text.
/// </summary>
/// <param name="sequence">Sequence passed to the peptide constructor.</param>
/// <param name="fixedMods">Modifications handed to <c>Peptide.SetModifications</c>.</param>
/// <param name="variableMods">Modification line parsed by <c>OmssaModification.ParseModificationLine</c>.</param>
public void SetSequenceAndMods(string sequence, IList<Modification> fixedMods, string variableMods)
{
    Peptide = new CSMSL.Proteomics.Peptide(sequence);
    Peptide.SetModifications(fixedMods);
    Modificationstring = variableMods;

    foreach (Tuple<Modification, int> parsed in OmssaModification.ParseModificationLine(variableMods))
    {
        Modification modification = parsed.Item1;
        int position = parsed.Item2;

        // A modification on the first residue that is allowed on the peptide
        // N-terminus is attached to the terminus rather than to the residue.
        bool goesOnNTerminus = position == 1 && modification.Sites.HasFlag(ModificationSites.NPep);
        if (goesOnNTerminus)
        {
            Peptide.AddModification(modification, Terminus.N);
            continue;
        }

        // Same idea for the last residue and the peptide C-terminus.
        bool goesOnCTerminus = position == Peptide.Length && modification.Sites.HasFlag(ModificationSites.PepC);
        if (goesOnCTerminus)
        {
            Peptide.AddModification(modification, Terminus.C);
            continue;
        }

        // Everything else is attached at the parsed site index.
        Peptide.AddModification(modification, position);
    }
}
/// <summary>
/// Reads the PSM rows of <paramref name="csvFile"/>, keeps those that carry at
/// least one modification contained in <c>QuantifiedModifications</c>, and
/// associates every kept PSM with its raw file found under
/// <paramref name="rawFileDirectory"/>.
/// </summary>
/// <param name="csvFile">CSV file to read; the columns "Mods", "Filename/id", "Spectrum number",
/// "Start", "Charge", "Peptide", "Defline", "Best PG Name" and "# of Sharing PGs" are accessed.</param>
/// <param name="rawFileDirectory">Directory searched recursively for "*.raw" files.</param>
/// <param name="fixedMods">Fixed modifications applied to every base peptide and removed from
/// each PSM's variable-modification list.</param>
/// <returns>The kept PSMs, in the order they were read from the CSV file.</returns>
/// <exception cref="NullReferenceException">Thrown when a PSM references a raw file name that was
/// not found under <paramref name="rawFileDirectory"/>. The exception type is kept as-is because
/// callers outside this method may rely on it.</exception>
private List<PSM> LoadAllPSMs(string csvFile, string rawFileDirectory, List<Modification> fixedMods)
{
    ProgressUpdate(0.0); //force the progressbar to go into marquee mode
    Log("Reading PSMs from " + csvFile);

    // Index every raw file below the directory by its name without extension.
    Dictionary<string, ThermoRawFile> rawFiles =
        Directory.EnumerateFiles(rawFileDirectory, "*.raw", SearchOption.AllDirectories)
                 .ToDictionary(Path.GetFileNameWithoutExtension, file => new ThermoRawFile(file));

    _dataFiles = new HashSet<MSDataFile>();

    List<PSM> psms = new List<PSM>();
    int totalPsms = 0;

    using (CsvReader reader = new CsvReader(new StreamReader(csvFile), true))
    {
        while (reader.ReadNextRecord())
        {
            string mods = reader["Mods"];
            totalPsms++;

            // Skip if there are no modifications
            if (string.IsNullOrEmpty(mods))
            {
                continue;
            }

            // Convert the text mod line into a list of modification objects
            List<Modification> variableMods =
                OmssaModification.ParseModificationLine(mods).Select(item => item.Item1).ToList();

            // Only keep things with quantified Modifications
            if (!variableMods.Any(mod => QuantifiedModifications.Contains(mod)))
            {
                continue;
            }

            string filename = reader["Filename/id"];

            // The raw file name is everything before the first '.' of the file/id field.
            string rawname = filename.Split('.')[0];
            int scanNumber = int.Parse(reader["Spectrum number"]);

            PSM psm = new PSM(scanNumber, rawname);
            psm.StartResidue = int.Parse(reader["Start"]);
            psm.Charge = int.Parse(reader["Charge"]);
            psm.BasePeptide = new Peptide(reader["Peptide"].ToUpper());
            psm.Defline = reader["Defline"];
            psm.ProteinGroup = reader["Best PG Name"];
            psm.NumberOfSharingProteinGroups = int.Parse(reader["# of Sharing PGs"]);
            psm.Filename = filename;

            // Apply all the fix modifications
            psm.BasePeptide.SetModifications(fixedMods);

            // Fixed modifications are already on the base peptide, so they are
            // dropped from the variable list.
            variableMods.RemoveAll(mod => fixedMods.Contains(mod));

            // Save all the variable mod types
            psm.VariabledModifications = variableMods;

            psms.Add(psm);
        }
    }

    // Guard the ratio so an empty CSV logs 0 % instead of NaN.
    double keptPercent = totalPsms > 0 ? 100.0 * (double)psms.Count / totalPsms : 0.0;

    Log(string.Format("{0:N0} PSMs were loaded....", totalPsms));
    Log(string.Format("{0:N0} PSMs were kept.... ({1:F2} %)", psms.Count, keptPercent));
    Log("Reading Spectral Data...");

    ThermoRawFile currentRawFile = null;
    string currentRawFileName = null;
    int counter = 0;

    // Walk the PSMs grouped by raw file name so each raw file is opened once.
    foreach (PSM psm in psms.OrderBy(psm => psm.RawFileName))
    {
        string rawfilename = psm.RawFileName;

        if (!rawfilename.Equals(currentRawFileName))
        {
            currentRawFileName = rawfilename;

            // Release the previous raw file before switching to the next one.
            if (currentRawFile != null && currentRawFile.IsOpen)
            {
                currentRawFile.Dispose();
            }

            if (!rawFiles.TryGetValue(rawfilename, out currentRawFile))
            {
                throw new NullReferenceException(string.Format("Raw File: {0}.raw was not found! Aborting.", rawfilename));
            }

            currentRawFile.Open();
        }

        psm.SetRawFile(currentRawFile);

        counter++;
        if (counter % 25 == 0)
        {
            ProgressUpdate((double)counter / psms.Count);
        }
    }

    return psms;
}
/// <summary>
/// Groups the localized hits into proteins and writes two CSV reports next to each other in
/// <paramref name="outputDirectory"/>: "&lt;csv name&gt;_all.csv" with every matched row and
/// "&lt;csv name&gt;_localized.csv" with only the rows whose hit is localized. Each output row is
/// the original input row followed by the localization columns.
/// </summary>
/// <param name="hits">Localized hits; each is indexed by <c>hit.PSM.Filename</c>, so duplicate
/// file names cause <c>Dictionary.Add</c> to throw.</param>
/// <param name="csvFile">The input CSV file that is re-read to produce the reports.</param>
/// <param name="outputDirectory">Directory the two report files are written to.</param>
/// <param name="breakProteinsApart">When true, the protein group string is split on '|' and the
/// hit is added to one protein per part; otherwise the whole string is used as a single key.</param>
/// <returns>All proteins that received at least one hit.</returns>
private List<Protein> CompileResults(List<LocalizedHit> hits, string csvFile, string outputDirectory, bool breakProteinsApart = false)
{
    Dictionary<string, LocalizedHit> hitsdict = new Dictionary<string, LocalizedHit>();

    // Group all the localized Hits into proteins
    Dictionary<string, Protein> proteins = new Dictionary<string, Protein>();
    foreach (LocalizedHit localizedHit in hits)
    {
        hitsdict.Add(localizedHit.PSM.Filename, localizedHit);
        string defline = localizedHit.PSM.Defline;

        string[] groups = breakProteinsApart
            ? localizedHit.PSM.ProteinGroup.Split('|')
            : new string[] { localizedHit.PSM.ProteinGroup };

        foreach (string group in groups)
        {
            Protein prot;
            if (!proteins.TryGetValue(group, out prot))
            {
                prot = new Protein(group, defline);
                proteins.Add(group, prot);
            }
            prot.AddHit(localizedHit);
        }
    }

    string baseName = Path.GetFileNameWithoutExtension(csvFile);
    string allPath = Path.Combine(outputDirectory, baseName + "_all.csv");
    string localizedPath = Path.Combine(outputDirectory, baseName + "_localized.csv");

    using (StreamWriter writer = new StreamWriter(allPath))
    using (StreamWriter localizedWriter = new StreamWriter(localizedPath))
    using (CsvReader reader = new CsvReader(new StreamReader(csvFile), true))
    {
        headerInfo = reader.GetFieldHeaders();

        // Locate the quantitation columns: the first header ending in "NL)" starts the
        // range, and the column just before "Channels Detected" ends it.
        bool firstQuantFound = false;
        for (int i = 0; i < reader.FieldCount; i++)
        {
            if (!firstQuantFound && headerInfo[i].EndsWith("NL)", StringComparison.Ordinal))
            {
                FirstQuantColumn = i;
                firstQuantFound = true;
            }

            if (headerInfo[i] == "Channels Detected")
            {
                LastQuantColumn = i - 1;
            }
        }

        string header = string.Join(",", headerInfo.Select(h => QuoteCsvField(h)).ToArray())
            + ",# Isoforms,# of Considered Fragments,Localized?,Delta Score,Best Isoform,Spectral Matches,% TIC,Second Best Isoform,Second Spectral Matches,Second % TIC";
        writer.WriteLine(header);
        localizedWriter.WriteLine(header);

        while (reader.ReadNextRecord())
        {
            string mods = reader["Mods"];
            if (string.IsNullOrEmpty(mods))
            {
                continue;
            }

            List<Modification> variableMods =
                OmssaModification.ParseModificationLine(mods)
                                 .Select(item => item.Item1)
                                 .OfType<Modification>()
                                 .ToList();

            // Only keep things with quantified Modifications
            if (!variableMods.Any(mod => QuantifiedModifications.Contains(mod)))
            {
                continue;
            }

            string filename = reader["Filename/id"];
            LocalizedHit hit;
            if (!hitsdict.TryGetValue(filename, out hit))
            {
                continue;
            }

            string[] data = new string[reader.FieldCount];
            reader.CopyCurrentRecordTo(data);
            hit.omssapsm = data;

            // Echo the original record, quoting fields where CSV requires it.
            StringBuilder sb = new StringBuilder();
            foreach (string datum in data)
            {
                sb.Append(QuoteCsvField(datum));
                sb.Append(',');
            }

            sb.Append(hit.PSM.Isoforms);
            sb.Append(',');
            sb.Append(hit.LocalizedIsoform.Fragments.Count);
            sb.Append(',');
            sb.Append(hit.IsLocalized);
            sb.Append(',');
            sb.Append(hit.MatchDifference);
            sb.Append(',');
            sb.Append(hit.LocalizedIsoform.SequenceWithModifications);
            sb.Append(',');
            sb.Append(hit.LocalizedIsoform.SpectralMatch.Matches);
            sb.Append(',');
            sb.Append(hit.LocalizedIsoform.SpectralMatch.PercentTIC);

            if (hit.PSM.Isoforms > 1)
            {
                sb.Append(',');
                sb.Append(hit.SecondBestPeptideIsoform.SequenceWithModifications);
                sb.Append(',');
                sb.Append(hit.SecondBestPeptideIsoform.SpectralMatch.Matches);
                sb.Append(',');
                sb.Append(hit.SecondBestPeptideIsoform.SpectralMatch.PercentTIC);
            }
            else
            {
                // No second-best isoform: emit three empty fields so the row has
                // the same number of columns as the header.
                sb.Append(",,,");
            }

            string line = sb.ToString();
            if (hit.IsLocalized)
            {
                localizedWriter.WriteLine(line);
            }
            writer.WriteLine(line);
        }
    }

    return proteins.Values.ToList();
}

/// <summary>
/// Returns <paramref name="field"/> in a form safe to place between commas in a CSV row:
/// fields containing a comma, double quote, or line break are wrapped in double quotes with
/// embedded double quotes doubled; other fields are returned unchanged; null becomes empty.
/// </summary>
private static string QuoteCsvField(string field)
{
    if (string.IsNullOrEmpty(field))
    {
        return string.Empty;
    }

    if (field.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
    {
        return field;
    }

    return "\"" + field.Replace("\"", "\"\"") + "\"";
}