Пример #1
0
 public void SetSequenceAndMods(string sequence, IList <Modification> fixedMods, string variableMods)
 {
     Peptide = new CSMSL.Proteomics.Peptide(sequence);
     Peptide.SetModifications(fixedMods);
     Modificationstring = variableMods;
     foreach (Tuple <Modification, int> modTuple in OmssaModification.ParseModificationLine(variableMods))
     {
         Modification mod  = modTuple.Item1;
         int          site = modTuple.Item2;
         if (site == 1 && mod.Sites.HasFlag(ModificationSites.NPep))
         {
             Peptide.AddModification(mod, Terminus.N);
         }
         else if (site == Peptide.Length && mod.Sites.HasFlag(ModificationSites.PepC))
         {
             Peptide.AddModification(mod, Terminus.C);
         }
         else
         {
             Peptide.AddModification(mod, site);
         }
     }
 }
Пример #2
0
        private List <PSM> LoadAllPSMs(string csvFile, string rawFileDirectory, List <Modification> fixedMods)
        {
            ProgressUpdate(0.0); //force the progressbar to go into marquee mode
            Log("Reading PSMs from " + csvFile);

            Dictionary <string, ThermoRawFile> rawFiles =
                Directory.EnumerateFiles(rawFileDirectory, "*.raw", SearchOption.AllDirectories)
                .ToDictionary(Path.GetFileNameWithoutExtension, file => new ThermoRawFile(file));

            _dataFiles = new HashSet <MSDataFile>();

            List <PSM> psms = new List <PSM>();

            int totalPsms = 0;

            using (CsvReader reader = new CsvReader(new StreamReader(csvFile), true))
            {
                while (reader.ReadNextRecord())
                {
                    string mods = reader["Mods"];

                    totalPsms++;

                    // Skip if there are no modifications
                    if (string.IsNullOrEmpty(mods))
                    {
                        continue;
                    }

                    // Convert the text mod line into a list of modification objects
                    List <Modification> variableMods = OmssaModification.ParseModificationLine(mods).Select(item => item.Item1).ToList();

                    // Only keep things with quantified Modifications
                    if (!variableMods.Any(mod => QuantifiedModifications.Contains(mod)))
                    {
                        continue;
                    }

                    string filename = reader["Filename/id"];
                    string rawname  = filename.Split('.')[0];

                    int scanNumber = int.Parse(reader["Spectrum number"]);

                    PSM psm = new PSM(scanNumber, rawname);
                    psm.StartResidue = int.Parse(reader["Start"]);
                    psm.Charge       = int.Parse(reader["Charge"]);
                    psm.BasePeptide  = new Peptide(reader["Peptide"].ToUpper());
                    psm.Defline      = reader["Defline"];
                    psm.ProteinGroup = reader["Best PG Name"];
                    psm.NumberOfSharingProteinGroups = int.Parse(reader["# of Sharing PGs"]);
                    psm.Filename = filename;

                    // Apply all the fix modifications
                    psm.BasePeptide.SetModifications(fixedMods);

                    int i = 0;
                    while (i < variableMods.Count)
                    {
                        if (fixedMods.Contains(variableMods[i]))
                        {
                            variableMods.RemoveAt(i);
                        }
                        else
                        {
                            i++;
                        }
                    }

                    // Save all the variable mod types
                    psm.VariabledModifications = variableMods;

                    psms.Add(psm);
                }
            }

            Log(string.Format("{0:N0} PSMs were loaded....", totalPsms));
            Log(string.Format("{0:N0} PSMs were kept.... ({1:F2} %)", psms.Count, 100.0 * (double)psms.Count / totalPsms));

            Log("Reading Spectral Data...");
            ThermoRawFile currentRawFile     = null;
            string        currentRawFileName = null;
            int           counter            = 0;

            foreach (PSM psm in psms.OrderBy(psm => psm.RawFileName))
            {
                string rawfilename = psm.RawFileName;

                if (!rawfilename.Equals(currentRawFileName))
                {
                    currentRawFileName = rawfilename;
                    if (currentRawFile != null && currentRawFile.IsOpen)
                    {
                        currentRawFile.Dispose();
                    }

                    if (!rawFiles.TryGetValue(rawfilename, out currentRawFile))
                    {
                        throw new NullReferenceException(string.Format("Raw File: {0}.raw was not found! Aborting.", rawfilename));
                    }
                    currentRawFile.Open();
                }

                psm.SetRawFile(currentRawFile);
                counter++;
                if (counter % 25 == 0)
                {
                    ProgressUpdate((double)counter / psms.Count);
                }
            }

            return(psms);
        }
Пример #3
0
        private List <Protein> CompileResults(List <LocalizedHit> hits, string csvFile, string outputDirectory, bool breakProteinsApart = false)
        {
            Dictionary <string, LocalizedHit> hitsdict = new Dictionary <string, LocalizedHit>();

            // Group all the localized Hits into proteins
            Dictionary <string, Protein> proteins = new Dictionary <string, Protein>();

            foreach (LocalizedHit hit in hits)
            {
                hitsdict.Add(hit.PSM.Filename, hit);


                string defline = hit.PSM.Defline;

                if (breakProteinsApart)
                {
                    string[] groups = hit.PSM.ProteinGroup.Split('|');
                    foreach (string group in groups)
                    {
                        Protein prot;
                        if (!proteins.TryGetValue(group, out prot))
                        {
                            prot = new Protein(group, defline);
                            proteins.Add(group, prot);
                        }
                        prot.AddHit(hit);
                    }
                }
                else
                {
                    Protein prot;
                    if (!proteins.TryGetValue(hit.PSM.ProteinGroup, out prot))
                    {
                        prot = new Protein(hit.PSM.ProteinGroup, defline);
                        proteins.Add(hit.PSM.ProteinGroup, prot);
                    }
                    prot.AddHit(hit);
                }
            }
            using (StreamWriter writer = new StreamWriter(Path.Combine(outputDirectory, Path.GetFileNameWithoutExtension(csvFile) + "_all.csv")),
                   localizedWriter = new StreamWriter(Path.Combine(outputDirectory, Path.GetFileNameWithoutExtension(csvFile) + "_localized.csv")))
            {
                using (CsvReader reader = new CsvReader(new StreamReader(csvFile), true))
                {
                    LocalizedHit hit = null;
                    headerInfo = reader.GetFieldHeaders();
                    bool tqFound = false;
                    for (int i = 0; i < reader.FieldCount; i++)
                    {
                        if (headerInfo[i].EndsWith("NL)"))
                        {
                            if (!tqFound)
                            {
                                FirstQuantColumn = i;
                                tqFound          = true;
                            }
                        }
                        if (headerInfo[i] == "Channels Detected")
                        {
                            LastQuantColumn = i - 1;
                        }
                    }
                    string header = string.Join(",", headerInfo) + ",# Isoforms,# of Considered Fragments,Localized?,Delta Score,Best Isoform,Spectral Matches,% TIC,Second Best Isoform,Second Spectral Matches,Second % TIC";
                    writer.WriteLine(header);
                    localizedWriter.WriteLine(header);
                    while (reader.ReadNextRecord())
                    {
                        string mods = reader["Mods"];
                        if (string.IsNullOrEmpty(mods))
                        {
                            continue;
                        }

                        List <Modification> variableMods = OmssaModification.ParseModificationLine(mods).Select(item => item.Item1).OfType <Modification>().ToList();

                        // Only keep things with quantified Modifications
                        if (!variableMods.Any(mod => QuantifiedModifications.Contains(mod)))
                        {
                            continue;
                        }

                        string filename = reader["Filename/id"];
                        if (!hitsdict.TryGetValue(filename, out hit))
                        {
                            continue;
                        }

                        string[] data = new string[reader.FieldCount];
                        reader.CopyCurrentRecordTo(data);

                        hit.omssapsm = data;

                        StringBuilder sb = new StringBuilder();

                        foreach (string datum in data)
                        {
                            if (datum.Contains(','))
                            {
                                sb.Append("\"");
                                sb.Append(datum);
                                sb.Append("\"");
                            }
                            else
                            {
                                sb.Append(datum);
                            }
                            sb.Append(',');
                        }
                        sb.Append(hit.PSM.Isoforms);
                        sb.Append(',');
                        sb.Append(hit.LocalizedIsoform.Fragments.Count);
                        sb.Append(',');
                        sb.Append(hit.IsLocalized);
                        sb.Append(',');
                        sb.Append(hit.MatchDifference);
                        sb.Append(',');
                        sb.Append(hit.LocalizedIsoform.SequenceWithModifications);
                        sb.Append(',');
                        sb.Append(hit.LocalizedIsoform.SpectralMatch.Matches);
                        sb.Append(',');
                        sb.Append(hit.LocalizedIsoform.SpectralMatch.PercentTIC);
                        if (hit.PSM.Isoforms > 1)
                        {
                            //sb.Append(',');
                            //sb.Append(hit.BestPeptideSDFCount);
                            sb.Append(',');
                            sb.Append(hit.SecondBestPeptideIsoform.SequenceWithModifications);
                            sb.Append(',');
                            sb.Append(hit.SecondBestPeptideIsoform.SpectralMatch.Matches);
                            sb.Append(',');
                            sb.Append(hit.SecondBestPeptideIsoform.SpectralMatch.PercentTIC);
                            //sb.Append(',');
                            //sb.Append(hit.SecondBestPeptideSDFCount);
                        }

                        if (hit.IsLocalized)
                        {
                            localizedWriter.WriteLine(sb.ToString());
                        }
                        writer.WriteLine(sb.ToString());
                    }
                }
            }

            return(proteins.Values.ToList());
        }