Exemple #1
0
        /// <summary>
        /// Verifies that every UNIMOD term (except the UNIMOD root node) can be recovered from its name:
        /// a ModificationObj created from the term's name must expose a CVParam whose Cvid is that term.
        /// </summary>
        public void TestUnimodNameMapping()
        {
            var unimodTerms = CV.TermAccessionLookup["UNIMOD"].Where(entry => entry.Value != CV.CVID.UNIMOD_unimod_root_node);

            foreach (var entry in unimodTerms)
            {
                var expectedCvid = entry.Value;
                var termName     = CV.TermData[expectedCvid].Name;

                // The CVID is passed as "unknown" on purpose so it has to come from the name;
                // the two numeric arguments are fixed values, the checks below only inspect the CVParams.
                var cvParams = new ModificationObj(CV.CVID.CVID_Unknown, termName, 5, 42).CVParams;

                if (cvParams.Count == 1)
                {
                    // Exactly one param: it must be the expected term
                    Assert.AreEqual(expectedCvid, cvParams[0].Cvid, "Enums do not match!");
                    continue;
                }

                // Zero or several params: at least one of them must be the expected term
                if (!cvParams.Any(cvParam => cvParam.Cvid.Equals(expectedCvid)))
                {
                    Assert.Fail("No matching enum found! {0}", expectedCvid);
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Write the supplied database search results to an mzIdentML file.
        /// </summary>
        /// <remarks>
        /// The document is assembled with an IdentDataCreator: analysis software, search settings
        /// (via CreateMzidSettings), the search database and the spectra data are registered first, then one
        /// spectrum identification (with peptide, modifications, peptide evidence and score CV params) is added
        /// per match. Search configuration is read from the <c>options</c>, <c>database</c> and <c>lcmsRun</c>
        /// members of the enclosing class.
        /// </remarks>
        /// <param name="matches">Search results to write; one spectrum identification is created per entry.</param>
        /// <param name="outputFilePath">Path of the mzid file to write; its file name without extension is used as the dataset name.</param>
        /// <exception cref="System.Exception">
        /// Two search modifications share the same name but have different compositions.
        /// </exception>
        public void WriteResultsToMzid(IEnumerable <DatabaseSearchResultData> matches, string outputFilePath)
        {
            var datasetName = Path.GetFileNameWithoutExtension(outputFilePath);
            var creator     = new IdentDataCreator("MSPathFinder_" + datasetName, "MSPathFinder_" + datasetName);
            // NOTE(review): GetCallingAssembly() reports the version of whichever assembly called this method - confirm that is the intended version source
            var soft        = creator.AddAnalysisSoftware("Software_1", "MSPathFinder", System.Reflection.Assembly.GetCallingAssembly().GetName().Version.ToString(), CV.CVID.MS_MSPathFinder, "MSPathFinder");
            var settings    = creator.AddAnalysisSettings(soft, "Settings_1", CV.CVID.MS_ms_ms_search);
            var searchDb    = creator.AddSearchDatabase(database.GetFastaFilePath(), database.GetNumEntries(), Path.GetFileNameWithoutExtension(database.GetFastaFilePath()), CV.CVID.CVID_Unknown,
                                                        CV.CVID.MS_FASTA_format);

            if (options.TargetDecoySearchMode.HasFlag(DatabaseSearchMode.Decoy))
            {
                // Describe the decoy portion of the database; decoy accessions are recognized by the "XXX" prefix
                searchDb.CVParams.AddRange(new CVParamObj[]
                {
                    new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_DB_composition_target_decoy,
                    },
                    new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_decoy_DB_accession_regexp, Value = "^XXX",
                    },
                    //new CVParamObj() { Cvid = CV.CVID.MS_decoy_DB_type_reverse, },
                    new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_decoy_DB_type_randomized,
                    },
                });
            }

            // store the settings...
            CreateMzidSettings(settings);

            // Prefer the raw file path recorded in a PBF run (when available) over the spectrum file path from the options
            var path = options.SpecFilePath;
            var run  = lcmsRun as PbfLcMsRun;

            if (run != null)
            {
                var rawPath = run.RawFilePath;
                if (!string.IsNullOrWhiteSpace(rawPath))
                {
                    path = rawPath;
                }
            }
            // TODO: fix this to match correctly to the original file - May need to modify the PBF format to add an input format specifier
            // TODO: Should probably? request a CV Term for the PBF format?
            var nativeIdFormat = lcmsRun.NativeIdFormat;

            if (nativeIdFormat == CV.CVID.CVID_Unknown)
            {
                // Fall back to plain scan numbers, matching the "scan=N" ids generated below
                nativeIdFormat = CV.CVID.MS_scan_number_only_nativeID_format;
            }
            var specData = creator.AddSpectraData(path, datasetName, nativeIdFormat, lcmsRun.NativeFormat);

            // Get the search modifications as they were passed into the AminoAcidSet constructor, so we can retrieve masses from them
            var modDict = new Dictionary <string, Modification>();

            foreach (var mod in options.AminoAcidSet.SearchModifications)
            {
                var modification = mod.Modification;
                Modification existing;

                // Single lookup: either register the name, or verify the duplicate is really the same modification
                if (!modDict.TryGetValue(modification.Name, out existing))
                {
                    modDict.Add(modification.Name, modification);
                }
                else if (!existing.Composition.Equals(modification.Composition))
                {
                    throw new System.Exception(
                              "ERROR: Cannot have modifications with the same name and different composition/mass! Fix input modifications! Duplicated modification name: " +
                              modification.Name);
                }
            }

            foreach (var match in matches)
            {
                var scanNum  = match.ScanNum;
                var spec     = lcmsRun.GetSpectrum(scanNum, false);
                var matchIon = new Ion(Composition.Parse(match.Composition), match.Charge);

                // Use the spectrum's native id when it has one, otherwise synthesize one from the scan number
                var nativeId = spec.NativeId;
                if (string.IsNullOrWhiteSpace(nativeId))
                {
                    nativeId = "scan=" + spec.ScanNum;
                }
                var specIdent = creator.AddSpectrumIdentification(specData, nativeId, spec.ElutionTime, match.MostAbundantIsotopeMz,
                                                                  match.Charge, 1, double.NaN);
                specIdent.CalculatedMassToCharge = matchIon.GetMonoIsotopicMz();
                var pep = new PeptideObj(match.Sequence);

                // Modifications arrive as comma-separated "<name> <integer>" entries;
                // the name is resolved through modDict to obtain the mass
                var modText = match.Modifications;
                if (!string.IsNullOrWhiteSpace(modText))
                {
                    var mods = modText.Split(',');
                    foreach (var mod in mods)
                    {
                        var tokens  = mod.Split(' ');
                        var modInfo = modDict[tokens[0]];
                        // Parse with the invariant culture, consistent with how every number is written in this method
                        var modObj  = new ModificationObj(CV.CVID.MS_unknown_modification, modInfo.Name, int.Parse(tokens[1], CultureInfo.InvariantCulture), modInfo.Mass);
                        pep.Modifications.Add(modObj);
                    }
                }
                specIdent.Peptide = pep;

                var proteinName        = match.ProteinName;
                var protLength         = match.ProteinLength;
                var proteinDescription = match.ProteinDescription;
                var dbSeq = new DbSequenceObj(searchDb, protLength, proteinName, proteinDescription);

                var start = match.Start;
                var end   = match.End;
                // Decoy detection must be an exact (ordinal) prefix test to agree with the "^XXX" regexp declared above
                var isDecoy = proteinName.StartsWith("XXX", System.StringComparison.Ordinal);
                var pepEv   = new PeptideEvidenceObj(dbSeq, pep, start, end, match.Pre, match.Post, isDecoy);
                specIdent.AddPeptideEvidence(pepEv);

                var probability = match.Probability;

                specIdent.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_chemical_compound_formula, Value = match.Composition,
                });
                //specIdent.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_number_of_matched_peaks, Value = match.NumMatchedFragments.ToString(), });
                specIdent.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MSPathFinder_RawScore, Value = probability.ToString(CultureInfo.InvariantCulture),
                });
                specIdent.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MSPathFinder_SpecEValue, Value = match.SpecEValue.ToString(CultureInfo.InvariantCulture),
                });
                specIdent.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MSPathFinder_EValue, Value = match.EValue.ToString(CultureInfo.InvariantCulture),
                });
                if (match.HasTdaScores)
                {
                    // Q-values are only written when the match carries target-decoy analysis scores
                    specIdent.CVParams.Add(new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_MSPathFinder_QValue, Value = match.QValue.ToString(CultureInfo.InvariantCulture),
                    });
                    specIdent.CVParams.Add(new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_MSPathFinder_PepQValue, Value = match.PepQValue.ToString(CultureInfo.InvariantCulture),
                    });
                }
                // MS-GF+ similarity: find/add isotope error?
                // MS-GF+ similarity: find/add assumed dissociation method?
                //specIdent.UserParams.Add(new UserParamObj() {Name = "Assumed Dissociation Method", Value = match.});
            }

            var identData = creator.GetIdentData();

            MzIdentMlReaderWriter.Write(new MzIdentMLType(identData), outputFilePath);
        }
Exemple #3
0
        /// <summary>
        /// Fill the spectrum identification protocol with the search settings held in <c>options</c>.
        /// </summary>
        /// <remarks>
        /// Added in order: mass-type CV params and user params describing the search options, the specified
        /// activation method, the search type (only when not searching both target and decoy), one search
        /// modification per entry of the amino acid set, precursor and fragment tolerances in ppm, and a
        /// "no threshold" entry.
        /// </remarks>
        /// <param name="settings">Protocol object that receives the parameters.</param>
        private void CreateMzidSettings(SpectrumIdentificationProtocolObj settings)
        {
            // Small factory so the long list of name/value pairs below stays readable
            System.Func<string, string, UserParamObj> userParam = (name, value) => new UserParamObj()
            {
                Name = name, Value = value
            };

            var searchParams = settings.AdditionalSearchParams.Items;

            searchParams.AddRange(new ParamBaseObj[]
            {
                new CVParamObj(CV.CVID.MS_parent_mass_type_mono),
                new CVParamObj(CV.CVID.MS_fragment_mass_type_mono),
                userParam("TargetDecoyApproach", (options.TargetDecoySearchMode == DatabaseSearchMode.Both).ToString()),
                userParam("MinSequenceLength", options.MinSequenceLength.ToString()),
                userParam("MaxSequenceLength", options.MaxSequenceLength.ToString()),
                userParam("MaxNumNTermCleavages", options.MaxNumNTermCleavages.ToString()),
                userParam("MaxNumCTermCleavages", options.MaxNumCTermCleavages.ToString()),
                userParam("MinPrecursorIonCharge", options.MinPrecursorIonCharge.ToString()),
                userParam("MaxPrecursorIonCharge", options.MaxPrecursorIonCharge.ToString()),
                userParam("MinProductIonCharge", options.MinProductIonCharge.ToString()),
                userParam("MaxProductIonCharge", options.MaxProductIonCharge.ToString()),
                userParam("MinSequenceMass", options.MinSequenceMass.ToString(CultureInfo.InvariantCulture)),
                userParam("MaxSequenceMass", options.MaxSequenceMass.ToString(CultureInfo.InvariantCulture)),
                userParam("PrecursorIonTolerance", options.PrecursorIonTolerance.ToString()),
                userParam("ProductIonTolerance", options.ProductIonTolerance.ToString()),
                userParam("SearchMode", options.InternalCleavageMode.ToString()),
                userParam("NumMatchesPerSpectrum", options.NumMatchesPerSpectrum.ToString()),
                userParam("TagBasedSearch", options.TagBasedSearch.ToString()),
            });

            // An unknown activation method is reported with an explanatory label
            var activationMethod = options.ActivationMethod == ActivationMethod.Unknown
                                       ? $"Determined By Spectrum ({options.ActivationMethod})"
                                       : options.ActivationMethod.ToString();
            searchParams.Add(userParam("SpecifiedActivationMethod", activationMethod));

            // The search type is only written when this is not a combined target-decoy search
            if (options.TargetDecoySearchMode != DatabaseSearchMode.Both)
            {
                searchParams.Add(userParam("SearchType", options.TargetDecoySearchMode.ToString()));
            }

            // Describe the search modifications exactly as they were supplied to the AminoAcidSet
            foreach (var mod in options.AminoAcidSet.SearchModifications)
            {
                var modObj = new SearchModificationObj()
                {
                    FixedMod  = mod.IsFixedModification,
                    MassDelta = (float)mod.Modification.Mass,
                    Residues  = mod.TargetResidue.ToString(),
                };

                // "*" marks a wildcard residue for N-Term or C-Term modifications; the mzIdentML standard expects "." instead
                if (modObj.Residues.Contains("*"))
                {
                    modObj.Residues = modObj.Residues.Replace("*", ".");
                }

                // A throw-away ModificationObj is built only to reuse its name-to-CVParam resolution
                var nameResolver = new ModificationObj(CV.CVID.MS_unknown_modification, mod.Modification.Name, 0, modObj.MassDelta);
                modObj.CVParams.Add(nameResolver.CVParams.First());

                var location = mod.Location;
                if (location != SequenceLocation.Everywhere)
                {
                    // A position-restricted modification gets a specificity rule
                    var rule = new SpecificityRulesListObj();

                    if (location == SequenceLocation.PeptideNTerm)
                    {
                        rule.CVParams.Add(new CVParamObj(CV.CVID.MS_modification_specificity_peptide_N_term));
                    }
                    else if (location == SequenceLocation.PeptideCTerm)
                    {
                        rule.CVParams.Add(new CVParamObj(CV.CVID.MS_modification_specificity_peptide_C_term));
                    }
                    else if (location == SequenceLocation.ProteinNTerm)
                    {
                        rule.CVParams.Add(new CVParamObj(CV.CVID.MS_modification_specificity_protein_N_term));
                    }
                    else if (location == SequenceLocation.ProteinCTerm)
                    {
                        rule.CVParams.Add(new CVParamObj(CV.CVID.MS_modification_specificity_protein_C_term));
                    }

                    // Any other location leaves the rule without CV params; the rule is added regardless
                    modObj.SpecificityRules.Add(rule);
                }

                settings.ModificationParams.Add(modObj);
            }

            // No enzyme for top-down search
            //settings.Enzymes.Enzymes.Add(new EnzymeObj());

            // Symmetric precursor tolerance, in ppm
            var precursorPpm = options.PrecursorIonTolerancePpm.ToString(CultureInfo.InvariantCulture);
            settings.ParentTolerances.AddRange(new CVParamObj[]
            {
                new CVParamObj(CV.CVID.MS_search_tolerance_plus_value, precursorPpm)
                {
                    UnitCvid = CV.CVID.UO_parts_per_million
                },
                new CVParamObj(CV.CVID.MS_search_tolerance_minus_value, precursorPpm)
                {
                    UnitCvid = CV.CVID.UO_parts_per_million
                },
            });

            // Symmetric fragment tolerance, in ppm
            var productPpm = options.ProductIonTolerancePpm.ToString(CultureInfo.InvariantCulture);
            settings.FragmentTolerances.AddRange(new CVParamObj[]
            {
                new CVParamObj(CV.CVID.MS_search_tolerance_plus_value, productPpm)
                {
                    UnitCvid = CV.CVID.UO_parts_per_million
                },
                new CVParamObj(CV.CVID.MS_search_tolerance_minus_value, productPpm)
                {
                    UnitCvid = CV.CVID.UO_parts_per_million
                },
            });

            settings.Threshold.Items.Add(new CVParamObj(CV.CVID.MS_no_threshold));
        }
        /// <summary>
        /// Converts an MSPathFinder "_IcTda.tsv" result file into an mzIdentML file.
        /// </summary>
        /// <remarks>
        /// Input, database and output locations are hard-coded below. Results are read with
        /// ReadMsPathfinderResults and one spectrum identification, with its peptide, modifications,
        /// peptide evidence and score CV params, is added per result before the document is written.
        /// </remarks>
        public void CreateMzidFile()
        {
            var dir         = @"F:\MSPathfinder_Tests";
            var datasetName = "QC_ShewIntact_16_12AUG16_Bane_16-03-19";
            var input       = Path.Combine(dir, "QC_ShewIntact_16_12AUG16_Bane_16-03-19_IcTda.tsv");
            var dbName      = "ID_005435_435B0CDA.fasta";
            // Unused: var database = Path.Combine(dir, "ID_005435_435B0CDA.fasta");
            var output = Path.Combine(dir, "QC_ShewIntact_16_12AUG16_Bane_16-03-19.mzid");

            var creator = new IdentDataCreator("MSPathFinder_" + datasetName, "MSPathFinder_" + datasetName);
            // Unused: var soft = creator.AddAnalysisSoftware("Software_1", "MSPathFinder", "1.3", CV.CVID.CVID_Unknown, "MSPathFinder");
            // Unused: var settings = creator.AddAnalysisSettings(soft, "Settings_1", CV.CVID.MS_ms_ms_search);
            var searchDb = creator.AddSearchDatabase(Path.Combine(dir, dbName), 1000000, dbName, CV.CVID.CVID_Unknown,
                                                     CV.CVID.MS_FASTA_format);
            var specData = creator.AddSpectraData(Path.Combine(dir, datasetName + ".raw"), datasetName, CV.CVID.MS_Thermo_nativeID_format,
                                                  CV.CVID.MS_Thermo_RAW_format);

            foreach (var result in ReadMsPathfinderResults(input))
            {
                // Native id string built to go with the Thermo nativeID format declared for the spectra data
                var native = "controllerType=0 controllerNumber=1 scan=" + result.Scan;
                var spec   = creator.AddSpectrumIdentification(specData, native, result.Scan, result.MostAbundantIsotopeMz,
                                                               result.Charge, 1, result.MostAbundantIsotopeMz);
                var pep = new PeptideObj(result.Sequence);
                foreach (var mod in result.Modifications)
                {
                    var modObj = new ModificationObj(CV.CVID.MS_unknown_modification, mod.Item1, mod.Item2);
                    pep.Modifications.Add(modObj);
                }
                spec.Peptide = pep;

                var dbSeq = new DbSequenceObj(searchDb, result.ProteinLength, result.ProteinName,
                                              result.ProteinDesc);

                // Every result is recorded as a non-decoy hit (last argument)
                var pepEv = new PeptideEvidenceObj(dbSeq, pep, result.Start, result.End, result.Pre, result.Post, false);
                spec.AddPeptideEvidence(pepEv);

                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_monoisotopic_mass_OBSOLETE, Value = result.Mass.ToString(CultureInfo.InvariantCulture), UnitCvid = CV.CVID.MS_m_z,
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_chemical_formula, Value = result.Composition,
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_number_of_matched_peaks, Value = result.NumMatchedFragments.ToString(),
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_SEQUEST_probability, Value = result.Probability.ToString(CultureInfo.InvariantCulture),
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MS_GF_SpecEValue, Value = result.SpecEValue.ToString(CultureInfo.InvariantCulture),
                });
                // The E-value gets its own CV term; it was previously written under the SpecEValue term,
                // producing two SpecEValue params and no EValue param
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MS_GF_EValue, Value = result.EValue.ToString(CultureInfo.InvariantCulture),
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MS_GF_QValue, Value = result.QValue.ToString(CultureInfo.InvariantCulture),
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MS_GF_PepQValue, Value = result.PepQValue.ToString(CultureInfo.InvariantCulture),
                });
            }

            var identData = creator.GetIdentData();

            MzIdentMlReaderWriter.Write(new MzIdentMLType(identData), output);
        }