/// <summary>
/// Writes the supplied search results to an mzIdentML file at <paramref name="outputFilePath"/>.
/// </summary>
/// <remarks>
/// Besides its parameters, this method reads the <c>database</c>, <c>options</c> and <c>lcmsRun</c>
/// members declared elsewhere in the class, and calls <c>CreateMzidSettings</c> to populate the
/// analysis settings.
/// </remarks>
/// <param name="matches">Search results to write; one spectrum identification is added per entry.</param>
/// <param name="outputFilePath">
/// Destination file path. Its file name without extension is also used as the dataset name.
/// </param>
/// <exception cref="System.Exception">
/// Two search modifications share a name but have different compositions.
/// </exception>
/// <exception cref="KeyNotFoundException">
/// A match references a modification name that is not among the search modifications.
/// </exception>
/// <exception cref="System.FormatException">
/// A modification entry of a match is not of the form "name integer".
/// </exception>
public void WriteResultsToMzid(IEnumerable<DatabaseSearchResultData> matches, string outputFilePath)
{
    // Single source of truth for the decoy accession prefix: used for the declared
    // accession regexp as well as for flagging individual peptide evidences as decoys.
    const string decoyPrefix = "XXX";

    var datasetName = Path.GetFileNameWithoutExtension(outputFilePath);
    var creator = new IdentDataCreator("MSPathFinder_" + datasetName, "MSPathFinder_" + datasetName);
    var soft = creator.AddAnalysisSoftware(
        "Software_1",
        "MSPathFinder",
        System.Reflection.Assembly.GetCallingAssembly().GetName().Version.ToString(),
        CV.CVID.MS_MSPathFinder,
        "MSPathFinder");
    var settings = creator.AddAnalysisSettings(soft, "Settings_1", CV.CVID.MS_ms_ms_search);
    var searchDb = creator.AddSearchDatabase(
        database.GetFastaFilePath(),
        database.GetNumEntries(),
        Path.GetFileNameWithoutExtension(database.GetFastaFilePath()),
        CV.CVID.CVID_Unknown,
        CV.CVID.MS_FASTA_format);

    if (options.TargetDecoySearchMode.HasFlag(DatabaseSearchMode.Decoy))
    {
        searchDb.CVParams.AddRange(new CVParamObj[]
        {
            new CVParamObj() { Cvid = CV.CVID.MS_DB_composition_target_decoy, },
            new CVParamObj() { Cvid = CV.CVID.MS_decoy_DB_accession_regexp, Value = "^" + decoyPrefix, },
            //new CVParamObj() { Cvid = CV.CVID.MS_decoy_DB_type_reverse, },
            new CVParamObj() { Cvid = CV.CVID.MS_decoy_DB_type_randomized, },
        });
    }

    // store the settings...
    CreateMzidSettings(settings);

    // Report the raw file path when the run is a PbfLcMsRun that knows it;
    // otherwise fall back to the spectrum file path from the options.
    var path = options.SpecFilePath;
    var run = lcmsRun as PbfLcMsRun;
    if (run != null)
    {
        var rawPath = run.RawFilePath;
        if (!string.IsNullOrWhiteSpace(rawPath))
        {
            path = rawPath;
        }
    }

    // TODO: fix this to match correctly to the original file - May need to modify the PBF format to add an input format specifier
    // TODO: Should probably? request a CV Term for the PBF format?
    var nativeIdFormat = lcmsRun.NativeIdFormat;
    if (nativeIdFormat == CV.CVID.CVID_Unknown)
    {
        nativeIdFormat = CV.CVID.MS_scan_number_only_nativeID_format;
    }

    var specData = creator.AddSpectraData(path, datasetName, nativeIdFormat, lcmsRun.NativeFormat);

    // Get the search modifications as they were passed into the AminoAcidSet constructor, so we can retrieve masses from them
    var modDict = new Dictionary<string, Modification>();
    foreach (var searchMod in options.AminoAcidSet.SearchModifications)
    {
        var modification = searchMod.Modification;
        Modification known;
        if (!modDict.TryGetValue(modification.Name, out known))
        {
            modDict.Add(modification.Name, modification);
        }
        else if (!known.Composition.Equals(modification.Composition))
        {
            throw new System.Exception(
                "ERROR: Cannot have modifications with the same name and different composition/mass! Fix input modifications! Duplicated modification name: " +
                modification.Name);
        }
    }

    foreach (var match in matches)
    {
        var scanNum = match.ScanNum;
        var spec = lcmsRun.GetSpectrum(scanNum, false);
        var matchIon = new Ion(Composition.Parse(match.Composition), match.Charge);

        // Synthesize a scan-number based native ID when the spectrum does not carry one.
        var nativeId = spec.NativeId;
        if (string.IsNullOrWhiteSpace(spec.NativeId))
        {
            nativeId = "scan=" + spec.ScanNum;
        }

        var specIdent = creator.AddSpectrumIdentification(
            specData, nativeId, spec.ElutionTime, match.MostAbundantIsotopeMz, match.Charge, 1, double.NaN);
        specIdent.CalculatedMassToCharge = matchIon.GetMonoIsotopicMz();

        var pep = new PeptideObj(match.Sequence);
        var modText = match.Modifications;
        if (!string.IsNullOrWhiteSpace(modText))
        {
            // Entries are comma separated; each entry is "<name> <integer>".
            foreach (var mod in modText.Split(','))
            {
                // Dropping empty tokens tolerates stray whitespace around an entry
                // (e.g. a space following the comma) without changing well-formed input.
                var tokens = mod.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);
                if (tokens.Length < 2)
                {
                    throw new System.FormatException(
                        "Malformed modification entry \"" + mod + "\" for scan " + scanNum +
                        "; expected \"<name> <integer>\".");
                }

                Modification modInfo;
                if (!modDict.TryGetValue(tokens[0], out modInfo))
                {
                    throw new KeyNotFoundException(
                        "Modification \"" + tokens[0] + "\" of scan " + scanNum +
                        " is not one of the search modifications.");
                }

                // The result text is machine generated, so parse culture-independently.
                var modObj = new ModificationObj(
                    CV.CVID.MS_unknown_modification,
                    modInfo.Name,
                    int.Parse(tokens[1], CultureInfo.InvariantCulture),
                    modInfo.Mass);
                pep.Modifications.Add(modObj);
            }
        }

        specIdent.Peptide = pep;

        var dbSeq = new DbSequenceObj(searchDb, match.ProteinLength, match.ProteinName, match.ProteinDescription);

        // Ordinal comparison: the prefix is an identifier, not linguistic text.
        var isDecoy = match.ProteinName.StartsWith(decoyPrefix, System.StringComparison.Ordinal);
        var pepEv = new PeptideEvidenceObj(dbSeq, pep, match.Start, match.End, match.Pre, match.Post, isDecoy);
        specIdent.AddPeptideEvidence(pepEv);

        specIdent.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_chemical_compound_formula, Value = match.Composition, });
        //specIdent.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_number_of_matched_peaks, Value = match.NumMatchedFragments.ToString(), });
        specIdent.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_MSPathFinder_RawScore, Value = match.Probability.ToString(CultureInfo.InvariantCulture), });
        specIdent.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_MSPathFinder_SpecEValue, Value = match.SpecEValue.ToString(CultureInfo.InvariantCulture), });
        specIdent.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_MSPathFinder_EValue, Value = match.EValue.ToString(CultureInfo.InvariantCulture), });

        // Q-values are only written when the match carries target-decoy analysis scores.
        if (match.HasTdaScores)
        {
            specIdent.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_MSPathFinder_QValue, Value = match.QValue.ToString(CultureInfo.InvariantCulture), });
            specIdent.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_MSPathFinder_PepQValue, Value = match.PepQValue.ToString(CultureInfo.InvariantCulture), });
        }

        // MS-GF+ similarity: find/add isotope error?
        // MS-GF+ similarity: find/add assumed dissociation method?
        //specIdent.UserParams.Add(new UserParamObj() {Name = "Assumed Dissociation Method", Value = match.});
    }

    var identData = creator.GetIdentData();
    MzIdentMlReaderWriter.Write(new MzIdentMLType(identData), outputFilePath);
}
/// <summary>
/// Converts a fixed MSPathFinder result file (<c>*_IcTda.tsv</c>) in a hard-coded local
/// directory into an mzIdentML file written next to it.
/// </summary>
/// <remarks>
/// Results are read through <c>ReadMsPathfinderResults</c>, which is defined outside this method.
/// No analysis software or analysis settings are registered with the creator here.
/// </remarks>
public void CreateMzidFile()
{
    var dir = @"F:\MSPathfinder_Tests";
    var datasetName = "QC_ShewIntact_16_12AUG16_Bane_16-03-19";
    var input = Path.Combine(dir, "QC_ShewIntact_16_12AUG16_Bane_16-03-19_IcTda.tsv");
    var dbName = "ID_005435_435B0CDA.fasta";
    var output = Path.Combine(dir, "QC_ShewIntact_16_12AUG16_Bane_16-03-19.mzid");

    var creator = new IdentDataCreator("MSPathFinder_" + datasetName, "MSPathFinder_" + datasetName);
    var searchDb = creator.AddSearchDatabase(Path.Combine(dir, dbName), 1000000, dbName, CV.CVID.CVID_Unknown, CV.CVID.MS_FASTA_format);
    var specData = creator.AddSpectraData(
        Path.Combine(dir, datasetName + ".raw"),
        datasetName,
        CV.CVID.MS_Thermo_nativeID_format,
        CV.CVID.MS_Thermo_RAW_format);

    foreach (var result in ReadMsPathfinderResults(input))
    {
        // Native ID built in the "controllerType/controllerNumber/scan" form to go with
        // the Thermo native ID format declared for the spectra data above.
        var native = "controllerType=0 controllerNumber=1 scan=" + result.Scan;
        var spec = creator.AddSpectrumIdentification(
            specData, native, result.Scan, result.MostAbundantIsotopeMz, result.Charge, 1, result.MostAbundantIsotopeMz);

        var pep = new PeptideObj(result.Sequence);
        foreach (var mod in result.Modifications)
        {
            var modObj = new ModificationObj(CV.CVID.MS_unknown_modification, mod.Item1, mod.Item2);
            pep.Modifications.Add(modObj);
        }

        spec.Peptide = pep;

        var dbSeq = new DbSequenceObj(searchDb, result.ProteinLength, result.ProteinName, result.ProteinDesc);
        var pepEv = new PeptideEvidenceObj(dbSeq, pep, result.Start, result.End, result.Pre, result.Post, false);
        spec.AddPeptideEvidence(pepEv);

        spec.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_monoisotopic_mass_OBSOLETE, Value = result.Mass.ToString(CultureInfo.InvariantCulture), UnitCvid = CV.CVID.MS_m_z, });
        spec.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_chemical_formula, Value = result.Composition, });
        spec.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_number_of_matched_peaks, Value = result.NumMatchedFragments.ToString(), });
        spec.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_SEQUEST_probability, Value = result.Probability.ToString(CultureInfo.InvariantCulture), });
        spec.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_MS_GF_SpecEValue, Value = result.SpecEValue.ToString(CultureInfo.InvariantCulture), });
        // The E-value gets its own CV term; writing it under the SpecEValue term again would
        // emit two conflicting SpecEValue entries for the same identification.
        spec.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_MS_GF_EValue, Value = result.EValue.ToString(CultureInfo.InvariantCulture), });
        spec.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_MS_GF_QValue, Value = result.QValue.ToString(CultureInfo.InvariantCulture), });
        spec.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_MS_GF_PepQValue, Value = result.PepQValue.ToString(CultureInfo.InvariantCulture), });
    }

    var identData = creator.GetIdentData();
    MzIdentMlReaderWriter.Write(new MzIdentMLType(identData), output);
}