Exemplo n.º 1
0
        /// <summary>
        /// Writes the supplied database search results to an mzIdentML file.
        /// </summary>
        /// <remarks>
        /// The dataset name is derived from the file name of <paramref name="outputFilePath"/> (without extension).
        /// Search database, search options and spectra come from the <c>database</c>, <c>options</c> and
        /// <c>lcmsRun</c> members of this class. Each match becomes one spectrum identification (rank 1)
        /// carrying the MSPathFinder score CV params; Q-values are only written when the match has TDA scores.
        /// </remarks>
        /// <param name="matches">Search results to write; enumerated once.</param>
        /// <param name="outputFilePath">Path of the mzIdentML file to write.</param>
        /// <exception cref="System.Exception">
        /// Two search modifications share a name but have different compositions.
        /// </exception>
        /// <exception cref="KeyNotFoundException">
        /// A match references a modification name that is not among the search modifications.
        /// </exception>
        /// <exception cref="System.FormatException">
        /// A modification entry of a match is not of the form "Name Index".
        /// </exception>
        public void WriteResultsToMzid(IEnumerable<DatabaseSearchResultData> matches, string outputFilePath)
        {
            var datasetName = Path.GetFileNameWithoutExtension(outputFilePath);
            var creator     = new IdentDataCreator("MSPathFinder_" + datasetName, "MSPathFinder_" + datasetName);
            var soft        = creator.AddAnalysisSoftware("Software_1", "MSPathFinder", System.Reflection.Assembly.GetCallingAssembly().GetName().Version.ToString(), CV.CVID.MS_MSPathFinder, "MSPathFinder");
            var settings    = creator.AddAnalysisSettings(soft, "Settings_1", CV.CVID.MS_ms_ms_search);
            var fastaPath   = database.GetFastaFilePath();
            var searchDb    = creator.AddSearchDatabase(fastaPath, database.GetNumEntries(), Path.GetFileNameWithoutExtension(fastaPath), CV.CVID.CVID_Unknown,
                                                        CV.CVID.MS_FASTA_format);

            // When decoys were searched, describe the decoy part of the database on the search database entry.
            if (options.TargetDecoySearchMode.HasFlag(DatabaseSearchMode.Decoy))
            {
                searchDb.CVParams.AddRange(new CVParamObj[]
                {
                    new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_DB_composition_target_decoy,
                    },
                    new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_decoy_DB_accession_regexp, Value = "^XXX",
                    },
                    //new CVParamObj() { Cvid = CV.CVID.MS_decoy_DB_type_reverse, },
                    new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_decoy_DB_type_randomized,
                    },
                });
            }

            // store the settings...
            CreateMzidSettings(settings);

            // Prefer the raw file path recorded in a PBF run (when present) over the configured spectrum file path.
            var path = options.SpecFilePath;
            var run  = lcmsRun as PbfLcMsRun;

            if (run != null)
            {
                var rawPath = run.RawFilePath;
                if (!string.IsNullOrWhiteSpace(rawPath))
                {
                    path = rawPath;
                }
            }
            // TODO: fix this to match correctly to the original file - May need to modify the PBF format to add an input format specifier
            // TODO: Should probably? request a CV Term for the PBF format?
            var nativeIdFormat = lcmsRun.NativeIdFormat;

            if (nativeIdFormat == CV.CVID.CVID_Unknown)
            {
                // Fall back to the scan-number-only format when the run does not report a native ID format.
                nativeIdFormat = CV.CVID.MS_scan_number_only_nativeID_format;
            }
            var specData = creator.AddSpectraData(path, datasetName, nativeIdFormat, lcmsRun.NativeFormat);

            // Get the search modifications as they were passed into the AminoAcidSet constructor, so we can retrieve masses from them
            var modDict = new Dictionary<string, Modification>();

            foreach (var searchMod in options.AminoAcidSet.SearchModifications)
            {
                var modification = searchMod.Modification;
                Modification known;

                if (!modDict.TryGetValue(modification.Name, out known))
                {
                    modDict.Add(modification.Name, modification);
                }
                else if (!known.Composition.Equals(modification.Composition))
                {
                    // Same name with a different composition would make the name -> mass lookup below ambiguous.
                    throw new System.Exception(
                              "ERROR: Cannot have modifications with the same name and different composition/mass! Fix input modifications! Duplicated modification name: " +
                              modification.Name);
                }
            }

            foreach (var match in matches)
            {
                var scanNum  = match.ScanNum;
                var spec     = lcmsRun.GetSpectrum(scanNum, false);
                var matchIon = new Ion(Composition.Parse(match.Composition), match.Charge);

                // Use the spectrum's native ID when available; otherwise synthesize one from the scan number.
                var nativeId = spec.NativeId;
                if (string.IsNullOrWhiteSpace(nativeId))
                {
                    nativeId = "scan=" + spec.ScanNum;
                }
                var specIdent = creator.AddSpectrumIdentification(specData, nativeId, spec.ElutionTime, match.MostAbundantIsotopeMz,
                                                                  match.Charge, 1, double.NaN);
                specIdent.CalculatedMassToCharge = matchIon.GetMonoIsotopicMz();
                var pep = new PeptideObj(match.Sequence);

                // The modification text is a comma-separated list of "Name Index" entries.
                var modText = match.Modifications;
                if (!string.IsNullOrWhiteSpace(modText))
                {
                    foreach (var modEntry in modText.Split(','))
                    {
                        // Tolerate surrounding/repeated spaces (e.g. "A 1, B 2") instead of failing on an empty token.
                        var tokens = modEntry.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);
                        if (tokens.Length == 0)
                        {
                            // Blank entry (e.g. trailing comma): nothing to add.
                            continue;
                        }
                        if (tokens.Length < 2)
                        {
                            throw new System.FormatException(
                                      "Cannot parse modification entry \"" + modEntry + "\" for scan " + scanNum +
                                      "; expected the form \"Name Index\".");
                        }

                        Modification modInfo;
                        if (!modDict.TryGetValue(tokens[0], out modInfo))
                        {
                            throw new KeyNotFoundException(
                                      "Modification \"" + tokens[0] + "\" (scan " + scanNum +
                                      ") was not found among the search modifications.");
                        }

                        // The index is machine-written text, so parse it independently of the current culture.
                        var modIndex = int.Parse(tokens[1], CultureInfo.InvariantCulture);
                        var modObj   = new ModificationObj(CV.CVID.MS_unknown_modification, modInfo.Name, modIndex, modInfo.Mass);
                        pep.Modifications.Add(modObj);
                    }
                }
                specIdent.Peptide = pep;

                var proteinName        = match.ProteinName;
                var protLength         = match.ProteinLength;
                var proteinDescription = match.ProteinDescription;
                var dbSeq = new DbSequenceObj(searchDb, protLength, proteinName, proteinDescription);

                // Proteins whose name starts with "XXX" are flagged as decoys (same prefix as the decoy accession regexp above).
                var isDecoy = proteinName.StartsWith("XXX", System.StringComparison.Ordinal);
                var pepEv   = new PeptideEvidenceObj(dbSeq, pep, match.Start, match.End, match.Pre, match.Post, isDecoy);
                specIdent.AddPeptideEvidence(pepEv);

                var probability = match.Probability;

                specIdent.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_chemical_compound_formula, Value = match.Composition,
                });
                //specIdent.CVParams.Add(new CVParamObj() { Cvid = CV.CVID.MS_number_of_matched_peaks, Value = match.NumMatchedFragments.ToString(), });
                specIdent.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MSPathFinder_RawScore, Value = probability.ToString(CultureInfo.InvariantCulture),
                });
                specIdent.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MSPathFinder_SpecEValue, Value = match.SpecEValue.ToString(CultureInfo.InvariantCulture),
                });
                specIdent.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MSPathFinder_EValue, Value = match.EValue.ToString(CultureInfo.InvariantCulture),
                });
                if (match.HasTdaScores)
                {
                    specIdent.CVParams.Add(new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_MSPathFinder_QValue, Value = match.QValue.ToString(CultureInfo.InvariantCulture),
                    });
                    specIdent.CVParams.Add(new CVParamObj()
                    {
                        Cvid = CV.CVID.MS_MSPathFinder_PepQValue, Value = match.PepQValue.ToString(CultureInfo.InvariantCulture),
                    });
                }
                // MS-GF+ similarity: find/add isotope error?
                // MS-GF+ similarity: find/add assumed dissociation method?
                //specIdent.UserParams.Add(new UserParamObj() {Name = "Assumed Dissociation Method", Value = match.});
            }

            var identData = creator.GetIdentData();

            MzIdentMlReaderWriter.Write(new MzIdentMLType(identData), outputFilePath);
        }
        /// <summary>
        /// Converts one hard-coded MSPathFinder result file (<c>*_IcTda.tsv</c>) into an mzIdentML file
        /// written next to it in the same hard-coded directory.
        /// </summary>
        /// <remarks>
        /// Every result row becomes one spectrum identification (rank 1) with a peptide, its modifications,
        /// a peptide evidence entry (always written as non-decoy) and the score CV params.
        /// </remarks>
        public void CreateMzidFile()
        {
            var dir         = @"F:\MSPathfinder_Tests";
            var datasetName = "QC_ShewIntact_16_12AUG16_Bane_16-03-19";
            var input       = Path.Combine(dir, datasetName + "_IcTda.tsv");
            var dbName      = "ID_005435_435B0CDA.fasta";
            // Unused: var database = Path.Combine(dir, "ID_005435_435B0CDA.fasta");
            var output = Path.Combine(dir, datasetName + ".mzid");

            var creator = new IdentDataCreator("MSPathFinder_" + datasetName, "MSPathFinder_" + datasetName);
            // Unused: var soft = creator.AddAnalysisSoftware("Software_1", "MSPathFinder", "1.3", CV.CVID.CVID_Unknown, "MSPathFinder");
            // Unused: var settings = creator.AddAnalysisSettings(soft, "Settings_1", CV.CVID.MS_ms_ms_search);
            var searchDb = creator.AddSearchDatabase(Path.Combine(dir, dbName), 1000000, dbName, CV.CVID.CVID_Unknown,
                                                     CV.CVID.MS_FASTA_format);
            var specData = creator.AddSpectraData(Path.Combine(dir, datasetName + ".raw"), datasetName, CV.CVID.MS_Thermo_nativeID_format,
                                                  CV.CVID.MS_Thermo_RAW_format);

            foreach (var result in ReadMsPathfinderResults(input))
            {
                var native = "controllerType=0 controllerNumber=1 scan=" + result.Scan;
                // NOTE(review): WriteResultsToMzid passes the spectrum elution time as the third argument and
                // double.NaN as the last one; here the scan number and the most abundant isotope m/z are used
                // in those positions - confirm this is intended.
                var spec   = creator.AddSpectrumIdentification(specData, native, result.Scan, result.MostAbundantIsotopeMz,
                                                               result.Charge, 1, result.MostAbundantIsotopeMz);
                var pep = new PeptideObj(result.Sequence);
                foreach (var mod in result.Modifications)
                {
                    var modObj = new ModificationObj(CV.CVID.MS_unknown_modification, mod.Item1, mod.Item2);
                    pep.Modifications.Add(modObj);
                }
                spec.Peptide = pep;

                var dbSeq = new DbSequenceObj(searchDb, result.ProteinLength, result.ProteinName,
                                              result.ProteinDesc);

                var pepEv = new PeptideEvidenceObj(dbSeq, pep, result.Start, result.End, result.Pre, result.Post, false);
                spec.AddPeptideEvidence(pepEv);

                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_monoisotopic_mass_OBSOLETE, Value = result.Mass.ToString(CultureInfo.InvariantCulture), UnitCvid = CV.CVID.MS_m_z,
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_chemical_formula, Value = result.Composition,
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_number_of_matched_peaks, Value = result.NumMatchedFragments.ToString(),
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_SEQUEST_probability, Value = result.Probability.ToString(CultureInfo.InvariantCulture),
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MS_GF_SpecEValue, Value = result.SpecEValue.ToString(CultureInfo.InvariantCulture),
                });
                // The E-value gets its own CV term; it was previously written under the SpecEValue term,
                // which produced two SpecEValue params and no EValue param.
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MS_GF_EValue, Value = result.EValue.ToString(CultureInfo.InvariantCulture),
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MS_GF_QValue, Value = result.QValue.ToString(CultureInfo.InvariantCulture),
                });
                spec.CVParams.Add(new CVParamObj()
                {
                    Cvid = CV.CVID.MS_MS_GF_PepQValue, Value = result.PepQValue.ToString(CultureInfo.InvariantCulture),
                });
            }

            var identData = creator.GetIdentData();

            MzIdentMlReaderWriter.Write(new MzIdentMLType(identData), output);
        }