Пример #1
0
        /// <summary>
        /// Read the tab-delimited header line of a SEQUEST _syn.txt file and record
        /// the position of each recognized column in columnMapping
        /// </summary>
        /// <remarks>
        /// columnMapping is cleared, then given one entry for every value of
        /// SequestSynopsisFileColumns; columns not found in the header keep the value -1
        /// </remarks>
        /// <param name="lineIn">Header line, with column names separated by tabs</param>
        /// <param name="columnMapping">Dictionary to populate; values are the 0-based position of the column in the header line, or -1 if absent</param>
        /// <returns>True if successful, false if an error</returns>
        private bool ParseSequestSynFileHeaderLine(string lineIn, IDictionary <SequestSynopsisFileColumns, int> columnMapping)
        {
            var knownHeaders = SequestSynFileReader.GetColumnHeaderNamesAndIDs();

            columnMapping.Clear();

            try
            {
                // Start by flagging every known column as "not present"
                foreach (SequestSynopsisFileColumns column in Enum.GetValues(typeof(SequestSynopsisFileColumns)))
                {
                    columnMapping.Add(column, -1);
                }

                // Walk the header names, tracking the position of each one
                var position = 0;
                foreach (var headerName in lineIn.Split('\t'))
                {
                    if (knownHeaders.TryGetValue(headerName, out var matchedColumn))
                    {
                        // Known header name: store where it was found
                        columnMapping[matchedColumn] = position;
                    }

                    position++;
                }

                return true;
            }
            catch (Exception ex)
            {
                SetErrorMessage("Error parsing the header line in the Sequest synopsis file", ex);
                return false;
            }
        }
Пример #2
0
        /// <summary>
        /// Exercise the PHRP reader: open the given file with a ReaderFactory, step through
        /// every PSM, build a list of values for each one, and report progress on the console
        /// </summary>
        /// <remarks>
        /// The flattened value list is written to the console for every 10000th PSM.
        /// Every PSM read is also stored in a local dictionary that is never queried.
        /// </remarks>
        /// <param name="synOrFhtFile">Path of the input file handed to the reader</param>
        /// <param name="blnSkipDuplicates">Value assigned to the reader's SkipDuplicatePSMs property</param>
        private static void TestPHRPReader(string synOrFhtFile, bool blnSkipDuplicates)
        {
            var sourceFile = new FileInfo(synOrFhtFile);

            Console.WriteLine("Instantiating reader");
            var options = new StartupOptions
            {
                LoadModsAndSeqInfo = true,
                LoadMSGFResults = true,
                LoadScanStatsData = false,
                MaxProteinsPerPSM = 100
            };

            var reader = new ReaderFactory(sourceFile.FullName, PeptideHitResultTypes.Unknown, options)
            {
                EchoMessagesToConsole = false,
                SkipDuplicatePSMs = blnSkipDuplicates
            };

            // Report any problems found while the reader was being constructed
            if (reader.ErrorMessages.Count > 0)
            {
                Console.WriteLine("Error(s) instantiating the reader:");
                foreach (var message in reader.ErrorMessages)
                {
                    Console.WriteLine($"  {message}");
                }
            }

            reader.ErrorEvent += ErrorEventHandler;
            reader.StatusEvent += MessageEventHandler;
            reader.WarningEvent += WarningEventHandler;

            const bool useFastRead = true;
            reader.FastReadMode = useFastRead;

            var massCalc = new PeptideMassCalculator();

            if (!reader.CanRead)
            {
                Console.WriteLine($"Aborting since PHRPReader is not ready: {reader.ErrorMessage}");
                return;
            }

            // Reused for every PSM; cleared at the top of each iteration
            var fields = new List<string>();

            var psmCount = 0;
            var modifiedPsmCount = 0;

            // ReSharper disable once CollectionNeverQueried.Local
            var psmCache = new Dictionary<int, PSM>();

            Console.WriteLine("Reading data");

            while (reader.MoveNext())
            {
                var current = reader.CurrentPSM;

                psmCount++;
                fields.Clear();

                reader.FinalizeCurrentPSM();

                // Outputs are discarded; the call is made only to exercise the method
                PeptideCleavageStateCalculator.SplitPrefixAndSuffixFromSequence(current.Peptide, out _, out _, out _);

                var massErrorPpm = GetCorrectedMassErrorPPM(current, out _);

                // #SpecFile
                fields.Add($"{reader.DatasetName}_dta.txt");

                // SpecID
                fields.Add($"index={psmCount}");

                // ScanNum
                fields.Add(current.ScanNumber.ToString());

                // FragMethod
                fields.Add(current.CollisionMode);

                // Precursor m/z
                var precursorMz = massCalc.ConvoluteMass(current.PrecursorNeutralMass, 0, current.Charge);
                fields.Add(precursorMz.ToString(CultureInfo.InvariantCulture));

                // PrecursorError(ppm)
                fields.Add(massErrorPpm);

                // Charge
                fields.Add(current.Charge.ToString());

                // Tryptic state (0, 1, or 2)
                fields.Add(current.NumTrypticTermini.ToString());

                // Peptide
                fields.Add(CleanupPeptide(current.PeptideWithNumericMods));

                // SeqID; surrounded by asterisks when it is undefined (zero or negative)
                fields.Add(current.SeqID <= 0
                               ? $"**{current.SeqID}**"
                               : current.SeqID.ToString());

                fields.Add(current.ProteinFirst);

                if (current.ProteinDetails.Count > 0)
                {
                    var leadProtein = current.ProteinDetails.First();

                    // Only show the protein name again if it differs from ProteinFirst
                    fields.Add(string.Equals(current.ProteinFirst, leadProtein.Key)
                                   ? "<Match>"
                                   : leadProtein.Key);

                    fields.Add(leadProtein.Value.ResidueStart.ToString());
                    fields.Add(leadProtein.Value.ResidueEnd.ToString());
                }

                // SEQUEST scores
                var xcorr = GetScore(current, SequestSynFileReader.GetColumnNameByID(SequestSynopsisFileColumns.XCorr), "0");
                fields.Add(xcorr);

                fields.Add(GetScore(current, SequestSynFileReader.GetColumnNameByID(SequestSynopsisFileColumns.Sp), "0"));
                fields.Add(current.MSGFSpecEValue);
                fields.Add(GetScore(current, SequestSynFileReader.GetColumnNameByID(SequestSynopsisFileColumns.DeltaCn2), "0"));

                // MSGFDB / MS-GF+ scores
                fields.Add(GetScore(current, MSGFPlusSynFileReader.GetMSGFDBColumnNameByID(MSGFDBSynFileColumns.PValue), "0"));
                fields.Add(GetScore(current, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.EValue), "0"));
                fields.Add(GetScore(current, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.RankSpecEValue), "0"));
                fields.Add(GetScore(current, MSGFPlusSynFileReader.GetMSGFDBColumnNameByID(MSGFDBSynFileColumns.FDR), "1"));
                fields.Add(GetScore(current, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.QValue), "0"));
                fields.Add(GetScore(current, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.PepQValue), "0"));

                // Extra console output for one specific peptide sequence
                if (current.PeptideCleanSequence == "QQIEESTSDYDKEK")
                {
                    Console.WriteLine($"{current.Peptide} in scan {current.ScanNumber}");

                    var parentIonMZ = massCalc.ConvoluteMass(current.PrecursorNeutralMass, 0, current.Charge);

                    Console.WriteLine($"ParentIonMZ   = {parentIonMZ}");
                    Console.WriteLine($"PeptideWithNumericMods   = {current.PeptideWithNumericMods}");
                }

                if (current.ModifiedResidues.Count > 0)
                {
                    modifiedPsmCount++;

                    // Show the modification details for every 500th modified PSM
                    if (modifiedPsmCount % 500 == 0)
                    {
                        Console.WriteLine($"PeptideWithNumericMods   = {current.PeptideWithNumericMods}");
                        foreach (var residue in current.ModifiedResidues)
                        {
                            Console.WriteLine($"  {residue.Residue}{residue.EndResidueLocInPeptide}: {residue.ModDefinition.ModificationMassAsText}");
                        }
                    }

                    // Compare the stored monoisotopic mass against one recomputed from the sequence
                    var recomputedMass = massCalc.ComputeSequenceMassNumericMods(current.PeptideWithNumericMods);
                    var massDifference = current.PeptideMonoisotopicMass - recomputedMass;
                    if (Math.Abs(massDifference) > 0.1)
                    {
                        Console.WriteLine("  Peptide mass disagreement: " + massDifference.ToString("0.0000000"));
                    }
                }

                var flattenedLine = FlattenList(fields);

                // Periodic progress output
                if (psmCount % 10000 == 0)
                {
                    Console.WriteLine(flattenedLine);
                }

                psmCache.Add(psmCount, current);
            }
        }