Exemplo n.º 1
0
        /// <summary>
        /// Generates a sequence based on final best peptide sequence.
        /// </summary>
        /// <remarks>
        /// Every residue of <paramref name="seq"/> is written to the result exactly once.
        /// A residue whose modification ID is non-zero but does not match any entry in
        /// myParam.DynamicMods is written without a symbol instead of being dropped,
        /// so the returned sequence never loses residues.
        /// </remarks>
        /// <param name="seq">unmodified sequence</param>
        /// <param name="myParam">ascore parameters; DynamicMods supplies the symbol for each modification ID</param>
        /// <param name="peptideMods">
        /// peptide modification position array; 0 means the residue is not modified,
        /// any other value is compared against the UniqueID of each dynamic modification
        /// </param>
        /// <returns>The sequence with a modification symbol appended after each modified residue</returns>
        private string GenerateFinalSequences(string seq, ParameterFileManager myParam, IReadOnlyList <int> peptideMods)
        {
            var sbFinalSeq = new System.Text.StringBuilder(seq.Length);

            for (var i = 0; i < seq.Length; i++)
            {
                // The residue itself is always kept, whether or not a symbol follows it
                sbFinalSeq.Append(seq[i]);

                // Invalid index for i (array shorter than the sequence) or an ID of 0:
                // assume the residue is not modified
                if (i >= peptideMods.Count || peptideMods[i] == 0)
                {
                    continue;
                }

                foreach (var dynamicMod in myParam.DynamicMods)
                {
                    if (peptideMods[i] == dynamicMod.UniqueID)
                    {
                        sbFinalSeq.Append(dynamicMod.ModSymbol.ToString(CultureInfo.InvariantCulture));

                        // Only the first matching modification contributes a symbol
                        break;
                    }
                }
            }

            return sbFinalSeq.ToString();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Gets the list of theoretical ions to match for one set of modification positions
        /// </summary>
        /// <remarks>
        /// When FragmentMassTolerance is at most 0.05, the charge 1 entry comes from the monoisotopic
        /// spectra and every other charge comes from the average mass spectra. Otherwise the
        /// monoisotopic spectra are used as-is.
        /// </remarks>
        /// <param name="ascoreParameters">ascore parameters (dynamic mods and fragment mass tolerance)</param>
        /// <param name="mzmax">upper m/z limit passed to GetCurrentComboTheoreticalIons</param>
        /// <param name="mzmin">lower m/z limit passed to GetCurrentComboTheoreticalIons</param>
        /// <param name="theoMono">theoretical spectra queried with MassType.Monoisotopic</param>
        /// <param name="theoAve">theoretical spectra queried with MassType.Average</param>
        /// <param name="myPositions">modification positions passed to GetTempSpectra</param>
        /// <param name="toWrite">the per-charge spectra that were used to build the ion list</param>
        /// <returns>The result of GetCurrentComboTheoreticalIons for the selected spectra</returns>
        private static List <double> GetChargeList(ParameterFileManager ascoreParameters, double mzmax, double mzmin, TheoreticalSpectra theoMono,
                                                   TheoreticalSpectra theoAve, int[] myPositions, out Dictionary <int, ChargeStateIons> toWrite)
        {
            Dictionary <int, ChargeStateIons> monoSpectra = theoMono.GetTempSpectra(
                myPositions, ascoreParameters.DynamicMods, MassType.Monoisotopic);

            if (ascoreParameters.FragmentMassTolerance <= 0.05)
            {
                Dictionary <int, ChargeStateIons> averageSpectra = theoAve.GetTempSpectra(
                    myPositions, ascoreParameters.DynamicMods, MassType.Average);

                // Charge 1 always comes from the monoisotopic spectra
                var combinedSpectra = new Dictionary <int, ChargeStateIons>
                {
                    { 1, monoSpectra[1] }
                };

                // All other charges come from the average mass spectra
                foreach (var chargeEntry in averageSpectra)
                {
                    if (chargeEntry.Key == 1)
                    {
                        continue;
                    }

                    combinedSpectra.Add(chargeEntry.Key, chargeEntry.Value);
                }

                toWrite = combinedSpectra;
            }
            else
            {
                toWrite = monoSpectra;
            }

            // Get ions within m/z range
            return GetCurrentComboTheoreticalIons(mzmax, mzmin, toWrite);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Removes every dynamic modification symbol from a sequence and stores, on each dynamic
 /// modification, how many characters were removed for its symbol
 /// </summary>
 /// <param name="seq">input protein sequence including mod characters, but without the prefix or suffix residues</param>
 /// <param name="ascoreParams">ascore parameters reference; the Count of each item in DynamicMods is overwritten</param>
 /// <returns>protein sequence without mods (ascoreParams.DynamicMods is updated as a side effect)</returns>
 private string GetCleanSequence(string seq, ref ParameterFileManager ascoreParams)
 {
     var remaining = seq;

     foreach (var dynamicMod in ascoreParams.DynamicMods)
     {
         var lengthBeforeStrip = remaining.Length;

         remaining = remaining.Replace(dynamicMod.ModSymbol.ToString(), string.Empty);

         // The drop in length is the number of characters removed for this mod
         dynamicMod.Count = lengthBeforeStrip - remaining.Length;
     }

     return remaining;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Builds an AScoreOptions instance from the given paths and flags, then runs the AScore
        /// algorithm on a single file via the overload that accepts AScoreOptions
        /// </summary>
        /// <param name="spectraManager">Forwarded unchanged to the AScoreOptions overload</param>
        /// <param name="psmResultsManager">Forwarded unchanged to the AScoreOptions overload</param>
        /// <param name="ascoreParams">Forwarded unchanged to the AScoreOptions overload</param>
        /// <param name="outputFilePath">Name of the output file</param>
        /// <param name="fastaFilePath">Path to FASTA file. If this is empty/null, protein mapping will not occur</param>
        /// <param name="outputDescriptions">Whether to include protein description line in output or not.</param>
        public void RunAScoreOnSingleFile(
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams,
            string outputFilePath,
            string fastaFilePath    = "",
            bool outputDescriptions = false
            )
        {
            var options = new AScoreOptions();

            options.FastaFilePath             = fastaFilePath;
            options.OutputProteinDescriptions = outputDescriptions;
            options.SetAScoreResultsFilePath(outputFilePath);

            RunAScoreOnSingleFile(options, spectraManager, psmResultsManager, ascoreParams);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Reads the job to dataset map file named in the options, runs the AScore algorithm
        /// using that map, then calls ProteinMapperTestRun
        /// </summary>
        /// <param name="ascoreOptions">Options; JobToDatasetMapFile names the map file to read</param>
        /// <param name="spectraManager">Forwarded to RunAScoreOnPreparedData</param>
        /// <param name="psmResultsManager">Forwarded to RunAScoreOnPreparedData</param>
        /// <param name="ascoreParams">Forwarded to RunAScoreOnPreparedData</param>
        public void RunAScoreWithMappingFile(
            AScoreOptions ascoreOptions,
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams)
        {
            // No spectrum file has been opened yet; RunAScoreOnPreparedData is told so
            const bool spectraFileAlreadyOpened = false;

            var mapFileColumns = new List <string> { "Job", "Dataset" };

            var compactMapFilePath = PathUtils.CompactPathString(ascoreOptions.JobToDatasetMapFile, 80);
            OnStatusEvent("Reading Job to Dataset Map File: " + compactMapFilePath);

            ReadJobToDatasetMapFile(ascoreOptions, mapFileColumns, out var datasetsByJob);

            RunAScoreOnPreparedData(datasetsByJob, spectraManager, psmResultsManager, ascoreParams, ascoreOptions, spectraFileAlreadyOpened);

            ProteinMapperTestRun(ascoreOptions);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Runs the AScore algorithm on a single spectrum file that the caller has already opened,
        /// then calls ProteinMapperTestRun
        /// </summary>
        /// <param name="ascoreOptions">Options forwarded to RunAScoreOnPreparedData and ProteinMapperTestRun</param>
        /// <param name="spectraManager">
        /// Spectra manager; must be non-null and report Initialized = true.
        /// Its SpectrumFilePath and ModSummaryFilePath describe the single dataset to process
        /// </param>
        /// <param name="psmResultsManager">PSM results; its JobNum is used as the key for the single dataset</param>
        /// <param name="ascoreParams">Forwarded to RunAScoreOnPreparedData</param>
        /// <exception cref="Exception">spectraManager is null or has not been initialized</exception>
        public void RunAScoreOnSingleFile(
            AScoreOptions ascoreOptions,
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams)
        {
            // Validate spectraManager before its properties are read below; previously the
            // properties were read first, so a null manager surfaced as a NullReferenceException
            // and this descriptive error was never reached
            if (spectraManager == null || !spectraManager.Initialized)
            {
                throw new Exception(
                          "spectraManager must be instantiated and initialized before calling RunAScoreOnSingleFile for a single source file");
            }

            // Single-entry map: the job number of the PSM results points at the opened spectrum file
            var jobToDatasetNameMap = new Dictionary <string, DatasetFileInfo>
            {
                {
                    psmResultsManager.JobNum,
                    new DatasetFileInfo(spectraManager.SpectrumFilePath, spectraManager.ModSummaryFilePath)
                }
            };

            // true: the spectrum file is already open
            RunAScoreOnPreparedData(jobToDatasetNameMap, spectraManager, psmResultsManager, ascoreParams, ascoreOptions, true);

            ProteinMapperTestRun(ascoreOptions);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Gets a list of ions for matching
        /// </summary>
        /// <remarks>
        /// When the fragment mass tolerance is at most 0.0501, the charge 1 entry is taken from the
        /// monoisotopic spectra and all other charges from the average mass spectra.
        /// Otherwise the monoisotopic spectra are used for every charge.
        /// </remarks>
        /// <param name="ascoreParams">AScore parameters</param>
        /// <param name="mzMax">upper m/z limit passed to GetCurrentComboTheoreticalIons</param>
        /// <param name="mzMin">lower m/z limit passed to GetCurrentComboTheoreticalIons</param>
        /// <param name="theoreticalMonoMassSpectra">theoretical spectra queried with MassType.Monoisotopic</param>
        /// <param name="theoreticalAverageMassSpectra">theoretical spectra queried with MassType.Average</param>
        /// <param name="myPositions">modification positions passed to GetTempSpectra</param>
        /// <returns>The result of GetCurrentComboTheoreticalIons for the selected spectra</returns>
        private List <double> GetChargeList(ParameterFileManager ascoreParams, double mzMax, double mzMin,
                                            TheoreticalSpectra theoreticalMonoMassSpectra,
                                            TheoreticalSpectra theoreticalAverageMassSpectra,
                                            int[] myPositions)
        {
            const double FRAGMENT_MASS_TOLERANCE = 0.0501;

            var monoSpectra = theoreticalMonoMassSpectra.GetTempSpectra(
                myPositions, ascoreParams.DynamicMods, MassType.Monoisotopic);

            Dictionary <int, ChargeStateIons> spectraToMatch;

            if (ascoreParams.FragmentMassTolerance <= FRAGMENT_MASS_TOLERANCE)
            {
                var averageSpectra = theoreticalAverageMassSpectra.GetTempSpectra(
                    myPositions, ascoreParams.DynamicMods, MassType.Average);

                // Charge 1 comes from the monoisotopic spectra
                spectraToMatch = new Dictionary <int, ChargeStateIons>
                {
                    { 1, monoSpectra[1] }
                };

                // Every other charge comes from the average mass spectra
                foreach (var charge in averageSpectra.Keys)
                {
                    if (charge == 1)
                    {
                        continue;
                    }

                    spectraToMatch.Add(charge, averageSpectra[charge]);
                }
            }
            else
            {
                spectraToMatch = monoSpectra;
            }

            // Get ions within m/z range
            return GetCurrentComboTheoreticalIons(mzMax, mzMin, spectraToMatch);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Generates a sequence based on final best peptide sequence.
        /// </summary>
        /// <remarks>
        /// Every residue of <paramref name="seq"/> is written to the result exactly once.
        /// A residue with no entry in <paramref name="peptides"/> (array shorter than the sequence),
        /// or whose non-zero ID matches no dynamic modification, is written without a symbol.
        /// </remarks>
        /// <param name="seq">unmodified sequence</param>
        /// <param name="myParam">ascore parameters; DynamicMods supplies the symbol for each modification ID</param>
        /// <param name="peptides">
        /// peptide modification position array; 0 means the residue is not modified,
        /// any other value is compared against the UniqueID of each dynamic modification
        /// </param>
        /// <returns>The sequence with a modification symbol appended after each modified residue</returns>
        private static string GenerateFinalSequences(string seq, ParameterFileManager myParam, int[] peptides)
        {
            // StringBuilder avoids allocating a new string for every residue
            var finalSeq = new System.Text.StringBuilder(seq.Length);

            for (int i = 0; i < seq.Length; i++)
            {
                // The residue itself is always kept, whether or not a symbol follows it
                finalSeq.Append(seq[i]);

                // No entry for this residue, or an ID of 0: the residue is not modified
                if (i >= peptides.Length || peptides[i] == 0)
                {
                    continue;
                }

                foreach (Mod.DynamicModification dmod in myParam.DynamicMods)
                {
                    if (peptides[i] == dmod.UniqueID)
                    {
                        finalSeq.Append(dmod.ModSymbol);

                        // Only the first matching modification contributes a symbol
                        break;
                    }
                }
            }

            return finalSeq.ToString();
        }
Exemplo n.º 9
0
 /// <summary>
 /// Gets a clean sequence and initializes the dynamic modification counts
 /// </summary>
 /// <remarks>
 /// The text between the first and second '.' is used as the peptide. If the input contains
 /// no '.', the whole input is used. Symbol occurrences are counted by comparing the sequence
 /// length before and after removal, so any symbol character is handled, including characters
 /// that are regular expression metacharacters.
 /// </remarks>
 /// <param name="seq">input protein sequence including mod characters</param>
 /// <param name="ascoreParameterss">ascore parameters reference; the Count of each item in DynamicMods is overwritten</param>
 /// <returns>protein sequence without mods (ascoreParameterss.DynamicMods is updated as a side effect)</returns>
 private static string GetCleanSequence(string seq, ref ParameterFileManager ascoreParameterss)
 {
     // Drop the prefix and suffix residues when they are present
     string[] parts = seq.Split('.');
     if (parts.Length > 1)
     {
         seq = parts[1];
     }

     foreach (Mod.DynamicModification dmod in ascoreParameterss.DynamicMods)
     {
         string stripped = seq.Replace(dmod.ModSymbol.ToString(), string.Empty);

         // Each removed symbol shortens the sequence, so the length difference is the
         // occurrence count; no regular expression (and no escaping) is needed
         dmod.Count = seq.Length - stripped.Length;
         seq        = stripped;
     }

     return seq;
 }
Exemplo n.º 10
0
        /// <summary>
        /// Configure and run the AScore algorithm
        /// </summary>
        /// <remarks>
        /// Loads the parameter file, creates the PSM results reader that matches the search type,
        /// runs in either multi-job or single-file mode, and optionally creates an updated
        /// search results file.
        /// </remarks>
        /// <param name="ascoreOptions">Processing options (parameter file, search type, input/output files, mode flags)</param>
        /// <returns>0 when processing finishes; -13 when ascoreOptions.SearchType is not a supported search mode</returns>
        public int RunAScore(AScoreOptions ascoreOptions)
        {
            // Reports which search results file is about to be loaded
            void ReportCachingResultsFile()
            {
                OnStatusEvent("Caching data in " + PathUtils.CompactPathString(ascoreOptions.DbSearchResultsFile, 80));
            }

            var modDefinitions = new ParameterFileManager(ascoreOptions.AScoreParamFile);

            RegisterEvents(modDefinitions);

            Console.WriteLine();

            var modsWereLoaded = modDefinitions.DynamicMods.Count > 0 || modDefinitions.StaticMods.Count > 0;

            if (modsWereLoaded)
            {
                OnStatusEvent("Loaded modifications from: " + ascoreOptions.AScoreParamFile);

                foreach (var staticMod in modDefinitions.StaticMods)
                {
                    OnStatusEvent(Utilities.GetModDescription("Static,   ", staticMod));
                }

                foreach (var dynamicMod in modDefinitions.DynamicMods)
                {
                    OnStatusEvent(Utilities.GetModDescription("Dynamic,  ", dynamicMod));
                }

                foreach (var terminusMod in modDefinitions.TerminiMods)
                {
                    OnStatusEvent(Utilities.GetModDescription("Terminus, ", terminusMod));
                }

                Console.WriteLine();
            }

            // Pick the results reader that matches the search tool
            PsmResultsManager resultsReader;
            var searchMode = ascoreOptions.SearchType;

            if (searchMode == AScoreOptions.SearchMode.XTandem)
            {
                ReportCachingResultsFile();
                resultsReader = new XTandemFHT(ascoreOptions.DbSearchResultsFile);
            }
            else if (searchMode == AScoreOptions.SearchMode.Sequest)
            {
                ReportCachingResultsFile();
                resultsReader = new SequestFHT(ascoreOptions.DbSearchResultsFile);
            }
            else if (searchMode == AScoreOptions.SearchMode.Inspect)
            {
                ReportCachingResultsFile();
                resultsReader = new InspectFHT(ascoreOptions.DbSearchResultsFile);
            }
            else if (searchMode == AScoreOptions.SearchMode.Msgfdb || searchMode == AScoreOptions.SearchMode.Msgfplus)
            {
                ReportCachingResultsFile();

                if (ascoreOptions.SearchResultsType != AScoreOptions.DbSearchResultsType.Mzid)
                {
                    resultsReader = new MsgfdbFHT(ascoreOptions.DbSearchResultsFile);
                }
                else if (ascoreOptions.CreateUpdatedDbSearchResultsFile)
                {
                    // The full reader is the one that can write an updated .mzid file later on
                    resultsReader = new MsgfMzidFull(ascoreOptions.DbSearchResultsFile);
                }
                else
                {
                    resultsReader = new MsgfMzid(ascoreOptions.DbSearchResultsFile);
                }
            }
            else
            {
                OnErrorEvent(string.Format(
                                 "Incorrect search type: {0} , supported values are {1}",
                                 ascoreOptions.SearchType,
                                 string.Join(", ", Enum.GetNames(typeof(AScoreOptions.SearchMode)))
                                 ));
                return -13;
            }

            var massCalculator = new PeptideMassCalculator();
            var spectraCache   = new SpectraManagerCache(massCalculator);

            RegisterEvents(spectraCache);

            OnStatusEvent("Output directory: " + ascoreOptions.OutputDirectoryInfo.FullName);

            var algorithm = new AScoreAlgorithm();

            RegisterEvents(algorithm);

            // Initialize the options
            FilterOnMSGFScore = ascoreOptions.FilterOnMSGFScore;

            // Run the algorithm
            if (!ascoreOptions.MultiJobMode)
            {
                spectraCache.OpenFile(ascoreOptions.MassSpecFile, ascoreOptions.ModSummaryFile);

                RunAScoreOnSingleFile(ascoreOptions, spectraCache, resultsReader, modDefinitions);
            }
            else
            {
                RunAScoreWithMappingFile(ascoreOptions, spectraCache, resultsReader, modDefinitions);
            }

            OnStatusEvent("AScore Complete");

            if (!ascoreOptions.CreateUpdatedDbSearchResultsFile)
            {
                return 0;
            }

            if (ascoreOptions.SearchResultsType == AScoreOptions.DbSearchResultsType.Fht)
            {
                CreateUpdatedFirstHitsFile(ascoreOptions);
            }
            else if (resultsReader is MsgfMzidFull fullMzidReader)
            {
                fullMzidReader.WriteToMzidFile(ascoreOptions.UpdatedDbSearchResultsFileName);
                OnStatusEvent("Results merged; new file: " + PathUtils.CompactPathString(ascoreOptions.UpdatedDbSearchResultsFileName, 80));
            }

            return 0;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Runs all of the tools necessary to perform an AScore run: walks every row of the PSM results,
        /// loads the matching spectrum, scores (or directly records) the peptide, and writes the results file
        /// </summary>
        /// <remarks>
        /// For each row: the row is read with GetNextRow, tallied by fragmentation type, the spectrum source
        /// and modification definitions are (re)loaded whenever the job number changes, duplicate
        /// scan/charge/sequence combinations and rows failing the MSGF filter are skipped, and the remaining
        /// rows are either passed to AScoreAlgorithm.ComputeAScore or written directly with WriteToTable.
        /// After the loop, the results are written to ascoreOptions.AScoreResultsFilePath.
        /// </remarks>
        /// <param name="jobToDatasetNameMap">Keys are job numbers (stored as strings); values are Dataset Names or the path to the _dta.txt file</param>
        /// <param name="spectraManager">Manager for reading _dta.txt or .mzML files; must have already been initialized by the calling class</param>
        /// <param name="psmResultsManager">Source of the PSM rows; also collects the results (WriteToTable) and writes them (WriteToFile)</param>
        /// <param name="ascoreParams">AScore parameters; passed by ref to GetNextRow and GetCleanSequence, and to the mod summary / SetModifications calls</param>
        /// <param name="ascoreOptions">Supplies the fallback ModSummaryFile and the output path AScoreResultsFilePath</param>
        /// <param name="spectraFileOpened">Set to true if processing a single dataset, and spectraManager.OpenFile() has already been called</param>
        /// <exception cref="ArgumentException">jobToDatasetNameMap is null or empty</exception>
        /// <exception cref="Exception">A row refers to a job that is not in jobToDatasetNameMap, or no spectrum source is available for a row</exception>
        private void RunAScoreOnPreparedData(
            IReadOnlyDictionary <string, DatasetFileInfo> jobToDatasetNameMap,
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams,
            AScoreOptions ascoreOptions,
            bool spectraFileOpened)
        {
            var totalRows            = psmResultsManager.GetRowLength();
            // Keys are scanNumber_chargeState_sequence; used to skip PSMs that were already processed
            var dctPeptidesProcessed = new Dictionary <string, int>();

            if (jobToDatasetNameMap == null || jobToDatasetNameMap.Count == 0)
            {
                const string errorMessage = "Error in AlgorithmRun: jobToDatasetNameMap cannot be null or empty";
                OnErrorEvent(errorMessage);
                throw new ArgumentException(errorMessage);
            }

            // Spectrum source for the job currently being processed; assigned when the job number changes
            ISpectraManager spectraFile = null;
            string          spectraManagerCurrentJob = null; // Force open after first read from fht

            var modSummaryManager = new ModSummaryFileManager();

            RegisterEvents(modSummaryManager);

            var peptideMassCalculator = new PeptideMassCalculator();

            if (FilterOnMSGFScore)
            {
                OnStatusEvent("Filtering using MSGF_SpecProb <= " + ascoreParams.MSGFPreFilter.ToString("0.0E+00"));
            }
            Console.WriteLine();

            // Row counts indexed by (int)FragmentType
            // NOTE(review): the fixed size of 4 assumes the FragmentType values used below map to 0-3 - confirm
            var statsByType     = new int[4];
            var ascoreAlgorithm = new AScoreAlgorithm();

            RegisterEvents(ascoreAlgorithm);

            // Every path through the loop body must call IncrementRow (or throw), otherwise the loop never ends
            while (psmResultsManager.CurrentRowNum < totalRows)
            {
                //  Console.Clear();

                // Update the console progress line every 100 rows
                if (psmResultsManager.CurrentRowNum % 100 == 0)
                {
                    Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%");
                }

                int    scanNumber;
                int    scanCount;
                int    chargeState;
                string peptideSeq;
                double msgfScore;

                if (FilterOnMSGFScore)
                {
                    psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, out msgfScore, ref ascoreParams);
                }
                else
                {
                    psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, ref ascoreParams);
                    // Placeholder; msgfScore is only compared against the pre-filter when FilterOnMSGFScore is true
                    msgfScore = 1;
                }

                // Tally the row by fragmentation type; anything other than CID, ETD, or HCD counts as Unspecified
                // NOTE(review): presumably GetNextRow updates ascoreParams.FragmentType for each row - confirm
                switch (ascoreParams.FragmentType)
                {
                case FragmentType.CID:
                    statsByType[(int)FragmentType.CID]++;
                    break;

                case FragmentType.ETD:
                    statsByType[(int)FragmentType.ETD]++;
                    break;

                case FragmentType.HCD:
                    statsByType[(int)FragmentType.HCD]++;
                    break;

                default:
                    statsByType[(int)FragmentType.Unspecified]++;
                    break;
                }

                // True for the first row and whenever the row's job number differs from the previous row's
                if (string.IsNullOrEmpty(spectraManagerCurrentJob) || !string.Equals(spectraManagerCurrentJob, psmResultsManager.JobNum))
                {
                    // New dataset
                    // Get the correct spectrum file for the match
                    if (!jobToDatasetNameMap.TryGetValue(psmResultsManager.JobNum, out var datasetInfo))
                    {
                        var errorMessage = "Input file refers to job " + psmResultsManager.JobNum +
                                           " but jobToDatasetNameMap does not contain that job; unable to continue";
                        OnWarningEvent(errorMessage);

                        if (!psmResultsManager.JobColumnDefined)
                        {
                            OnWarningEvent(
                                "If the input file includes results from multiple jobs, the first column must be job number with Job as the column heading");
                        }

                        throw new Exception(errorMessage);
                    }

                    var datasetName = GetDatasetName(datasetInfo.SpectrumFilePath);
                    OnStatusEvent("Dataset name: " + datasetName);

                    if (!spectraFileOpened)
                    {
                        // This method was called from RunAScoreWithMappingFile
                        // Open the spectrum file for this dataset
                        spectraFile = spectraManager.GetSpectraManagerForFile(
                            psmResultsManager.PSMResultsFilePath,
                            datasetName,
                            datasetInfo.ModSummaryFilePath);
                    }
                    else
                    {
                        // The caller already opened the spectrum file; use it
                        spectraFile = spectraManager.GetCurrentSpectrumManager();
                    }

                    spectraManagerCurrentJob = string.Copy(psmResultsManager.JobNum);
                    Console.Write("\r");

                    // Fall back to the mod summary file from the options when the dataset does not define one
                    if (string.IsNullOrWhiteSpace(datasetInfo.ModSummaryFilePath) && !string.IsNullOrWhiteSpace(ascoreOptions.ModSummaryFile))
                    {
                        datasetInfo.ModSummaryFilePath = ascoreOptions.ModSummaryFile;
                    }

                    // The mzid-based readers set the modifications themselves; all other readers use a mod summary file
                    if (psmResultsManager is MsgfMzid mzid)
                    {
                        mzid.SetModifications(ascoreParams);
                    }
                    else if (psmResultsManager is MsgfMzidFull mzidFull)
                    {
                        mzidFull.SetModifications(ascoreParams);
                    }
                    else
                    {
                        if (string.IsNullOrEmpty(datasetInfo.ModSummaryFilePath))
                        {
                            // No explicit path; ReadModSummary is given the dataset name and the PSM results file path instead
                            modSummaryManager.ReadModSummary(spectraFile.DatasetName, psmResultsManager.PSMResultsFilePath, ascoreParams);
                        }
                        else
                        {
                            var modSummaryFile = new FileInfo(datasetInfo.ModSummaryFilePath);
                            modSummaryManager.ReadModSummary(modSummaryFile, ascoreParams);
                        }
                    }

                    Console.WriteLine();

                    // Redraw the progress line, since status messages were written above
                    Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%");
                }

                // perform work on the match
                // Split prefix.sequence.suffix; when there are fewer than three parts, treat the whole text
                // as the sequence and use "?" for the unknown flanking residues
                var    peptideParts = peptideSeq.Split('.');
                string sequenceWithoutSuffixOrPrefix;
                string front;
                string back;

                if (peptideParts.Length >= 3)
                {
                    front = peptideParts[0];
                    sequenceWithoutSuffixOrPrefix = peptideParts[1];
                    back = peptideParts[2];
                }
                else
                {
                    front = "?";
                    sequenceWithoutSuffixOrPrefix = string.Copy(peptideSeq);
                    back = "?";
                }

                // Called for every row (even rows skipped below) because ascoreParams is passed by ref
                var sequenceClean = GetCleanSequence(sequenceWithoutSuffixOrPrefix, ref ascoreParams);
                // Skip rows whose MSGF score is above the pre-filter threshold (only when filtering is enabled)
                var skipPSM       = FilterOnMSGFScore && msgfScore > ascoreParams.MSGFPreFilter;

                // The key uses the sequence with mod symbols, so different modified forms are distinct keys
                var scanChargePeptide = scanNumber + "_" + chargeState + "_" + sequenceWithoutSuffixOrPrefix;
                if (dctPeptidesProcessed.ContainsKey(scanChargePeptide))
                {
                    // We have already processed this PSM
                    skipPSM = true;
                }
                else
                {
                    // Recorded even when the row is about to be skipped by the MSGF filter
                    dctPeptidesProcessed.Add(scanChargePeptide, 0);
                }

                if (skipPSM)
                {
                    psmResultsManager.IncrementRow();
                    continue;
                }

                //Get experimental spectra
                if (spectraFile == null)
                {
                    const string errorMessage = "spectraFile is uninitialized in RunAScoreOnPreparedData; this indicates a programming bug";
                    OnErrorEvent(errorMessage);
                    throw new Exception(errorMessage);
                }

                var expSpec = spectraFile.GetExperimentalSpectra(scanNumber, scanCount, chargeState);

                if (expSpec == null)
                {
                    // Warn and move on; no result is written for this row
                    OnWarningEvent("Scan " + scanNumber + " not found in spectra file for peptide " + peptideSeq);
                    psmResultsManager.IncrementRow();
                    continue;
                }

                // Assume monoisotopic for both hi res and low res spectra
                MolecularWeights.MassType = MassType.Monoisotopic;

                // Compute precursor m/z value
                var precursorMZ = peptideMassCalculator.ConvoluteMass(expSpec.PrecursorMass, 1, chargeState);

                // Set the m/z range
                // CID: the lower bound is the precursor m/z scaled by lowRangeMultiplier
                var mzMax = maxRange;
                var mzMin = precursorMZ * lowRangeMultiplier;

                if (ascoreParams.FragmentType != FragmentType.CID)
                {
                    // Any other fragmentation type: use the fixed minRange lower bound (mzMax is maxRange either way)
                    mzMax = maxRange;
                    mzMin = minRange;
                }

                //Generate all combination mixtures
                var modMixture = new Combinatorics.ModMixtureCombo(ascoreParams.DynamicMods, sequenceClean);

                var myPositionsList = GetMyPositionList(sequenceClean, modMixture);

                //If I have more than 1 modifiable site proceed to calculation
                if (myPositionsList.Count > 1)
                {
                    ascoreAlgorithm.ComputeAScore(psmResultsManager, ascoreParams, scanNumber, chargeState,
                                                  peptideSeq, front, back, sequenceClean, expSpec,
                                                  mzMax, mzMin, myPositionsList);
                }
                else if (myPositionsList.Count == 1)
                {
                    // Either one or no modifiable sites
                    // Only one position combination exists, so there is nothing to score; the largest value
                    // in that combination is used as the mod ID, and 0 means no residue is modified
                    var uniqueID = myPositionsList[0].Max();
                    if (uniqueID == 0)
                    {
                        psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], MOD_INFO_NO_MODIFIED_RESIDUES);
                    }
                    else
                    {
                        psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], LookupModInfoByID(uniqueID, ascoreParams.DynamicMods));
                    }
                }
                else
                {
                    // No modifiable sites
                    psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, new int[0], MOD_INFO_NO_MODIFIED_RESIDUES);
                }
                psmResultsManager.IncrementRow();
            }

            Console.WriteLine();

            OnStatusEvent(string.Format("Writing {0:N0} rows to {1}", psmResultsManager.ResultsCount, PathUtils.CompactPathString(ascoreOptions.AScoreResultsFilePath, 80)));
            psmResultsManager.WriteToFile(ascoreOptions.AScoreResultsFilePath);

            Console.WriteLine();

            // A total of zero means the loop never read a row
            if (statsByType.Sum() == 0)
            {
                OnWarningEvent("Input file appeared empty");
            }
            else
            {
                // Rows counted as Unspecified are included in the total above but are not reported here
                OnStatusEvent("Stats by fragmentation ion type:");
                ReportStatsForFragType("  CID", statsByType, FragmentType.CID);
                ReportStatsForFragType("  ETD", statsByType, FragmentType.ETD);
                ReportStatsForFragType("  HCD", statsByType, FragmentType.HCD);
            }

            Console.WriteLine();
        }
Exemplo n.º 12
0
        public void ComputeAScore(
            PsmResultsManager psmResultsManager, ParameterFileManager ascoreParams, int scanNumber,
            int chargeState, string peptideSeq, string front, string back, string sequenceClean, ExperimentalSpectra expSpec,
            double mzMax, double mzMin, IReadOnlyList <int[]> myPositionsList)
        {
            // Change the charge state to 2+ if it is 1+
            if (chargeState == 1)
            {
                chargeState = 2;
            }

            // Parallel lists of scores
            var peptideScores  = new List <List <double> >();
            var weightedScores = new List <List <double> >();

            try
            {
                var theoreticalMonoMassSpectra    = new TheoreticalSpectra(sequenceClean, ascoreParams, chargeState, MassType.Monoisotopic);
                var theoreticalAverageMassSpectra = new TheoreticalSpectra(sequenceClean, ascoreParams, chargeState, MassType.Average);
                var peptideMassTheoretical        = theoreticalMonoMassSpectra.PeptideNeutralMassWithStaticMods + GetModMassTotal(peptideSeq, ascoreParams.DynamicMods);

                if (Math.Abs(peptideMassTheoretical - expSpec.PrecursorNeutralMass) > 20)
                {
                    OnWarningEvent(string.Format(
                                       "Scan {0}: Observed precursor mass of {1:F1} Da is more than 20 Da away from the computed mass of {2:F1} Da; DeltaMass = {3:F1} Da",
                                       scanNumber,
                                       expSpec.PrecursorNeutralMass,
                                       peptideMassTheoretical,
                                       expSpec.PrecursorNeutralMass - peptideMassTheoretical));
                }
                else
                {
                    // Make sure the masses agree within a reasonable tolerance
                    var validMatch = false;

                    for (double chargeAdjust = 0; chargeAdjust < 0.1; chargeAdjust += 0.005)
                    {
                        for (var massAdjust = -chargeState - 3; massAdjust <= chargeState + 3; massAdjust++)
                        {
                            var delM = peptideMassTheoretical - expSpec.PrecursorNeutralMass + massAdjust * MASS_C13;
                            if (Math.Abs(delM) < 0.15 + chargeState * chargeAdjust)
                            {
                                validMatch = true;
                                break;
                            }
                        }

                        if (validMatch)
                        {
                            break;
                        }
                    }

                    if (!validMatch)
                    {
                        OnWarningEvent(string.Format(
                                           "Scan {0}: Observed precursor mass of {1:F1} Da is not a reasonable match for computed mass of {2:F1} Da; " +
                                           "DeltaMass = {3:F1} Da; Peptide = {4}",
                                           scanNumber,
                                           expSpec.PrecursorNeutralMass,
                                           peptideMassTheoretical,
                                           expSpec.PrecursorNeutralMass - peptideMassTheoretical,
                                           peptideSeq
                                           ));
                    }
                }

                var modNumber = 0;
                foreach (var myPositions in myPositionsList)
                {
                    //Generate spectra for a modification combination
                    var myIons = GetChargeList(ascoreParams, mzMax, mzMin, theoreticalMonoMassSpectra, theoreticalAverageMassSpectra, myPositions);
                    peptideScores.Add(new List <double>());
                    weightedScores.Add(new List <double>());

                    for (var peakDepth = 1; peakDepth < 11; ++peakDepth)
                    {
                        var peakDepthSpectra = expSpec.GetPeakDepthSpectra(peakDepth);
                        peakDepthSpectra.Sort();

                        var matchedIons = GetMatchedMZ(ascoreParams.FragmentMassTolerance, myIons, peakDepthSpectra);

                        //Adjusted peptide score to score based on tolerance window.
                        var score = PeptideScoresManager.GetPeptideScore(
                            peakDepth * ascoreParams.FragmentMassTolerance * 2 / 100.0, myIons.Count, matchedIons.Count);

                        // Check if there were any negative scores
                        peptideScores[modNumber].Add(score);
                        weightedScores[modNumber].Add(score * ScoreWeights[peakDepth - 1]);
                    }
                    modNumber++;
                }

                var sortedSumScore = new List <ValueIndexPair <double> >();
                for (var seq = 0; seq < peptideScores.Count; ++seq)
                {
                    var score = 0.0;
                    for (var depth = 0; depth < peptideScores[seq].Count; ++depth)
                    {
                        score += weightedScores[seq][depth];
                    }
                    sortedSumScore.Add(new ValueIndexPair <double>(score, seq));
                }

                sortedSumScore.Sort();
                var topPeptideScore = sortedSumScore[0].Value;

                // Need the phosphorylation sites for the top peptide
                var topPeptidePTMSites = myPositionsList[sortedSumScore[0].Index];

                var ascoreResults = CalculateAScoreForSite(ascoreParams, expSpec, mzMax, mzMin, myPositionsList, topPeptidePTMSites, peptideScores, theoreticalMonoMassSpectra,
                                                           theoreticalAverageMassSpectra, sortedSumScore);

                foreach (var ascoreResult in ascoreResults)
                {
                    ascoreResult.SecondSequence = front + "." +
                                                  GenerateFinalSequences(sequenceClean, ascoreParams, ascoreResult.PeptideMods) + "." + back;
                }

                //Put scores into our table
                var bestSeq = front + "." + GenerateFinalSequences(sequenceClean, ascoreParams, topPeptidePTMSites) + "." + back;
                foreach (var ascoreResult in ascoreResults)
                {
                    psmResultsManager.WriteToTable(peptideSeq, bestSeq, scanNumber, topPeptideScore, ascoreResult);
                }
            }
            catch (Exception ex)
            {
                OnErrorEvent("Exception in ComputeAScore: " + ex.Message);
                throw;
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Computes one AScore result for each modified site of the top-scoring peptide
        /// </summary>
        /// <remarks>
        /// For every site, the ranked candidates in <paramref name="sortedSumScore"/> are scanned for the first one that
        /// either lacks the site while still containing all of the other sites, or carries a different modification ID
        /// at the site. That candidate is the runner-up used for the comparison.
        /// When no such candidate exists, the result gets an AScore of 1000 and zero ion counts.
        /// </remarks>
        /// <param name="ascoreParams">AScore parameters; supplies the dynamic mods and the fragment mass tolerance</param>
        /// <param name="expSpec">Experimental spectra, queried by peak depth</param>
        /// <param name="mzMax">Upper m/z value, forwarded to GetChargeList</param>
        /// <param name="mzMin">Lower m/z value, forwarded to GetChargeList</param>
        /// <param name="myPositionsList">Modification position arrays of all candidates</param>
        /// <param name="topPeptidePTMSites">Modification position array of the top-scoring candidate</param>
        /// <param name="peptideScores">Per-candidate score lists, indexed like myPositionsList; entry i of a list belongs to peak depth i + 1</param>
        /// <param name="theoreticalMonoMassSpectra">Monoisotopic theoretical spectra, forwarded to GetChargeList</param>
        /// <param name="theoreticalAverageMassSpectra">Average mass theoretical spectra, forwarded to GetChargeList</param>
        /// <param name="sortedSumScore">Ranked candidates; element 0 is treated as the top-scoring one and Index points into myPositionsList</param>
        /// <returns>List of results, one per entry returned by GetSiteDict for the top-scoring candidate</returns>
        private List <AScoreResult> CalculateAScoreForSite(ParameterFileManager ascoreParams, ExperimentalSpectra expSpec,
                                                           double mzMax, double mzMin,
                                                           IReadOnlyList <int[]> myPositionsList,
                                                           int[] topPeptidePTMSites,
                                                           IReadOnlyList <List <double> > peptideScores,
                                                           TheoreticalSpectra theoreticalMonoMassSpectra,
                                                           TheoreticalSpectra theoreticalAverageMassSpectra,
                                                           IReadOnlyList <ValueIndexPair <double> > sortedSumScore)
        {
            var results = new List<AScoreResult>();

            // Modified sites of the top-scoring candidate
            var topSites = GetSiteDict(topPeptidePTMSites);

            // Theoretical ions of the top-scoring candidate
            var topTheoreticalIons = GetChargeList(ascoreParams, mzMax, mzMin,
                                                   theoreticalMonoMassSpectra, theoreticalAverageMassSpectra,
                                                   topPeptidePTMSites);

            for (var siteIndex = 0; siteIndex < topSites.Count; ++siteIndex)
            {
                var siteKey   = topSites.Keys[siteIndex];
                var siteModId = topSites.Values[siteIndex];

                var ascoreResult = new AScoreResult();
                results.Add(ascoreResult);

                ascoreResult.ModInfo = LookupModInfoByID(siteModId, ascoreParams.DynamicMods);

                // Walk the ranked candidates until one qualifies as the runner-up for this site
                var runnerUpRank = 0;
                while (runnerUpRank < sortedSumScore.Count)
                {
                    var candidatePositions = myPositionsList[sortedSumScore[runnerUpRank].Index];
                    var candidateSites     = GetSiteDict(candidatePositions);

                    bool isRunnerUp;
                    if (candidateSites.ContainsKey(siteKey))
                    {
                        // Same site: only usable when the modification there is a different one
                        isRunnerUp = candidateSites[siteKey] != siteModId;
                    }
                    else
                    {
                        // Site is absent: usable only if every other site of the top candidate is present
                        isRunnerUp = true;
                        for (var otherIndex = 0; otherIndex < topSites.Count; otherIndex++)
                        {
                            if (otherIndex != siteIndex && !candidateSites.ContainsKey(topSites.Keys[otherIndex]))
                            {
                                isRunnerUp = false;
                                break;
                            }
                        }
                    }

                    if (isRunnerUp)
                    {
                        ascoreResult.PeptideMods = candidatePositions;
                        break;
                    }

                    ++runnerUpRank;
                }

                if (runnerUpRank >= sortedSumScore.Count)
                {
                    // No alternative placement exists for this site
                    ascoreResult.AScore               = 1000;
                    ascoreResult.NumSiteIons          = 0;
                    ascoreResult.SiteDetermineMatched = 0;

                    continue;
                }

                // Theoretical ions of the runner-up candidate
                var runnerUpPositions = myPositionsList[sortedSumScore[runnerUpRank].Index];
                var runnerUpTheoreticalIons = GetChargeList(ascoreParams, mzMax, mzMin,
                                                            theoreticalMonoMassSpectra, theoreticalAverageMassSpectra,
                                                            runnerUpPositions);

                // Score difference (top minus runner-up) at every peak depth
                var topDepthScores      = peptideScores[sortedSumScore[0].Index];
                var runnerUpDepthScores = peptideScores[sortedSumScore[runnerUpRank].Index];

                var depthDeltas = new List<ValueIndexPair<double>>();
                for (var depthIndex = 0; depthIndex < peptideScores[0].Count; ++depthIndex)
                {
                    var delta = topDepthScores[depthIndex] - runnerUpDepthScores[depthIndex];
                    depthDeltas.Add(new ValueIndexPair<double>(delta, depthIndex));
                }

                // Uses the default ordering of ValueIndexPair
                // NOTE(review): presumably descending by value, so that element 0 is the largest difference; confirm
                depthDeltas.Sort();

                // Peak depth of the first difference after sorting; fall back to depth 1 when it is not positive
                var peakDepthForAScore = depthDeltas[0].Value > 0 ? depthDeltas[0].Index + 1 : 1;

                // Ions that are present in one candidate's theoretical list but not the other's
                var topOnlyIons      = GetSiteDeterminingIons(topTheoreticalIons, runnerUpTheoreticalIons);
                var runnerUpOnlyIons = GetSiteDeterminingIons(runnerUpTheoreticalIons, topTheoreticalIons);

                var peakDepthSpectraFinal = expSpec.GetPeakDepthSpectra(peakDepthForAScore);
                peakDepthSpectraFinal.Sort();

                var topMatchedCount      = GetMatchedMZ(ascoreParams.FragmentMassTolerance, topOnlyIons, peakDepthSpectraFinal).Count;
                var runnerUpMatchedCount = GetMatchedMZ(ascoreParams.FragmentMassTolerance, runnerUpOnlyIons, peakDepthSpectraFinal).Count;

                // Same first argument for both peptide score calls
                var toleranceWindow = peakDepthForAScore * ascoreParams.FragmentMassTolerance * 2 / 100;

                var topSiteScore      = PeptideScoresManager.GetPeptideScore(toleranceWindow, topOnlyIons.Count, topMatchedCount);
                var runnerUpSiteScore = PeptideScoresManager.GetPeptideScore(toleranceWindow, runnerUpOnlyIons.Count, runnerUpMatchedCount);

                // Store the outcome for this site
                ascoreResult.AScore               = Math.Abs(topSiteScore - runnerUpSiteScore);
                ascoreResult.NumSiteIons          = topOnlyIons.Count;      // numSiteIonsPoss
                ascoreResult.SiteDetermineMatched = topMatchedCount;        // numSiteIonsMatched
            }

            return results;
        }
Exemplo n.º 14
0
        /// <summary>
        /// Runs all of the tools necessary to perform an AScore run and writes the scored table to a text file
        /// </summary>
        /// <remarks>
        /// <para>
        /// Each row of the dataset file must provide the columns ScanNum, ScanCount, ChargeState and Peptide,
        /// where Peptide has the form prefix.sequence.suffix.
        /// The columns BestSequence, PeptideScore, AScore, numSiteIons and SecondSequence are appended.
        /// </para>
        /// <para>
        /// When more than one modification placement exists, the best-scoring placement is compared against the
        /// second entry of the sorted score list. Otherwise AScore is 1000 if the single placement contains a
        /// modified residue and -1 if it does not.
        /// </para>
        /// <para>
        /// Rows for which no experimental spectrum is found are skipped and their appended columns are left unset.
        /// </para>
        /// </remarks>
        /// <param name="dtaFileName">dta file path; the dataset name is the file name without its last 8 characters</param>
        /// <param name="parameterFile">parameter file path</param>
        /// <param name="datasetFileName">dataset file path</param>
        /// <param name="outputFilePath">output file path</param>
        public static void AlgorithmRun(string dtaFileName, string parameterFile, string datasetFileName, string outputFilePath)
        {
            System.Data.DataTable dt               = Utilities.TextFileToDataTableAssignTypeString(datasetFileName, false);
            DtaManager            dtaManager       = new DtaManager(dtaFileName);
            ParameterFileManager  ascoreParameters = new ParameterFileManager(parameterFile);

            // Drop the last 8 characters of the file name (presumably the "_dta.txt" suffix) to obtain the dataset name
            string dtaFileNameOnly = System.IO.Path.GetFileName(dtaFileName);
            string datasetName     = dtaFileNameOnly.Substring(0, dtaFileNameOnly.Length - 8);

            // Add the columns that receive the AScore information
            dt.Columns.Add("BestSequence", typeof(string));
            dt.Columns.Add("PeptideScore", typeof(double));
            dt.Columns.Add("AScore", typeof(double));
            dt.Columns.Add("numSiteIons", typeof(int));
            dt.Columns.Add("SecondSequence", typeof(string));

            int totalRows = dt.Rows.Count;

            for (int t = 0; t < totalRows; t++)
            {
                // Progress output (zero-based row index)
                Console.WriteLine(t + " / " + totalRows);

                int    scanNumber  = int.Parse((string)dt.Rows[t]["ScanNum"]);
                int    scanCount   = int.Parse((string)dt.Rows[t]["ScanCount"]);
                int    chargeState = int.Parse((string)dt.Rows[t]["ChargeState"]);
                string peptideSeq  = (string)dt.Rows[t]["Peptide"];

                // Prefix and suffix residues surrounding the peptide (prefix.sequence.suffix)
                string[] splittedPep = peptideSeq.Split('.');
                string   front       = splittedPep[0];
                string   back        = splittedPep[2];

                string sequence = GetCleanSequence(peptideSeq, ref ascoreParameters);

                // Get experimental spectra
                ExperimentalSpectra expSpec = dtaManager.GetExperimentalSpectra(
                    GetDtaFileName(datasetName, scanNumber, scanCount, chargeState));

                if (expSpec == null)
                {
                    // No spectrum for this row; skip it instead of failing on a null reference
                    // (AlgorithmRunChargeAve handles this case the same way)
                    continue;
                }

                // Monoisotopic masses are assumed throughout
                MolecularWeights.MassType = MassType.Monoisotopic;

                // Get precursor m/z
                double precursorMZ = (expSpec.PrecursorMass +
                                      ((expSpec.PrecursorChargeState - 1) *
                                       MolecularWeights.Hydrogen)) / chargeState;

                // Set the m/z range: CID uses 28% of the precursor m/z as the lower bound, everything else uses 50
                // TODO: parameterize these magic numbers
                double mzmax = 2000.0;
                double mzmin = ascoreParameters.FragmentType == FragmentType.CID ? precursorMZ * 0.28 : 50.0;

                // Generate all combination mixtures
                Combinatorics.ModMixtureCombo modMixture = new Combinatorics.ModMixtureCombo(ascoreParameters.DynamicMods, sequence);

                // One position array per combination; entry i holds the mod ID placed on residue i (0 = unmodified)
                List<int[]> myPositionsList = new List<int[]>();
                foreach (List<int> mycom in modMixture.FinalCombos)
                {
                    int[] myPositions = new int[sequence.Length];
                    for (int i = 0; i < mycom.Count; i++)
                    {
                        myPositions[modMixture.AllSite[i]] = mycom[i];
                    }
                    myPositionsList.Add(myPositions);
                }

                TheoreticalSpectra theo = new TheoreticalSpectra(sequence, ascoreParameters, chargeState,
                                                                 new List<Mod.DynamicModification>(), MassType.Monoisotopic);

                // Score every placement at peak depths 1 through 10
                // peptideScores[p][d] is the raw score and summedScores[p] the weighted sum for placement p
                List<List<double>> peptideScores = new List<List<double>>();
                List<double>       summedScores  = new List<double>();

                foreach (int[] myPositions in myPositionsList)
                {
                    // Generate spectra for a modification combination
                    Dictionary<int, ChargeStateIons> mySpectra = theo.GetTempSpectra(myPositions,
                                                                                     ascoreParameters.DynamicMods, MassType.Monoisotopic);
                    // Get ions within m/z range
                    List<double> myIons = GetCurrentComboTheoreticalIons(mzmax, mzmin, mySpectra);

                    List<double> depthScores = new List<double>();
                    double       weightedSum = 0.0;

                    for (int peakDepth = 1; peakDepth < 11; ++peakDepth)
                    {
                        List<ExperimentalSpectraEntry> peakDepthSpectra = expSpec.GetPeakDepthSpectra(peakDepth);
                        List<double> matchedIons = GetMatchedMZ(
                            peakDepth, ascoreParameters.FragmentMassTolerance,
                            myIons, peakDepthSpectra);

                        // Adjusted peptide score to score based on tolerance window
                        double score = PeptideScoresManager.GetPeptideScore(
                            ((double)peakDepth * ascoreParameters.FragmentMassTolerance * 2) / 100.0, myIons.Count, matchedIons.Count);

                        depthScores.Add(score);
                        weightedSum += score * ScoreWeights[peakDepth - 1];
                    }

                    peptideScores.Add(depthScores);
                    summedScores.Add(weightedSum);
                }

                // With more than one placement, proceed to the AScore calculation
                if (myPositionsList.Count > 1)
                {
                    List<ValueIndexPair<double>> sortedSumScore = new List<ValueIndexPair<double>>();
                    for (int seq = 0; seq < summedScores.Count; ++seq)
                    {
                        sortedSumScore.Add(new ValueIndexPair<double>(summedScores[seq], seq));
                    }

                    sortedSumScore.Sort(new ValueIndexPair<double>.SortValueDescend());
                    double topPeptideScore = sortedSumScore[0].Value;

                    // Need the modification sites for the top peptide
                    int[] topPeptidePTMsites = myPositionsList[sortedSumScore[0].Index];

                    // Get the top sequence theoretical spectra
                    Dictionary<int, ChargeStateIons> topSpectra = theo.GetTempSpectra(topPeptidePTMsites,
                                                                                      ascoreParameters.DynamicMods, MassType.Monoisotopic);

                    List<double> topTheoIons = GetCurrentComboTheoreticalIons(mzmax, mzmin, topSpectra);

                    // The comparison partner is always the second entry of the sorted list
                    int secondPeptide = 1;

                    int[] secondTopPeptidePTMsites = myPositionsList[sortedSumScore[secondPeptide].Index];

                    // Get the second best scoring spectra
                    Dictionary<int, ChargeStateIons> secondTopSpectra = theo.GetTempSpectra(
                        secondTopPeptidePTMsites, ascoreParameters.DynamicMods, MassType.Monoisotopic);

                    List<double> secondTopTheoIons = GetCurrentComboTheoreticalIons(mzmax, mzmin, secondTopSpectra);

                    // Calculate the diff score between the top and second sites at every peak depth
                    List<ValueIndexPair<double>> diffScore = new List<ValueIndexPair<double>>();
                    for (int i = 0; i < peptideScores[0].Count; ++i)
                    {
                        diffScore.Add(new ValueIndexPair<double>(
                                          peptideScores[sortedSumScore[0].Index][i] -
                                          peptideScores[sortedSumScore[secondPeptide].Index][i], i));
                    }

                    // Sort in descending order
                    diffScore.Sort(new ValueIndexPair<double>.SortValueDescend());

                    // Use the peak depth with the largest positive difference; fall back to depth 1
                    int peakDepthForAScore = 1;
                    if (diffScore[0].Value > 0)
                    {
                        peakDepthForAScore = diffScore[0].Index + 1;
                    }

                    List<double> siteIons1 = GetSiteDeterminingIons(topTheoIons, secondTopTheoIons);
                    List<double> siteIons2 = GetSiteDeterminingIons(secondTopTheoIons, topTheoIons);

                    List<ExperimentalSpectraEntry> peakDepthSpectraFinal = expSpec.GetPeakDepthSpectra(peakDepthForAScore);

                    double toleranceWindow = ((double)peakDepthForAScore * ascoreParameters.FragmentMassTolerance * 2) / 100;

                    int bestDeterminingCount = GetMatchedMZ(peakDepthForAScore,
                                                            ascoreParameters.FragmentMassTolerance, siteIons1, peakDepthSpectraFinal).Count;

                    double a1 = PeptideScoresManager.GetPeptideScore(toleranceWindow, siteIons1.Count, bestDeterminingCount);

                    int secondBestDeterminingCount = GetMatchedMZ(peakDepthForAScore,
                                                                  ascoreParameters.FragmentMassTolerance, siteIons2, peakDepthSpectraFinal).Count;

                    double a2 = PeptideScoresManager.GetPeptideScore(toleranceWindow, siteIons2.Count, secondBestDeterminingCount);

                    // Put scores into our table; typed values are stored directly instead of via strings
                    dt.Rows[t]["BestSequence"] = front + "." + GenerateFinalSequences(sequence,
                                                                                      ascoreParameters, topPeptidePTMsites) + "." + back;
                    dt.Rows[t]["PeptideScore"] = topPeptideScore;
                    dt.Rows[t]["AScore"]       = Math.Abs(a1 - a2);

                    // Note: this column receives the number of matched site-determining ions
                    dt.Rows[t]["numSiteIons"]    = bestDeterminingCount;
                    dt.Rows[t]["SecondSequence"] = front + "." + GenerateFinalSequences(sequence,
                                                                                        ascoreParameters, secondTopPeptidePTMsites) + "." + back;
                }
                else
                {
                    // Zero or one placement: nothing to compare against
                    double pScore = 0.0;
                    foreach (double summedScore in summedScores)
                    {
                        pScore += summedScore;
                    }

                    // Guard against an empty list so that a peptide without any placement is reported as -1
                    bool hasModifiedResidue = myPositionsList.Count > 0 && myPositionsList[0].Any(i => i > 0);

                    dt.Rows[t]["BestSequence"]   = peptideSeq;
                    dt.Rows[t]["PeptideScore"]   = pScore;
                    dt.Rows[t]["AScore"]         = hasModifiedResidue ? 1000.0 : -1.0;
                    dt.Rows[t]["numSiteIons"]    = 0;
                    dt.Rows[t]["SecondSequence"] = "---";
                }
            }

            Utilities.WriteDataTableToText(dt, outputFilePath);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Runs the all the tools necessary to perform an ascore run
        /// </summary>
        /// <param name="dtaFileName">dta file path</param>
        /// <param name="parameterFile">parameter file path</param>
        /// <param name="datasetFileName">dataset file path</param>
        /// <param name="outputFilePath">output file path</param>
        public static void AlgorithmRunChargeAve(string dtaFileName, string parameterFile, string datasetFileName, string outputFilePath)
        {
            //		IonWriter myIonWriter = new IonWriter(System.IO.Path.Combine(System.IO.Path.GetDirectoryName(datasetFileName), "IonLog.txt"));

            System.Data.DataTable dt               = Utilities.TextFileToDataTableAssignTypeString(datasetFileName, false);
            DtaManager            dtaManager       = new DtaManager(dtaFileName);
            ParameterFileManager  ascoreParameters = new ParameterFileManager(parameterFile);

            //string datasetName = "Syne_Glyco-2_degly-S_10Jul11_Andromeda_11-06-19";
            //int scanNumber = 7315;
            //int scanCount = 1;
            //int chargeState = 3;
            //string sequence = "IVNDELESLGYGENLLNLSTINR";


            //Switch back to zero
            string datasetName = System.IO.Path.GetFileName(dtaFileName).Substring(0, System.IO.Path.GetFileName(dtaFileName).Length - 8);

            //adds columns to the datatable corresponding to ascore info
            dt.Columns.Add("BestSequence", typeof(string));
            dt.Columns.Add("PeptideScore", typeof(double));



            for (int i = 1; i < 4; i++)
            {
                dt.Columns.Add("AScore" + i, typeof(double));
                dt.Columns.Add("numSiteIonsMatched" + i, typeof(int));
                dt.Columns.Add("numSiteIonsPoss" + i, typeof(int));
                dt.Columns.Add("SecondSequence" + i, typeof(string));


                dt.Columns["AScore" + i].DefaultValue             = -1;
                dt.Columns["numSiteIonsMatched" + i].DefaultValue = 0;
                dt.Columns["numSiteIonsPoss" + i].DefaultValue    = 0;
                dt.Columns["SecondSequence" + i].DefaultValue     = "---";
            }

            for (int k = 0; k < dt.Rows.Count; k++)
            {
                for (int p = 1; p < 4; p++)
                {
                    dt.Rows[k]["AScore" + p]             = -1;
                    dt.Rows[k]["numSiteIonsMatched" + p] = 0;
                    dt.Rows[k]["numSiteIonsPoss" + p]    = 0;
                    dt.Rows[k]["SecondSequence" + p]     = "---";
                }
            }

            int totalRows  = dt.Rows.Count;
            int rowsCounts = 0;


            //Where all the action happens
            for (int t = 0; t < totalRows; t++)
            {
                Console.WriteLine(rowsCounts++ + " / " + totalRows);


                //Use array of column names specific to other id forms
                int    scanNumber  = int.Parse((string)dt.Rows[t]["ScanNum"]);
                int    scanCount   = int.Parse((string)dt.Rows[t]["ScanCount"]);
                int    chargeState = int.Parse((string)dt.Rows[t]["ChargeState"]);
                string peptideSeq  = (string)dt.Rows[t]["Peptide"];

                string[] splittedPep = peptideSeq.Split('.');
                string   front       = splittedPep[0];
                string   back        = splittedPep[2];

                //TODO:what this does and change name of ascoreParameters
                string sequence = GetCleanSequence(peptideSeq, ref ascoreParameters);

                //Generate combinations for this sequence/mod seto
                //Dictionary of list
                //List<List<int>> mySites = GetSiteLocation(ascoreParameters.DynamicMods, sequence);
                //List<List<List<int>>> myCombos = GenerateCombosToCheck(mySites, ascoreParameters.DynamicMods);

                //Get experimental spectra
                ExperimentalSpectra expSpec = dtaManager.GetExperimentalSpectra(GetDtaFileName(datasetName, scanNumber,
                                                                                               scanCount, chargeState));

                if (expSpec == null)
                {
                    continue;
                }

                //I assume monoisotopic here, nobody uses average anymore.
                MolecularWeights.MassType = MassType.Monoisotopic;

                //Get precursor
                double precursorMZ = (expSpec.PrecursorMass +
                                      ((expSpec.PrecursorChargeState - 1) *
                                       MolecularWeights.Hydrogen)) / chargeState;
                //Set the m/z range
                //Remove magic numbers parameterize
                double mzmax = 2000.0;
                double mzmin = precursorMZ * 0.28;
                if (ascoreParameters.FragmentType == FragmentType.CID)
                {
                    mzmax = 2000.0;
                    mzmin = precursorMZ * 0.28;
                }
                else
                {
                    mzmax = 2000.0;
                    mzmin = 50.0;
                }

                //initialize ascore variable storage
                List <double> vecAScore            = new List <double>();
                List <int>    vecNumSiteIons       = new List <int>();
                List <int[]>  AScorePeptide        = new List <int[]>();
                List <int>    siteDetermineMatched = new List <int>();

                //Generate all combination mixtures
                Combinatorics.ModMixtureCombo modMixture = new Combinatorics.ModMixtureCombo(ascoreParameters.DynamicMods, sequence);

                List <int[]> myPositionsList = new List <int[]>();
                foreach (List <int> mycom in modMixture.FinalCombos)
                {
                    int[] myPositions = new int[sequence.Length];
                    for (int i = 0; i < mycom.Count; i++)
                    {
                        myPositions[modMixture.AllSite[i]] = mycom[i];
                    }
                    myPositionsList.Add(myPositions);
                }

                //If I have more than 1 modifiable site proceed to calculation
                if (myPositionsList.Count > 1 && chargeState > 1)
                {
                    List <List <double> > peptideScores  = new List <List <double> >();
                    List <List <double> > weightedScores = new List <List <double> >();


                    TheoreticalSpectra theoMono = new TheoreticalSpectra(sequence, ascoreParameters, chargeState,
                                                                         new List <Mod.DynamicModification>(), MassType.Monoisotopic);
                    TheoreticalSpectra theoAve = new TheoreticalSpectra(sequence, ascoreParameters, chargeState,
                                                                        new List <Mod.DynamicModification>(), MassType.Average);

                    int modNumber = 0;
                    foreach (int[] myPositions in myPositionsList)
                    {
                        //Generate spectra for a modification combination
                        List <double> myIons = GetChargeList(ascoreParameters, mzmax, mzmin, theoMono, theoAve, myPositions);
                        peptideScores.Add(new List <double>());
                        weightedScores.Add(new List <double>());

                        for (int peakDepth = 1; peakDepth < 11; ++peakDepth)
                        {
                            List <ExperimentalSpectraEntry> peakDepthSpectra = expSpec.GetPeakDepthSpectra(peakDepth);
                            List <double> matchedIons = GetMatchedMZ(
                                peakDepth, ascoreParameters.FragmentMassTolerance,
                                myIons, peakDepthSpectra);

                            //if (scanNumber == 19446)
                            //{

                            //    myIonWriter.MatchList(matchedIons);
                            //}

                            //Adjusted peptide score to score based on tolerance window.
                            double score = PeptideScoresManager.GetPeptideScore(
                                ((double)peakDepth * ascoreParameters.FragmentMassTolerance * 2) / 100.0, myIons.Count, matchedIons.Count);

                            // Check if there were any negative scores
                            peptideScores[modNumber].Add(score);
                            weightedScores[modNumber].Add(
                                score * ScoreWeights[peakDepth - 1]);
                        }
                        modNumber++;
                    }
                    List <ValueIndexPair <double> > sortedSumScore = new List <ValueIndexPair <double> >();
                    for (int seq = 0; seq < peptideScores.Count; ++seq)
                    {
                        double score = 0.0;
                        for (int depth = 0; depth < peptideScores[seq].Count; ++depth)
                        {
                            score += weightedScores[seq][depth];
                        }
                        sortedSumScore.Add(new ValueIndexPair <double>(score, seq));
                    }

                    sortedSumScore.Sort(new ValueIndexPair <double> .SortValueDescend());
                    double topPeptideScore = sortedSumScore[0].Value;



                    // Need the phosphorylation sites for the top peptide
                    int[] topPeptidePTMsites =
                        myPositionsList[sortedSumScore[0].Index];



                    SortedList <int, int> siteInfo = GetSiteDict(topPeptidePTMsites);


                    Dictionary <int, ChargeStateIons> bestDatatoWrite = new Dictionary <int, ChargeStateIons>();
                    // Get the top sequence theoretical spectra
                    List <double> topTheoIons = GetChargeList(ascoreParameters, mzmax, mzmin, theoMono, theoAve, topPeptidePTMsites, out bestDatatoWrite);

                    List <char> modChars = new List <char>();
                    foreach (Mod.DynamicModification m in ascoreParameters.DynamicMods)
                    {
                        modChars.Add(m.ModSymbol);
                    }

                    DataToExcelPractice.InfoForIons bestIonReport = new DataToExcelPractice.InfoForIons();

                    bestIonReport.Sequence = GenerateFinalSequences(sequence, ascoreParameters, topPeptidePTMsites);
                    bestIonReport.InitializeSequenceList(modChars.ToArray());
                    bestIonReport.ChargeState = chargeState;
                    bestIonReport.Depth       = -1;
                    foreach (int k in bestDatatoWrite.Keys)
                    {
                        bestIonReport.BionData.Add(k, new List <double>(bestDatatoWrite[k].BIons));
                        bestIonReport.YionData.Add(k, new List <double>(bestDatatoWrite[k].YIons));
                    }
                    List <double> topIonIntensity = new List <double>();
                    bestIonReport.MatchesForThisDepth = GetMatchedMZStoreIntensity(10, ascoreParameters.FragmentMassTolerance, topTheoIons,
                                                                                   expSpec.GetPeakDepthSpectra(10), out topIonIntensity);
                    bestIonReport.MatchedIonIntensity = new List <double>(topIonIntensity);
                    bestIonReport.AScore     = -1.0;
                    bestIonReport.ScanNumber = scanNumber;



                    List <DataToExcelPractice.InfoForIons> otherSites = new List <DataToExcelPractice.InfoForIons>();



                    for (int indSite = 0; indSite < siteInfo.Count; ++indSite)
                    {
                        int secondPeptide = 0;
                        for (secondPeptide = 0; secondPeptide < sortedSumScore.Count; ++secondPeptide)
                        {
                            SortedList <int, int> secondDict = GetSiteDict(myPositionsList[
                                                                               sortedSumScore[secondPeptide].Index]);
                            bool othersMatch = true;
                            if (!secondDict.ContainsKey(siteInfo.Keys[indSite]))
                            {
                                List <int> sites = siteInfo.Keys.ToList();
                                for (int i = 0; i < sites.Count; i++)
                                {
                                    if (i != indSite)
                                    {
                                        othersMatch = othersMatch && secondDict.ContainsKey(sites[i]);
                                    }
                                }
                                if (othersMatch)
                                {
                                    AScorePeptide.Add(myPositionsList[sortedSumScore[secondPeptide].Index]);
                                    break;
                                }
                            }
                            else
                            {
                                if (secondDict[siteInfo.Keys[indSite]] != siteInfo.Values[indSite])
                                {
                                    AScorePeptide.Add(myPositionsList[sortedSumScore[secondPeptide].Index]);
                                    break;
                                }
                            }
                        }
                        if (secondPeptide == sortedSumScore.Count)
                        {
                            continue;
                        }


                        int[] secondTopPeptidePTMsites = myPositionsList[sortedSumScore[secondPeptide].Index];
                        // Get the second best scoring spectra


                        Dictionary <int, ChargeStateIons> secondIonWriter = new Dictionary <int, ChargeStateIons>();
                        List <double> secondTopTheoIons = GetChargeList(ascoreParameters, mzmax, mzmin, theoMono, theoAve, secondTopPeptidePTMsites,
                                                                        out secondIonWriter);


                        // Calculate the diff score between the top and second sites
                        List <ValueIndexPair <double> > diffScore = new List <ValueIndexPair <double> >();
                        for (int i = 0; i < peptideScores[0].Count; ++i)
                        {
                            diffScore.Add(new ValueIndexPair <double>(
                                              peptideScores[sortedSumScore[0].Index][i] -
                                              peptideScores[sortedSumScore[secondPeptide].Index][i], i));
                        }

                        // Sort in descending order
                        diffScore.Sort(new ValueIndexPair <double> .SortValueDescend());

                        // Find the peak depth for the diff score
                        int peakDepthForAScore = 1;
                        if (diffScore[0].Value > 0)
                        {
                            peakDepthForAScore = diffScore[0].Index + 1;
                        }


                        DataToExcelPractice.InfoForIons secondBestIonReport = new DataToExcelPractice.InfoForIons();
                        secondBestIonReport.Sequence    = GenerateFinalSequences(sequence, ascoreParameters, secondTopPeptidePTMsites);
                        secondBestIonReport.ChargeState = chargeState;
                        foreach (int k in secondIonWriter.Keys)
                        {
                            secondBestIonReport.BionData.Add(k, new List <double>(secondIonWriter[k].BIons));
                            secondBestIonReport.YionData.Add(k, new List <double>(secondIonWriter[k].YIons));
                        }


                        List <double> sTopIonIntensity = new List <double>();
                        secondBestIonReport.MatchesForThisDepth = GetMatchedMZStoreIntensity(10, ascoreParameters.FragmentMassTolerance, secondTopTheoIons,
                                                                                             expSpec.GetPeakDepthSpectra(10), out sTopIonIntensity);
                        secondBestIonReport.MatchedIonIntensity = new List <double>(sTopIonIntensity);
                        secondBestIonReport.Depth = peakDepthForAScore;
                        secondBestIonReport.InitializeSequenceList(modChars.ToArray());
                        secondBestIonReport.ScanNumber = scanNumber;

                        List <double> siteIons1 = GetSiteDeterminingIons(topTheoIons, secondTopTheoIons);
                        List <double> siteIons2 = GetSiteDeterminingIons(secondTopTheoIons, topTheoIons);



                        List <ExperimentalSpectraEntry> peakDepthSpectraFinal = expSpec.GetPeakDepthSpectra(peakDepthForAScore);

                        List <double> matched1 = GetMatchedMZ(peakDepthForAScore,
                                                              ascoreParameters.FragmentMassTolerance, siteIons1, peakDepthSpectraFinal);
                        List <double> matched2 = GetMatchedMZ(peakDepthForAScore,
                                                              ascoreParameters.FragmentMassTolerance, siteIons2, peakDepthSpectraFinal);

                        int bestDterminingCount = matched1.Count;

                        double a1 = PeptideScoresManager.GetPeptideScore(((double)peakDepthForAScore * ascoreParameters.FragmentMassTolerance * 2) / 100,
                                                                         siteIons1.Count, matched1.Count);
                        double a2 = PeptideScoresManager.GetPeptideScore(((double)peakDepthForAScore * ascoreParameters.FragmentMassTolerance * 2) / 100,
                                                                         siteIons2.Count, matched2.Count);

                        bestIonReport.AddToSiteDeterminingIons(matched1);
                        secondBestIonReport.SiteDeterminingIons = new List <double>(matched2);
                        secondBestIonReport.AScore = Math.Abs(a1 - a2);

                        otherSites.Add(secondBestIonReport);

                        // Add the results to the list
                        vecAScore.Add(Math.Abs(a1 - a2));
                        vecNumSiteIons.Add(siteIons1.Count);
                        siteDetermineMatched.Add(bestDterminingCount);
                    }

                    DataToExcelPractice.PeptideIonGroup myIonGroup = new DataToExcelPractice.PeptideIonGroup(bestIonReport, otherSites);

                    DataToExcelPractice.WriteIonsToExcel myExcel = new DataToExcelPractice.WriteIonsToExcel();
                    myExcel.PrintIonsToExcel(myIonGroup, System.IO.Path.GetDirectoryName(outputFilePath));

                    //Put scores into our table
                    dt.Rows[t]["BestSequence"] = front + "." + GenerateFinalSequences(sequence, ascoreParameters, topPeptidePTMsites) + "." + back;
                    dt.Rows[t]["PeptideScore"] = "" + topPeptideScore;
                    for (int i = 0; i < vecAScore.Count && i < 3; i++)
                    {
                        dt.Rows[t]["AScore" + (i + 1)]             = "" + vecAScore[i];
                        dt.Rows[t]["numSiteIonsPoss" + (i + 1)]    = vecNumSiteIons[i];
                        dt.Rows[t]["numSiteIonsMatched" + (i + 1)] = "" + siteDetermineMatched[i];
                        dt.Rows[t]["SecondSequence" + (i + 1)]     = front + "." + GenerateFinalSequences(sequence, ascoreParameters, AScorePeptide[i]) + "." + back;
                    }
                }
                else if (chargeState > 1)
                {
                    List <double>      weightedScores = new List <double>();
                    TheoreticalSpectra theo           = new TheoreticalSpectra(sequence, ascoreParameters, chargeState, new List <Mod.DynamicModification>(), MassType.Monoisotopic);
                    foreach (int[] myPositions in myPositionsList)
                    {
                        Dictionary <int, ChargeStateIons> mySpectra = theo.GetTempSpectra(myPositions, ascoreParameters.DynamicMods, MassType.Monoisotopic);

                        List <double> myIons = GetCurrentComboTheoreticalIons(mzmax, mzmin, mySpectra);

                        for (int peakDepth = 1; peakDepth < 11; ++peakDepth)
                        {
                            List <ExperimentalSpectraEntry> peakDepthSpectra = expSpec.GetPeakDepthSpectra(peakDepth);

                            List <double> matchedIons = GetMatchedMZ(
                                peakDepth, ascoreParameters.FragmentMassTolerance,
                                myIons, peakDepthSpectra);

                            //Adjusted peptide score to score based on tolerance window.
                            double score = PeptideScoresManager.GetPeptideScore(
                                ((double)peakDepth * ascoreParameters.FragmentMassTolerance * 2) / 100.0, myIons.Count, matchedIons.Count);

                            // Check if there were any negative scores
                            weightedScores.Add(
                                score * ScoreWeights[peakDepth - 1]);
                        }
                    }
                    double pScore = 0.0;
                    for (int depth = 0; depth < weightedScores.Count; ++depth)
                    {
                        pScore += weightedScores[depth];
                    }


                    //Nothing to calculate
                    dt.Rows[t]["BestSequence"] = peptideSeq;
                    dt.Rows[t]["PeptideScore"] = "" + pScore;
                    if (myPositionsList[0].Count(i => i > 0) > 0)
                    {
                        dt.Rows[t]["AScore1"] = "1000";
                    }
                    else
                    {
                        dt.Rows[t]["AScore1"] = "-1";
                    }
                    dt.Rows[t]["numSiteIonsMatched1"] = 0;
                    dt.Rows[t]["numSiteIonsPoss1"]    = 0;
                    dt.Rows[t]["SecondSequence1"]     = "--";
                }
                else
                {
                    dt.Rows[t]["BestSequence"]        = peptideSeq;
                    dt.Rows[t]["PeptideScore"]        = "0.0";
                    dt.Rows[t]["AScore1"]             = "-1";
                    dt.Rows[t]["numSiteIonsMatched1"] = 0;
                    dt.Rows[t]["numSiteIonsPoss1"]    = 0;
                    dt.Rows[t]["SecondSequence1"]     = "--";
                }
            }


            Utilities.WriteDataTableToText(dt, outputFilePath);
        }