Exemplo n.º 1
0
        /// <summary>
        /// Read the BestSequence column of the AScore results file, convert each value to its clean sequence,
        /// and write the clean sequences to the peptide list file (one per line)
        /// </summary>
        /// <remarks>
        /// <para>
        /// The first non-blank line of the AScore results file is treated as the header line;
        /// column names are matched case-insensitively.
        /// </para>
        /// <para>
        /// One line is written to the peptide list file for every data row, so a sequence that occurs
        /// in several rows appears several times in the output. mDistinctPeptides is set to the number
        /// of distinct clean sequences, but only when the header line passes VerifyRequiredColumns;
        /// if that check fails the method returns early and mDistinctPeptides is not updated.
        /// </para>
        /// </remarks>
        private void CreatePeptideList()
        {
            var columnMap = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

            // Distinct clean sequences seen so far (only the count is used)
            var distinctPeptides = new HashSet<string>();

            // Write out a list of peptides for clsPeptideToProteinMapEngine
            using var aScoreReader  = new StreamReader(new FileStream(mAScoreResultsFilePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite));
            using var peptideWriter = new StreamWriter(new FileStream(mPeptideListFilePath, FileMode.Create, FileAccess.Write, FileShare.Read));

            while (!aScoreReader.EndOfStream)
            {
                var dataLine = aScoreReader.ReadLine();
                if (string.IsNullOrWhiteSpace(dataLine))
                {
                    continue;
                }

                var columns = dataLine.Split('\t');

                if (columnMap.Count == 0)
                {
                    // Assume the first line is column names; remember the index of each one
                    for (var i = 0; i < columns.Length; ++i)
                    {
                        columnMap.Add(columns[i], i);
                    }

                    var requiredColumns = new List<string>
                    {
                        "BestSequence"
                    };

                    if (!VerifyRequiredColumns(requiredColumns, columnMap, "CreatePeptideList", mAScoreResultsFilePath))
                    {
                        return;
                    }

                    continue;
                }

                var sequence      = columns[columnMap["BestSequence"]];
                var cleanSequence = PeptideCleavageStateCalculator.ExtractCleanSequenceFromSequenceWithMods(sequence, true);

                // HashSet.Add is a no-op for a sequence that is already present
                distinctPeptides.Add(cleanSequence);

                // Every data row is written, including repeated sequences
                peptideWriter.WriteLine(cleanSequence);
            }

            mDistinctPeptides = distinctPeptides.Count;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Combine the data from AScore and the PeptideToProteinMapper into one results file
        /// </summary>
        /// <remarks>
        /// <para>
        /// The first non-blank line of the AScore results file is treated as the header line.
        /// The output header is the AScore header followed by ProteinName, Description
        /// (only when mOutputProteinDescriptions is true), ProteinCount, Residue, and Position.
        /// </para>
        /// <para>
        /// For each data row, the clean form of BestSequence is looked up in mPeptideToProteinMap:
        /// </para>
        /// <para>
        /// - Not found: the original line is written without protein columns, and
        ///   mPeptidesNotFound / mTotalPeptidesNotFound are updated.
        /// </para>
        /// <para>
        /// - Found: one line is written per protein match per '*' symbol in the sequence
        ///   (or one line per protein match with a blank residue and position 0 when there is no '*').
        /// </para>
        /// <para>
        /// mTotalPeptides is incremented once per data row. If the header line fails
        /// VerifyRequiredColumns, the method returns without writing anything further.
        /// </para>
        /// </remarks>
        private void CombineAScoreAndProteinData()
        {
            // Read the AScore again...
            using var aScoreReader = new StreamReader(new FileStream(mAScoreResultsFilePath, FileMode.Open, FileAccess.Read, FileShare.Read));
            using var mappedWriter = new StreamWriter(new FileStream(mMappingResultsFilePath, FileMode.Create, FileAccess.Write, FileShare.Read));

            var columnMapAScore = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

            // Run as long as we can successfully read
            // (an empty input file simply skips the loop, leaving an empty output file)
            while (!aScoreReader.EndOfStream)
            {
                var dataLine = aScoreReader.ReadLine();
                if (string.IsNullOrWhiteSpace(dataLine))
                {
                    continue;
                }

                var columns = dataLine.Split('\t');

                if (columnMapAScore.Count == 0)
                {
                    // Header line: remember the index of each column name
                    for (var i = 0; i < columns.Length; ++i)
                    {
                        columnMapAScore.Add(columns[i], i);
                    }

                    var requiredColumns = new List<string>
                    {
                        "BestSequence"
                    };

                    if (!VerifyRequiredColumns(requiredColumns, columnMapAScore, "CombineAScoreAndProteinData", mAScoreResultsFilePath))
                    {
                        return;
                    }

                    // Output header = original columns plus the protein mapping columns
                    var outputFileHeaders = new List<string>(columns)
                    {
                        "ProteinName"
                    };

                    // Protein Description - if it contains key-value pairs, use it.
                    if (mOutputProteinDescriptions)
                    {
                        outputFileHeaders.Add("Description");
                    }

                    outputFileHeaders.Add("ProteinCount");
                    outputFileHeaders.Add("Residue");
                    outputFileHeaders.Add("Position");

                    mappedWriter.WriteLine(string.Join("\t", outputFileHeaders));

                    continue;
                }

                var sequence = columns[columnMapAScore["BestSequence"]];

                var cleanSequence = PeptideCleavageStateCalculator.ExtractCleanSequenceFromSequenceWithMods(sequence, true);

                ++mTotalPeptides;

                if (!mPeptideToProteinMap.TryGetValue(cleanSequence, out var proteinMatches))
                {
                    // Match not found; write the line without any protein information
                    WriteCombinedLine(mappedWriter, dataLine);

                    // Track how many times each unmatched peptide was seen
                    // (TryGetValue leaves the count at its default, 0, for a new peptide)
                    mPeptidesNotFound.TryGetValue(cleanSequence, out var timesNotFound);
                    mPeptidesNotFound[cleanSequence] = timesNotFound + 1;
                    ++mTotalPeptidesNotFound;

                    continue;
                }

                PeptideCleavageStateCalculator.SplitPrefixAndSuffixFromSequence(sequence, out var noPrefixSequence, out _, out _);

                // Indices (within noPrefixSequence) of each '*' modification symbol
                var mods = new List<int>();

                for (var i = 0; i < noPrefixSequence.Length; ++i)
                {
                    if (noPrefixSequence[i] == '*')
                    {
                        mods.Add(i);
                    }
                }

                // # of proteins occurred in (the same for every match of this peptide)
                var proteinCount = proteinMatches.Count;

                foreach (var match in proteinMatches)
                {
                    // Protein Name
                    var proteinName = match.proteinName;

                    var proteinDescription = string.Empty;

                    // Protein Description - if it contains key-value pairs, use it.
                    if (mOutputProteinDescriptions)
                    {
                        proteinDescription = mProteinDescriptions[match.proteinName];
                    }

                    if (mods.Count == 0)
                    {
                        // No '*' in the sequence: write a single line with a blank residue and position 0
                        const char noResidue  = ' ';
                        const int  noPosition = 0;

                        WriteCombinedLine(mappedWriter, dataLine, proteinName, proteinDescription, proteinCount, noResidue, noPosition);
                        continue;
                    }

                    for (var i = 0; i < mods.Count; ++i)
                    {
                        var modifiedResidue = ' ';

                        // Residue of mod: the character immediately before the '*'
                        // (left blank if the '*' is the first character)
                        if (mods[i] > 0)
                        {
                            modifiedResidue = noPrefixSequence[mods[i] - 1];
                        }

                        // Position of residue
                        // mods[i] is an index into a string that still contains the i earlier '*' symbols,
                        // so subtract i to discount them, and 1 more to step back from the '*' to its residue
                        var residuePosition = match.residueStart + mods[i] - i - 1;

                        WriteCombinedLine(mappedWriter, dataLine, proteinName, proteinDescription, proteinCount, modifiedResidue, residuePosition);
                    }
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Load the Peptide to Protein mapping using the specified PHRP result file
        /// </summary>
        /// <remarks>
        /// <para>
        /// The PepToProtMap file contains Residue_Start and Residue_End columns.
        /// Each tab-delimited line is read as: peptide, protein name, residue start, residue end.
        /// Lines with fewer than four columns, or whose third or fourth column is not an integer
        /// (which includes the header line), are ignored.
        /// </para>
        /// <para>
        /// Peptides are stored by clean sequence. When a peptide is already present, the protein is
        /// added to the existing entry unless MaxProteinsPerSeqID is non-zero and the entry already
        /// has that many proteins.
        /// </para>
        /// <para>
        /// A progress message is written to the console at most once every 5 seconds
        /// (checked every 100 lines).
        /// </para>
        /// </remarks>
        /// <param name="filePath">Path to the tab-delimited PepToProtMap file</param>
        /// <param name="pepToProteinMap">Peptide to protein mapping; entries are added to or updated in this dictionary</param>
        /// <returns>True once the file has been read; errors are reported by throwing, not by returning false</returns>
        /// <exception cref="Exception">
        /// Thrown when reading or parsing fails; the original exception is available via InnerException
        /// </exception>
        private bool LoadPepToProtMapData(string filePath, IDictionary<string, PepToProteinMapInfo> pepToProteinMap)
        {
            var linesRead      = 0;
            var lastProgress   = DateTime.UtcNow;
            var notifyComplete = false;

            try
            {
                // Read the data from the PepToProtMap file
                using var reader = new StreamReader(new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite));

                while (!reader.EndOfStream)
                {
                    var lineIn = reader.ReadLine();
                    linesRead++;

                    if (string.IsNullOrEmpty(lineIn))
                    {
                        continue;
                    }

                    var splitLine = lineIn.Split('\t');

                    // Parse out the numbers from the last two columns
                    // (the first line of the file is the header line, and it will get skipped
                    //  because its values are not integers)
                    if (splitLine.Length >= 4 &&
                        int.TryParse(splitLine[2], out var residueStart) &&
                        int.TryParse(splitLine[3], out var residueEnd))
                    {
                        var peptide = PeptideCleavageStateCalculator.ExtractCleanSequenceFromSequenceWithMods(splitLine[0], true);

                        if (pepToProteinMap.TryGetValue(peptide, out var pepToProtMapInfo))
                        {
                            // Known peptide: add this protein unless the per-peptide limit has been reached
                            // (a limit of 0 means no limit)
                            if (MaxProteinsPerSeqID == 0 || pepToProtMapInfo.ProteinCount < MaxProteinsPerSeqID)
                            {
                                pepToProtMapInfo.AddProtein(splitLine[1], residueStart, residueEnd);
                            }
                        }
                        else
                        {
                            pepToProteinMap.Add(peptide, new PepToProteinMapInfo(splitLine[1], residueStart, residueEnd));
                        }
                    }

                    // Only consider reporting progress every 100 lines, and at most once every 5 seconds
                    if (linesRead % 100 != 0 || DateTime.UtcNow.Subtract(lastProgress).TotalSeconds < 5)
                    {
                        continue;
                    }

                    var percentComplete = reader.BaseStream.Position / (float)reader.BaseStream.Length * 100;
                    Console.WriteLine(" ... caching PepToProtMapData: {0:0.0}% complete", percentComplete);
                    lastProgress   = DateTime.UtcNow;
                    notifyComplete = true;
                }

                // Only announce completion if at least one intermediate progress message was shown
                if (notifyComplete)
                {
                    Console.WriteLine(" ... caching PepToProtMapData: 100% complete");
                }
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so its type and stack trace are not lost
                throw new Exception("Exception loading Peptide to Protein Map data from " + Path.GetFileName(filePath) + ": " + ex.Message, ex);
            }

            return true;
        }