Esempio n. 1
0
        /// <summary>
        /// Reads all peptides from the hard-coded peptide file, cuts each peptide's
        /// nucleotide range out of the matching chromosome file, runs
        /// <c>ExonExtractor.ExtractExons</c> on it and appends the interleaved
        /// nucleotide / aligned-sequence lines plus exon-length statistics to
        /// <c>{GeneSymbol}.txt</c> inside <c>OutputDirectory</c>.
        /// </summary>
        /// <remarks>
        /// Peptides are processed ordered by chromosome, so a chromosome file is only
        /// located and read when the chromosome differs from the previous peptide's.
        /// A failure while processing a single peptide (including an out-of-range
        /// start/end position) is recorded as the exception message in that peptide's
        /// output file and processing continues with the next peptide.
        /// </remarks>
        private static void SequenceAlignment()
        {
            // CreateDirectory does nothing when the directory already exists,
            // so no separate existence check is needed.
            Directory.CreateDirectory(OutputDirectory);

            var peptides = PeptideFileReader.ReadPeptides(@"G:\Projects\HumanGenome\Homo_sapiens.GRCh38.pep.all.fa");

            string lastChromosome = null;
            string chromosomeData = null;

            foreach (var peptide in peptides.OrderBy(pep => pep.Chromosome))
            {
                Console.WriteLine(peptide.GeneSymbol + ", chromosome " + peptide.Chromosome);

                if (chromosomeData == null || peptide.Chromosome != lastChromosome)
                {
                    // Only scan the directory when a new chromosome starts; exactly one
                    // matching file is expected (Single() throws otherwise).
                    var chromosomeFile = Directory.GetFiles(ChromosomeDataDirectory, $"*chromosome_{peptide.Chromosome}.*").Single();
                    chromosomeData = File.ReadAllText(chromosomeFile);
                    lastChromosome = peptide.Chromosome;
                }

                var peptideDescription = $"{peptide.GeneSymbol}:{peptide.Chromosome}:{peptide.StartBase}:{peptide.EndBase}";
                var outputFile         = Path.Combine(OutputDirectory, peptide.GeneSymbol + ".txt");
                try
                {
                    // StartBase/EndBase are treated as 1-based inclusive positions.
                    // Kept inside the try so a bad range is logged for this peptide
                    // instead of aborting the whole run.
                    var nucleotides = chromosomeData.Substring(peptide.StartBase - 1, peptide.EndBase - peptide.StartBase + 1);

                    var exons = ExonExtractor.ExtractExons(nucleotides, new string(peptide.Sequence.ToArray()));

                    var alignedSequence = BuildAlignedSequence(nucleotides, exons);
                    var statistics      = Environment.NewLine
                                          + "Statistics:" + Environment.NewLine
                                          + "-------------------------------" + Environment.NewLine
                                          + "Exon count: " + exons.Count + Environment.NewLine
                                          + "Shortest exon: " + exons.Min(e => e.AminoAcids.Count) + Environment.NewLine
                                          + "Longest exon: " + exons.Max(e => e.AminoAcids.Count) + Environment.NewLine
                                          + "Median exon length: " + exons.Median(e => e.AminoAcids.Count);

                    // string.Join builds the text in one pass; pairwise concatenation
                    // re-copies the accumulated text for every line.
                    var alignmentText = string.Join(
                        Environment.NewLine,
                        InterleaveLines(SpliceText(nucleotides, 120), SpliceText(alignedSequence, 120)));

                    File.AppendAllLines(outputFile, new[]
                    {
                        peptideDescription,
                        alignmentText,
                        statistics,
                        Environment.NewLine
                    });
                }
                catch (Exception ex)
                {
                    // Best effort: note the failure in the peptide's file and move on.
                    File.AppendAllLines(outputFile, new[]
                    {
                        peptideDescription,
                        ex.Message,
                        Environment.NewLine
                    });
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Scans all peptides from the hard-coded peptide file and appends one line per
        /// "well aligned" peptide to <c>well_matched_peptides.csv</c>.
        /// </summary>
        /// <remarks>
        /// A peptide is written when its sequence is non-empty and starts with 'M', and
        /// the exons returned by <c>ExonExtractor.ExtractExons</c> contain at least two
        /// adjacent exons of <c>longSequenceThreshold</c> or more amino acids each.
        /// The output line is the peptide description followed by ':' and, for every
        /// exon, its start index and <c>start + 3 * aminoAcidCount - 1</c>, all
        /// separated by ';'.
        /// A failure while processing a single peptide is reported on standard error
        /// and the peptide is skipped.
        /// </remarks>
        private static void WellAlignedSequences()
        {
            var peptides = PeptideFileReader.ReadPeptides(@"G:\Projects\HumanGenome\Homo_sapiens.GRCh38.pep.all.fa");

            string    lastChromosome        = null;
            string    chromosomeData        = null;
            const int longSequenceThreshold = 10;

            foreach (var peptide in peptides.OrderBy(pep => pep.Chromosome))
            {
                Console.WriteLine(peptide.GeneSymbol + ", chromosome " + peptide.Chromosome);

                if (chromosomeData == null || peptide.Chromosome != lastChromosome)
                {
                    // Only scan the directory when a new chromosome starts; exactly one
                    // matching file is expected (Single() throws otherwise).
                    var chromosomeFile = Directory.GetFiles(ChromosomeDataDirectory, $"*chromosome_{peptide.Chromosome}.*").Single();
                    chromosomeData = File.ReadAllText(chromosomeFile);
                    lastChromosome = peptide.Chromosome;
                }

                var peptideDescription = $"{peptide.GeneSymbol}:{peptide.Chromosome}:{peptide.StartBase}:{peptide.EndBase}";
                try
                {
                    // Cheap filter first: skip empty sequences and those not starting with 'M'
                    // before copying the nucleotide range out of the chromosome text.
                    if (!peptide.Sequence.Any() || peptide.Sequence.First() != 'M')
                    {
                        continue;
                    }

                    // StartBase/EndBase are treated as 1-based inclusive positions.
                    var nucleotides = chromosomeData.Substring(peptide.StartBase - 1, peptide.EndBase - peptide.StartBase + 1);
                    var exons       = ExonExtractor.ExtractExons(nucleotides, new string(peptide.Sequence.ToArray()));

                    var longSequenceMarkers = exons.Select(e => e.AminoAcids.Count >= longSequenceThreshold).ToList();

                    // Pair every marker with its successor. Zip simply yields nothing for
                    // zero or one exon, so no exception is needed to reject those cases.
                    var hasContiguousLongSequences = longSequenceMarkers
                                                     .Zip(longSequenceMarkers.Skip(1), (current, next) => current && next)
                                                     .Any(bothLong => bothLong);
                    if (!hasContiguousLongSequences)
                    {
                        continue;
                    }

                    var exonRanges = exons.Select(e =>
                                                  e.StartNucelotideIndex
                                                  + ";"
                                                  + (e.StartNucelotideIndex + 3 * e.AminoAcids.Count - 1));
                    var exonData = peptideDescription + ":" + string.Join(";", exonRanges);
                    File.AppendAllText(@"well_matched_peptides.csv", exonData + Environment.NewLine);
                }
                catch (Exception ex)
                {
                    // Still best effort (the run continues), but no longer silent.
                    Console.Error.WriteLine($"Skipping {peptideDescription}: {ex.Message}");
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Runs exon extraction on a small hard-coded example and writes the nucleotide
        /// string followed by its aligned sequence, one per line, to
        /// <c>peptide_alignment.txt</c> (overwriting any existing file).
        /// </summary>
        private static void TestExonExtraction()
        {
            const string peptideSequence = "MAGVRTTGKLTT";
            const string sourceSequence  = "IIMAGVLI#ARTTIIVLRTTGKLRTT#ITT";

            // Convert the source sequence to nucleotides, then look for the peptide in it.
            var nucleotides = NucleotideFromPeptideSequence(sourceSequence);
            var exons       = ExonExtractor.ExtractExons(nucleotides, peptideSequence);

            var outputLines = new[]
            {
                nucleotides,
                BuildAlignedSequence(nucleotides, exons)
            };
            File.WriteAllLines(@"peptide_alignment.txt", outputLines);
        }