예제 #1
0
        /// <summary>
        /// Reads a PDB file and prints, for two amino acids of the first chain of the first model,
        /// their measured angles and a few inter-atomic distances.
        /// </summary>
        /// <param name="pdbFilePath">Path of the PDB file to read.</param>
        /// <param name="sequenceNumber1">Sequence number used to look up the first amino acid.</param>
        /// <param name="sequenceNumber2">Sequence number used to look up the second amino acid.</param>
        public void MeasureAminoAcidDistances(string pdbFilePath, int sequenceNumber1, int sequenceNumber2)
        {
            var chain        = PdbReader.ReadFile(pdbFilePath).Models.First().Chains.First();
            var anglesLookup = AminoAcidAngleMeasurer.MeasureAngles(chain);
            var first        = chain.AminoAcids.Find(aa => aa.SequenceNumber == sequenceNumber1);
            var second       = chain.AminoAcids.Find(aa => aa.SequenceNumber == sequenceNumber2);
            var firstAngles  = anglesLookup[first];
            var secondAngles = anglesLookup[second];

            // Distance between the "CA" atoms of the two amino acids.
            var firstCarbonAlpha  = first.GetAtomFromName("CA").Position;
            var secondCarbonAlpha = second.GetAtomFromName("CA").Position;
            var carbonAlphaDistance = firstCarbonAlpha.DistanceTo(secondCarbonAlpha);
            Console.WriteLine("Carbon alpha distance: " + carbonAlphaDistance);
            Console.WriteLine($"Amino acid {sequenceNumber1} ({first.Name}) angles: {firstAngles}");
            Console.WriteLine($"Amino acid {sequenceNumber2} ({second.Name}) angles: {secondAngles}");

            // Distance between the "CA" and "C" atoms of the first amino acid.
            var firstCarbon = first.GetAtomFromName("C").Position;
            var carbonAlphaCarbonDistance = firstCarbonAlpha.DistanceTo(firstCarbon);
            Console.WriteLine($"Carbon alpha-Carbon distance: {carbonAlphaCarbonDistance}");

            // Distance between "C" of the first and "N" of the second amino acid.
            var secondNitrogen = second.GetAtomFromName("N").Position;
            var carbonNitrogenDistance = firstCarbon.DistanceTo(secondNitrogen);
            Console.WriteLine($"Carbon-Nitrogen distance: {carbonNitrogenDistance}");
        }
예제 #2
0
        /// <summary>
        /// Compares the amino acids covered by <c>AlphaHelixDetector.Detect</c> with the amino acids
        /// covered by the annotations read from the PDB file (first chain of the first model).
        /// </summary>
        /// <remarks>
        /// All annotations of the peptide form the expected set; they are not filtered by type here.
        /// The test requires more than 90% true positives and fewer than 10% false positives and
        /// false negatives, each relative to the number of expected sequence numbers.
        /// </remarks>
        /// <param name="pdbFilePath">Path of the PDB file to read.</param>
        public void DetectedAlphaHelixMatchesOriginalAnnotation(string pdbFilePath)
        {
            var pdbResult = PdbReader.ReadFile(pdbFilePath);
            var detector  = new AlphaHelixDetector();
            var chain     = pdbResult.Models.First().Chains.First();
            var detected  = detector.Detect(chain);

            var expectedNumbers = (from annotation in chain.Annotations
                                   from aminoAcid in annotation.AminoAcidReferences
                                   select aminoAcid.SequenceNumber).ToList();
            var actualNumbers = (from annotation in detected
                                 from aminoAcid in annotation.AminoAcidReferences
                                 select aminoAcid.SequenceNumber).ToList();

            var requiredHits  = 0.9 * expectedNumbers.Count;
            var allowedMisses = 0.1 * expectedNumbers.Count;

            var hits = actualNumbers.Intersect(expectedNumbers).ToList();
            Assert.That(hits.Count, Is.GreaterThan(requiredHits));

            var spurious = actualNumbers.Except(expectedNumbers).ToList();
            Assert.That(spurious.Count, Is.LessThan(allowedMisses));

            var missed = expectedNumbers.Except(actualNumbers).ToList();
            Assert.That(missed.Count, Is.LessThan(allowedMisses));
        }
예제 #3
0
 /// <summary>
 /// Reads a PDB file and appends a header line ("#" + file name without extension) followed by
 /// one full-sequence line per chain of the first model to <paramref name="output"/>.
 /// </summary>
 /// <remarks>
 /// Nothing is appended when the file has no models or the first model has no chains.
 /// Any exception is caught and its message is written to the console.
 /// </remarks>
 /// <param name="pdbFile">Path of the PDB file to read.</param>
 /// <param name="output">List that receives the generated lines.</param>
 public static void ExtractFullSequenceFromFile(
     string pdbFile,
     List <string> output)
 {
     try
     {
         var models = PdbReader.ReadFile(pdbFile).Models;
         if (!models.Any())
         {
             return;
         }
         var chains = models.First().Chains;
         if (!chains.Any())
         {
             return;
         }

         output.Add($"#{Path.GetFileNameWithoutExtension(pdbFile)}");
         foreach (var chain in chains)
         {
             // Only alpha-helix annotations are handed to GetFullSequence.
             var helices = chain.Annotations
                           .Where(annotation => annotation.Type == PeptideSecondaryStructure.AlphaHelix)
                           .ToList();
             output.Add(GetFullSequence(chain, helices));
         }
     }
     catch (Exception e)
     {
         Console.WriteLine("Exception: " + e.Message);
     }
 }
예제 #4
0
        /// <summary>
        /// Aligns a subsequence of PDB entry "2bbo" onto a subsequence of "1xmj", applies the resulting
        /// transformation to every positioned atom of the second peptide and writes both structures
        /// to the output directory.
        /// </summary>
        /// <remarks>
        /// All paths, PDB codes, start indices and the subsequence length are hard-coded.
        /// </remarks>
        public void AlignPdbSubsequences()
        {
            var pdbCode1        = "1xmj";
            var pdbCode2        = "2bbo";
            var startIndex1     = 48;
            var startIndex2     = 60;
            var length          = 40;
            var outputDirectory = @"C:\Temp";

            var pdbFile1 = $@"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\pdb{pdbCode1}.ent";
            var pdbFile2 = $@"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\pdb{pdbCode2}.ent";
            var peptide1 = PdbReader.ReadFile(pdbFile1).Models.First().Chains.First();
            var peptide2 = PdbReader.ReadFile(pdbFile2).Models.First().Chains.First();

            var aligner   = new ProteinAligner();
            var alignment = aligner.AlignSubsequence(peptide1, startIndex1, peptide2, startIndex2, length);
            var transform = alignment.Transformation;

            // Move every positioned atom of the second peptide; unpositioned atoms are left untouched.
            foreach (var atom in peptide2.Molecule.Atoms)
            {
                if (!atom.IsPositioned)
                {
                    continue;
                }
                atom.IsPositionFixed = false;
                var positionInPicoMeter = atom.Position.In(SIPrefix.Pico, Unit.Meter);
                atom.Position = transform.Apply(positionInPicoMeter).To(SIPrefix.Pico, Unit.Meter);
            }

            var repositionedPdb = PdbSerializer.Serialize(pdbCode2, peptide2);
            var referenceTarget = Path.Combine(outputDirectory, $@"pdb{pdbCode1}.ent");
            var alignedTarget   = Path.Combine(
                outputDirectory,
                $@"pdb{pdbCode2}_repositioned_{pdbCode1}_sub{startIndex1}-{startIndex2}-{length}.ent");

            File.Copy(pdbFile1, referenceTarget, true);
            File.WriteAllText(alignedTarget, repositionedPdb);
        }
예제 #5
0
        /// <summary>
        /// Re-reads every "*.ent" file in the failed directory and deletes each file
        /// that <c>PdbReader.ReadFile</c> reads without throwing.
        /// </summary>
        /// <remarks>
        /// An exception from the reader propagates and stops the loop, leaving the offending file in place.
        /// </remarks>
        public void PdbReaderDebug()
        {
            var inputDirectory = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\Failed";

            // Snapshot the directory listing first: files are deleted while iterating.
            foreach (var pdbFile in Directory.GetFiles(inputDirectory, "*.ent"))
            {
                // The read result is disposable (other callers wrap it in using); release it
                // before deleting the file instead of dropping it undisposed.
                using (PdbReader.ReadFile(pdbFile))
                {
                }
                File.Delete(pdbFile);
            }
        }
예제 #6
0
        /// <summary>
        /// Verifies the sequence numbers of the first and last amino acid of the first chain
        /// of the first model read from a PDB file.
        /// </summary>
        /// <param name="pdbFile">Path of the PDB file to read.</param>
        /// <param name="sequenceStart">Expected sequence number of the first amino acid.</param>
        /// <param name="sequenceStop">Expected sequence number of the last amino acid.</param>
        public void AminoAcidSequenceNumberAsExpected(string pdbFile, int sequenceStart, int sequenceStop)
        {
            var models = PdbReader.ReadFile(pdbFile).Models;

            // The check only applies when the file contains at least one model.
            Assume.That(models.Any());
            var aminoAcids = models.First().Chains.First().AminoAcids;

            Assert.That(aminoAcids.First().SequenceNumber, Is.EqualTo(sequenceStart));
            Assert.That(aminoAcids.Last().SequenceNumber, Is.EqualTo(sequenceStop));
        }
예제 #7
0
        /// <summary>
        /// Writes one CSV file per model with the one-letter code and carbon-alpha position
        /// (in picometers) of every amino acid.
        /// </summary>
        /// <remarks>
        /// A model is skipped when it does not have exactly one chain, or when any carbon alpha is
        /// missing or unpositioned. If processing a file throws, the file is moved to the failed directory.
        /// The input file pattern is hard-coded to a single PDB entry.
        /// </remarks>
        public void ExtractAminoAcidPositions()
        {
            var inputDirectory  = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned";
            var failedDirectory = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\Failed";
            var outputDirectory = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\AminoAcidPositions";
            var files           = Directory.EnumerateFiles(inputDirectory, "pdb5uak.ent");

            // Both targets must exist up front. A missing output directory would make a readable file
            // look like a failure, and a missing failed directory would make the catch handler throw.
            Directory.CreateDirectory(failedDirectory);
            Directory.CreateDirectory(outputDirectory);

            Parallel.ForEach(files, pdbFile =>
            {
                try
                {
                    using (var pdbResult = PdbReader.ReadFile(pdbFile))
                    {
                        for (var modelIdx = 0; modelIdx < pdbResult.Models.Count; modelIdx++)
                        {
                            var model = pdbResult.Models[modelIdx];
                            if (model.Chains.Count != 1)
                            {
                                continue;
                            }
                            var chain            = model.Chains.Single();
                            var carbonAlphaAtoms = chain.Molecule.Atoms.Where(atom => atom.AminoAcidAtomName == "CA").ToList();
                            if (!carbonAlphaAtoms.All(atom => atom.IsPositioned))
                            {
                                continue;
                            }
                            var lines         = new List <string>();
                            var allPositioned = true;
                            foreach (var aminoAcid in chain.AminoAcids)
                            {
                                // An amino acid without a positioned carbon alpha invalidates the whole model.
                                var carbonAlpha = aminoAcid.GetAtomFromName("CA");
                                if (carbonAlpha == null || !carbonAlpha.IsPositioned)
                                {
                                    allPositioned = false;
                                    break;
                                }
                                lines.Add($"{aminoAcid.Name.ToOneLetterCode()};{carbonAlpha.Position.In(SIPrefix.Pico, Unit.Meter)}");
                            }
                            if (!allPositioned)
                            {
                                continue;
                            }
                            File.WriteAllLines(
                                Path.Combine(outputDirectory, $"{Path.GetFileNameWithoutExtension(pdbFile)}_model{modelIdx:D3}.csv"),
                                lines);
                        }
                    }
                }
                catch
                {
                    // Any failure marks the file as failed; the reader result is already disposed here.
                    File.Move(pdbFile, Path.Combine(failedDirectory, Path.GetFileName(pdbFile)));
                }
            });
        }
예제 #8
0
        /// <summary>
        /// Verifies that the expected number of models is read and that each model consists of
        /// exactly one chain with the expected number of amino acids.
        /// </summary>
        /// <param name="file">Path of the PDB file to read.</param>
        /// <param name="expectedModelCount">Expected number of models in the file.</param>
        /// <param name="expectedPeptideLength">Expected amino acid count of each model's single chain.</param>
        public void AllModelsRead(string file, int expectedModelCount, int expectedPeptideLength)
        {
            var models = PdbReader.ReadFile(file).Models;

            Assert.That(models.Count, Is.EqualTo(expectedModelCount));
            for (var modelIdx = 0; modelIdx < models.Count; modelIdx++)
            {
                var chains = models[modelIdx].Chains;
                Assert.That(chains.Count, Is.EqualTo(1));
                Assert.That(chains.Single().AminoAcids.Count, Is.EqualTo(expectedPeptideLength));
            }
        }
예제 #9
0
        /// <summary>
        /// Measures the amino acid angles of every chain of the first model of a PDB file
        /// and merges them into a single dictionary.
        /// </summary>
        /// <param name="pdbFilename">Path of the PDB file to read.</param>
        /// <returns>The angle measurements of all chains, keyed by amino acid reference.</returns>
        private static Dictionary <AminoAcidReference, AminoAcidAngles> MeasureDihedralAngles(string pdbFilename)
        {
            var chains            = PdbReader.ReadFile(pdbFilename).Models.First().Chains;
            var anglesByAminoAcid = new Dictionary <AminoAcidReference, AminoAcidAngles>();

            foreach (var chain in chains)
            {
                // Add (not the indexer) is used, so a key repeated across chains throws.
                foreach (var entry in AminoAcidAngleMeasurer.MeasureAngles(chain))
                {
                    anglesByAminoAcid.Add(entry.Key, entry.Value);
                }
            }

            return anglesByAminoAcid;
        }
예제 #10
0
        /// <summary>
        /// Loads a peptide from a file, choosing the reader by the lower-cased file extension.
        /// </summary>
        /// <param name="filename">Path of a ".pdb" or ".aminoseq" file.</param>
        /// <returns>
        /// For ".pdb" the first chain of the first model; for ".aminoseq" the result of
        /// <c>AminoseqReader.ReadFile</c>.
        /// </returns>
        /// <exception cref="ArgumentException">The file extension is neither ".pdb" nor ".aminoseq".</exception>
        public static Peptide Load(string filename)
        {
            var extension = Path.GetExtension(filename).ToLowerInvariant();

            if (extension == ".pdb")
            {
                return PdbReader.ReadFile(filename).Models.First().Chains.First();
            }

            if (extension == ".aminoseq")
            {
                return AminoseqReader.ReadFile(filename);
            }

            throw new ArgumentException($"File extension '{extension}' is unsupported");
        }
예제 #11
0
        /// <summary>
        /// Reads every "*.ent" file in the input directory and moves it into a sub-directory according
        /// to the largest chain count found among its models: "NoChain", "SingleChain" or "MultiChain".
        /// Files whose processing throws are moved to "Failed".
        /// </summary>
        /// <remarks>
        /// The test always ends with <c>Assert.Pass</c>; it sorts files rather than verifying them.
        /// </remarks>
        public void PdbReadTest()
        {
            var inputDirectory = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins";

            // "Failed" is the target of the catch handler below, so it must exist as well;
            // otherwise the handler itself throws from inside the parallel loop.
            foreach (var category in new[] { "NoChain", "SingleChain", "MultiChain", "Failed" })
            {
                Directory.CreateDirectory(Path.Combine(inputDirectory, category));
            }

            // Snapshot the listing: files are moved out of the directory while it is processed.
            var files = Directory.GetFiles(inputDirectory, "*.ent");

            Parallel.ForEach(files, pdbFile =>
            {
                try
                {
                    using (var pdbResult = PdbReader.ReadFile(pdbFile))
                    {
                        // Max throws for a file without models, which routes it to "Failed".
                        var maxChainCount = pdbResult.Models.Max(model => model.Chains.Count);
                        string category;
                        if (maxChainCount == 0)
                        {
                            category = "NoChain";
                        }
                        else if (maxChainCount == 1)
                        {
                            category = "SingleChain";
                        }
                        else
                        {
                            category = "MultiChain";
                        }
                        File.Move(pdbFile, Path.Combine(inputDirectory, category, Path.GetFileName(pdbFile)));
                    }
                }
                catch
                {
                    File.Move(pdbFile, Path.Combine(inputDirectory, "Failed", Path.GetFileName(pdbFile)));
                }
            });
            Assert.Pass();
        }
예제 #12
0
        /// <summary>
        /// For every protein lookup CSV, writes a file containing one line per listed PDB file, where
        /// each amino acid's one-letter code is placed at the column given by its sequence number.
        /// </summary>
        /// <remarks>
        /// Columns without an amino acid stay blank and amino acids with a sequence number below 1 are
        /// ignored. Lookup files whose processing throws are collected and listed on the console.
        /// </remarks>
        public void ProteinPdbSequenceAlignment()
        {
            var proteinIndexCsvFileDirectory = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\ByProtein";
            var outputDirectory  = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\SequenceOutput";
            var csvFiles         = Directory.EnumerateFiles(proteinIndexCsvFileDirectory, "*.csv");
            var failingSequences = new ConcurrentBag <string>();

            // Without the output directory every write would throw and every protein would be
            // reported as failing.
            Directory.CreateDirectory(outputDirectory);

            Parallel.ForEach(csvFiles, csvFile =>
            {
                try
                {
                    var sequences = new List <string>();
                    foreach (var proteinPdbPath in File.ReadLines(csvFile))
                    {
                        // Dispose each reader result as soon as its sequence line is built; many
                        // files are read per protein and proteins are processed in parallel.
                        using (var pdbFile = PdbReader.ReadFile(proteinPdbPath))
                        {
                            var peptide           = pdbFile.Models.First().Chains.Single();
                            var maxSequenceNumber = peptide.AminoAcids.Max(aa => aa.SequenceNumber);
                            var sequence          = Enumerable.Repeat(' ', maxSequenceNumber).ToArray();
                            foreach (var aminoAcidReference in peptide.AminoAcids)
                            {
                                if (aminoAcidReference.SequenceNumber < 1)
                                {
                                    continue;
                                }
                                // Sequence numbers are 1-based columns in the output line.
                                sequence[aminoAcidReference.SequenceNumber - 1] = aminoAcidReference.Name.ToOneLetterCode();
                            }
                            sequences.Add(new string(sequence));
                        }
                    }
                    var outputFile = Path.Combine(outputDirectory, Path.GetFileName(csvFile));
                    File.WriteAllLines(outputFile, sequences);
                }
                catch
                {
                    failingSequences.Add(csvFile);
                }
            });
            Console.WriteLine("Failing proteins:");
            foreach (var failingProtein in failingSequences)
            {
                Console.WriteLine(failingProtein);
            }
        }
예제 #13
0
        /// <summary>
        /// Sorts the "*.ent" files of the input directory into "FullyPositioned", "PartiallyPositioned"
        /// or "NotPositioned", based on whether the carbon-alpha atoms of the first single-chain model
        /// are positioned.
        /// </summary>
        /// <remarks>
        /// Files without any single-chain model are left where they are.
        /// </remarks>
        public void FilterByPositionedMolecules()
        {
            var inputDirectory = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain";

            Directory.CreateDirectory(Path.Combine(inputDirectory, "FullyPositioned"));
            Directory.CreateDirectory(Path.Combine(inputDirectory, "PartiallyPositioned"));
            Directory.CreateDirectory(Path.Combine(inputDirectory, "NotPositioned"));

            Parallel.ForEach(Directory.EnumerateFiles(inputDirectory, "*.ent"), pdbFile =>
            {
                using (var pdbResult = PdbReader.ReadFile(pdbFile))
                {
                    foreach (var model in pdbResult.Models)
                    {
                        if (model.Chains.Count != 1)
                        {
                            continue;
                        }

                        var carbonAlphas = model.Chains.Single().Molecule.Atoms
                                           .Where(atom => atom.AminoAcidAtomName == "CA")
                                           .ToList();

                        string category;
                        if (carbonAlphas.All(atom => atom.IsPositioned))
                        {
                            category = "FullyPositioned";
                        }
                        else if (carbonAlphas.Any(atom => atom.IsPositioned))
                        {
                            category = "PartiallyPositioned";
                        }
                        else
                        {
                            category = "NotPositioned";
                        }

                        File.Move(pdbFile, Path.Combine(inputDirectory, category, Path.GetFileName(pdbFile)));

                        // Only the first single-chain model decides where the file goes.
                        break;
                    }
                }
            });
        }
예제 #14
0
        /// <summary>
        /// Prints average, median, minimum and maximum of the carbon-alpha distances between consecutive
        /// amino acids of the first chain of the first model, followed by every individual distance.
        /// </summary>
        /// <remarks>
        /// Distances are converted to nanometers before the statistics are computed.
        /// </remarks>
        /// <param name="pdbFilePath">Path of the PDB file to read.</param>
        public void MeasureAverageAminoAcidDistance(string pdbFilePath)
        {
            var pdb                = PdbReader.ReadFile(pdbFilePath);
            var firstChain         = pdb.Models.First().Chains.First();
            var lastAminoAcid      = firstChain.AminoAcids.First();
            var aminoAcidDistances = new List <UnitValue>();

            foreach (var aminoAcid in firstChain.AminoAcids.Skip(1))
            {
                var p1       = lastAminoAcid.GetAtomFromName("CA").Position;
                var p2       = aminoAcid.GetAtomFromName("CA").Position;
                var distance = p1.DistanceTo(p2);
                aminoAcidDistances.Add(distance);

                // Advance the reference so the next distance is measured from this amino acid.
                // Without this, every distance would be measured from the first amino acid of the chain.
                lastAminoAcid = aminoAcid;
            }

            var distancesInNanoMeter = aminoAcidDistances.Select(x => x.In(SIPrefix.Nano, Unit.Meter)).ToList();

            Console.WriteLine($"Average: {distancesInNanoMeter.Average():F3} nm");
            Console.WriteLine($"Median: {distancesInNanoMeter.Median():F3} nm");
            Console.WriteLine($"Minimum: {distancesInNanoMeter.Min():F3} nm");
            Console.WriteLine($"Maximum: {distancesInNanoMeter.Max():F3} nm");
            distancesInNanoMeter.ForEach(Console.WriteLine);
        }
예제 #15
0
        /// <summary>
        /// Builds an approximate peptide from the first chain of the first model of a PDB file, runs a
        /// folding simulation on it and waits on <c>simulationWaitHandle</c> before passing.
        /// </summary>
        /// <remarks>
        /// The wait handle is presumably signalled by <c>Simulator_SimulationCompleted</c>; that handler
        /// is not part of this method and should be checked.
        /// </remarks>
        /// <param name="pdbFilePath">Path of the PDB file to read.</param>
        public void ApproximatePeptideIsFoldedToKnownStableState(string pdbFilePath)
        {
            var sourcePeptide      = PdbReader.ReadFile(pdbFilePath).Models.First().Chains.First();
            var approximatePeptide = ApproximatePeptideBuilder.FromPeptide(sourcePeptide);

            var settings = new ApproximatePeptideSimulationSettings
            {
                SimulationTime = 10.To(SIPrefix.Pico, Unit.Second),
                TimeStep       = 2.To(SIPrefix.Femto, Unit.Second)
            };
            var simulator = ApproximatePeptideFoldingSimulatorFactory.Create(
                approximatePeptide,
                settings,
                @"G:\Projects\HumanGenome\ramachadranDistributions");

            // Subscribe and reset the handle before starting, so no completion signal is missed.
            simulator.TimestepCompleted   += Simulator_TimestepCompleted;
            simulator.SimulationCompleted += Simulator_SimulationCompleted;
            simulationWaitHandle.Reset();

            simulator.StartSimulation();
            simulationWaitHandle.WaitOne();

            Assert.Pass();
        }
        /// <summary>
        /// For every protein lookup file, selects the listed PDB entry with the most amino acids that
        /// carries alpha-helix annotations and appends its header and full sequences to the output file.
        /// </summary>
        /// <remarks>
        /// PDB entries without models, chains or helix annotations, and entries that do not exceed the
        /// largest amino acid count seen so far for the protein, are skipped. Read errors are written to
        /// the console and appended to an error file. The output file is deleted before processing starts.
        /// </remarks>
        public void ExtractAndAnnotateHelixSequences()
        {
            var directory        = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\ByProtein";
            var outputFilePath   = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\humanFullyPositionedSingleChainUniqueProteinHelixMarked.txt";
            var errorFilePath    = @"C:\Temp\errors.txt";
            var pdbLookupFiles   = Directory.EnumerateFiles(directory, "*.csv");
            var pdbReaderOptions = new PdbReaderOptions {
                MaximumModelCount = 1, BuildMolecule = false
            };
            var outputLock = new object();
            var errorLock  = new object();

            File.Delete(outputFilePath);
            Parallel.ForEach(pdbLookupFiles, pdbLookupFile =>
            {
                var pdbFilePaths             = File.ReadAllLines(pdbLookupFile);
                var proteinOutput            = new List <string>();
                var maxProteinAminoAcidCount = 0;
                foreach (var pdbFilePath in pdbFilePaths)
                {
                    try
                    {
                        using (var pdbResult = PdbReader.ReadFile(pdbFilePath, pdbReaderOptions))
                        {
                            // An unsuitable PDB entry only skips that entry (continue). Returning here
                            // would abandon the whole protein and drop the best candidate found so far.
                            if (!pdbResult.Models.Any())
                            {
                                continue;
                            }
                            var firstModel = pdbResult.Models.First();
                            if (!firstModel.Chains.Any())
                            {
                                continue;
                            }
                            var hasHelixAnnotations = firstModel.Chains
                                                      .SelectMany(chain => chain.Annotations)
                                                      .Any(annotation => annotation.Type == PeptideSecondaryStructure.AlphaHelix);
                            if (!hasHelixAnnotations)
                            {
                                continue;
                            }
                            var aminoAcidCount = firstModel.Chains.Sum(chain => chain.AminoAcids.Count);
                            if (aminoAcidCount <= maxProteinAminoAcidCount)
                            {
                                continue;
                            }

                            // This entry is the largest so far: replace the previously collected output.
                            proteinOutput.Clear();
                            proteinOutput.Add("#" + Path.GetFileNameWithoutExtension(pdbFilePath));
                            foreach (var chain in firstModel.Chains)
                            {
                                var helixAnnotations = chain.Annotations.Where(annot => annot.Type == PeptideSecondaryStructure.AlphaHelix).ToList();
                                var fullSequence     = AlphaHelixAnnotationTool.GetFullSequence(chain, helixAnnotations);
                                proteinOutput.Add(fullSequence);
                            }

                            maxProteinAminoAcidCount = aminoAcidCount;
                        }
                    }
                    catch (Exception e)
                    {
                        var errorMessage = $"Exception: {e.Message}(Path: {pdbFilePath})";
                        Console.WriteLine(errorMessage);

                        // The error file is shared by all parallel workers; serialize the appends.
                        lock (errorLock)
                        {
                            File.AppendAllLines(errorFilePath, new [] { errorMessage });
                        }
                    }
                }
                lock (outputLock)
                {
                    File.AppendAllLines(outputFilePath, proteinOutput);
                }
            });
        }
예제 #17
0
        /// <summary>
        /// Aligns every PDB file listed for a protein onto the first listed file, records the alignment
        /// error per file and writes a combined PDB of the models whose error is below a limit.
        /// </summary>
        /// <remarks>
        /// A file with an invalid transformation gets an infinite error. The limit is the smaller of
        /// median error plus twice the root-mean-square error, and 1000 (errors in picometers).
        /// </remarks>
        /// <param name="proteinName">Name of the protein; selects the lookup CSV and names the output directory.</param>
        /// <param name="storeIndividualAlignedPdb">Whether each aligned peptide is also written to its own file.</param>
        public void AlignAllModelsOfProtein(string proteinName, bool storeIndividualAlignedPdb)
        {
            var outputDirectory = Path.Combine(@"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\AlignedProteins", proteinName);

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(outputDirectory);

            var proteinListDirectory = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\ByProtein";
            var pdbFiles             = File.ReadLines(Path.Combine(proteinListDirectory, proteinName + ".csv")).ToList();
            var referenceFile        = pdbFiles.First();
            var referencePeptide     = PdbReader.ReadFile(referenceFile).Models.First().Chains.First();

            File.Copy(referenceFile, Path.Combine(outputDirectory, Path.GetFileName(referenceFile)), true);

            var aligner       = new ProteinAligner();
            var alignedModels = new List <Peptide> {
                referencePeptide
            };
            var errorsByFile = new Dictionary <string, UnitValue> {
                { referenceFile, 0.To(Unit.Meter) }
            };

            foreach (var pdbFile in pdbFiles.Skip(1))
            {
                var candidate = PdbReader.ReadFile(pdbFile).Models.First().Chains.First();
                var alignment = aligner.Align(referencePeptide, candidate);
                var transform = alignment.Transformation;

                // Move every positioned atom of the candidate with the alignment transformation.
                foreach (var atom in candidate.Molecule.Atoms)
                {
                    if (!atom.IsPositioned)
                    {
                        continue;
                    }
                    atom.IsPositionFixed = false;
                    var positionInPicoMeter = atom.Position.In(SIPrefix.Pico, Unit.Meter);
                    atom.Position = transform.Apply(positionInPicoMeter).To(SIPrefix.Pico, Unit.Meter);
                }

                var alignmentError = alignment.IsTransformationValid
                    ? alignment.AveragePositionError
                    : double.PositiveInfinity.To(Unit.Meter);
                errorsByFile.Add(pdbFile, alignmentError);
                alignedModels.Add(candidate);

                if (!storeIndividualAlignedPdb)
                {
                    continue;
                }
                var pdbId = Path.GetFileNameWithoutExtension(pdbFile).Replace("pdb", "");
                File.WriteAllText(
                    Path.Combine(outputDirectory, $"pdb{pdbId}.ent"),
                    PdbSerializer.Serialize(pdbId, candidate));
            }

            var medianError = errorsByFile.Values.Select(x => x.In(SIPrefix.Pico, Unit.Meter)).Median();
            var stdError    = errorsByFile.Values
                              .Select(x => x.In(SIPrefix.Pico, Unit.Meter))
                              .Average(x => x.Square()).Sqrt();
            var errorLimit  = Math.Min(medianError + 2 * stdError, 1000);

            // alignedModels and pdbFiles share indices: entry idx of both belongs to the same file.
            var acceptedModels = new List <Peptide>();
            for (var idx = 0; idx < pdbFiles.Count; idx++)
            {
                var errorInPicoMeter = errorsByFile[pdbFiles[idx]].In(SIPrefix.Pico, Unit.Meter);
                var model            = alignedModels[idx];
                if (errorInPicoMeter < errorLimit)
                {
                    acceptedModels.Add(model);
                }
            }
            var validModels = acceptedModels.ToArray();

            File.WriteAllText(
                Path.Combine(outputDirectory, "pdb_combined.ent"),
                PdbSerializer.Serialize("1234", validModels));
            File.WriteAllLines(
                Path.Combine(outputDirectory, "averageError.csv"),
                errorsByFile.Select(kvp => $"{kvp.Key};{kvp.Value.In(SIPrefix.Pico, Unit.Meter)}"));
        }