/// <summary>
/// Builds a peptide from a hard-coded sequence string, positions the atoms between the
/// first and last atom ID of its molecule reference, and writes the serialized PDB text
/// to <c>G:\Projects\HumanGenome\cftr.pdb</c>.
/// </summary>
public void GeneratePdbForPeptide()
{
    // Sequence kept in fixed-width segments; the compiler folds them into one constant.
    const string sequence =
        "MQRSPLEKASVVSKLFFSWTRPILRKGYRQRLELSDIYQIPSVDSADNLSEKLEREWDRE" +
        "LASKKNPKLINALRRCFFWRFMFYGIFLYLGEVTKAVQPLLLGRIIASYDPDNKEERSIA" +
        "IYLGIGLCLLFIVRTLLLHPAIFGLHHIGMQMRIAMFSLIYKKTLKLSSRVLDKISIGQL" +
        "VSLLSNNLNKFDEGLALAHFVWIAPLQVALLMGLIWELLQASAFCGLGFLIVLALFQAGL" +
        "GRMMMKYRDQRAGKISERLVITSEMIENIQSVKAYCWEEAMEKMIENLRQTELKLTRKAA" +
        "YVRYFNSSAFFFSGFFVVFLSVLPYALIKGIILRKIFTTISFCIVLRMAVTRQFPWAVQT" +
        "WYDSLGAINKIQDFLQKQEYKTLEYNLTTTEVVMENVTAFWEEGFGELFEKAKQNNNNRK" +
        "TSNGDDSLFFSNFSLLGTPVLKDINFKIERGQLLAVAGSTGAGKTSLLMVIMGELEPSEG" +
        "KIKHSGRISFCSQFSWIMPGTIKENIIFGVSYDEYRYRSVIKACQLEEDISKFAEKDNIV" +
        "LGEGGITLSGGQRARISLARAVYKDADLYLLDSPFGYLDVLTEKEIFESCVCKLMANKTR" +
        "ILVTSKMEHLKKADKILILHEGSSYFYGTFSELQNLQPDFSSKLMGCDSFDQFSAERRNS" +
        "ILTETLHRFSLEGDAPVSWTETKKQSFKQTGEFGEKRKNSILNPINSIRKFSIVQKTPLQ" +
        "MNGIEEDSDEPLERRLSLVPDSEQGEAILPRISVISTGPTLQARRRQSVLNLMTHSVNQG" +
        "QNIHRKTTASTRKVSLAPQANLTELDIYSRRLSQETGLEISEEINEEDLKECFFDDMESI" +
        "PAVTTWNTYLRYITVHKSLIFVLIWCLVIFLAEVAASLVVLWLLGNTPLQDKGNSTHSRN" +
        "NSYAVIITSTSSYYVFYIYVGVADTLLAMGFFRGLPLVHTLITVSKILHHKMLHSVLQAP" +
        "MSTLNTLKAGGILNRFSKDIAILDDLLPLTIFDFIQLLLIVIGAIAVVAVLQPYIFVATV" +
        "PVIVAFIMLRAYFLQTSQQLKQLESEGRSPIFTHLVTSLKGLWTLRAFGRQPYFETLFHK" +
        "ALNLHTANWFLYLSTLRWFQMRIEMIFVIFFIAVTFISILTTGEGEGRVGIILTLAMNIM" +
        "STLQWAVNSSIDVDSLMRSVSRVFKFIDMPTEGKPTKSTKPYKNGQLSKVMIIENSHVKK" +
        "DDIWPSGGQMTVKDLTAKYTEGGNAILENISFSISPGQRVGLLGRTGSGKSTLLSAFLRL" +
        "LNTEGEIQIDGVSWDSITLQQWRKAFGVIPQKVFIFSGTFRKNLDPYEQWSDQEIWKVAD" +
        "EVGLRSVIEQFPGKLDFVLVDGGCVLSHGHKQLMCLARSVLSKAKILLLDEPSAHLDPVT" +
        "YQIIRRTLKQAFADCTVILCEHRIEAMLECQQFLVIEENKVRQYDSIQKLLNERSLFRQA" +
        "ISPSDRVKLFPHRNSSKCKSKPQIAALKEETEEEVQDTRL";

    var builtPeptide = PeptideBuilder.PeptideFromString(sequence);

    // Position every atom in the range spanned by the peptide's molecule reference.
    builtPeptide.Molecule.PositionAtoms(
        builtPeptide.MoleculeReference.FirstAtomId,
        builtPeptide.MoleculeReference.LastAtomId);

    var pdbText = PdbSerializer.Serialize("cftr", builtPeptide);
    File.WriteAllText(@"G:\Projects\HumanGenome\cftr.pdb", pdbText);
}
/// <summary>
/// Reads the first chain of the first model of PDB entries <c>1xmj</c> and <c>2bbo</c>,
/// aligns a subsequence of the second onto a subsequence of the first, applies the
/// resulting transformation to all positioned atoms of the second chain, and writes
/// both structures to the output directory.
/// </summary>
/// <remarks>
/// Start indices, subsequence length, input files and the output directory are hard-coded.
/// The output directory is created when it does not exist yet.
/// </remarks>
public void AlignPdbSubsequences()
{
    var pdbCode1 = "1xmj";
    var startIndex1 = 48;
    var pdbCode2 = "2bbo";
    var startIndex2 = 60;
    var length = 40;
    var outputDirectory = @"C:\Temp";

    var pdbFile1 = $@"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\pdb{pdbCode1}.ent";
    var pdbFile2 = $@"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\pdb{pdbCode2}.ent";
    var peptide1 = PdbReader.ReadFile(pdbFile1).Models.First().Chains.First();
    var peptide2 = PdbReader.ReadFile(pdbFile2).Models.First().Chains.First();

    var proteinAligner = new ProteinAligner();
    var proteinAlignerResult = proteinAligner.AlignSubsequence(peptide1, startIndex1, peptide2, startIndex2, length);
    // NOTE(review): the alignment result is applied without checking whether the
    // aligner considers it valid - confirm that this is intended.
    var alignmentTransform = proteinAlignerResult.Transformation;

    // Move every positioned atom of the second peptide into the frame of the first.
    // The transformation is applied to the position expressed in picometers.
    peptide2.Molecule.Atoms
        .Where(atom => atom.IsPositioned)
        .ForEach(atom =>
        {
            atom.IsPositionFixed = false;
            atom.Position = alignmentTransform
                .Apply(atom.Position.In(SIPrefix.Pico, Unit.Meter))
                .To(SIPrefix.Pico, Unit.Meter);
        });
    var repositionedPdb = PdbSerializer.Serialize(pdbCode2, peptide2);

    // File.Copy/File.WriteAllText fail when the target directory is missing;
    // CreateDirectory is a no-op when it already exists.
    Directory.CreateDirectory(outputDirectory);

    // Keep an unmodified copy of the reference structure next to the repositioned one.
    File.Copy(pdbFile1, Path.Combine(outputDirectory, $@"pdb{pdbCode1}.ent"), true);
    File.WriteAllText(
        Path.Combine(outputDirectory, $@"pdb{pdbCode2}_repositioned_{pdbCode1}_sub{startIndex1}-{startIndex2}-{length}.ent"),
        repositionedPdb);
}
/// <summary>
/// Runs the multi-level growing folding simulation for a sequence and writes the
/// backbone obtained from the approximate peptide completer as PDB text.
/// </summary>
/// <param name="pdbCode">Identifier passed to the PDB serializer.</param>
/// <param name="sequenceString">Sequence; each character is converted to an amino acid name.</param>
/// <param name="outputFilePath">Path of the PDB file to write.</param>
public void RunSimulationForSequence(string pdbCode, string sequenceString, string outputFilePath)
{
    const string distributionDirectory = @"G:\Projects\HumanGenome\ramachadranDistributions";

    var simulator = new MultiLevelGrowingFoldingSimulator(distributionDirectory);
    var simulationSettings = new MultiLevelGrowingFoldingSimulatorSettings();

    // One amino acid name per character of the input string.
    var residues =
        (from residueCode in sequenceString
         select residueCode.ToAminoAcidName())
        .ToList();

    var foldedPeptide = simulator.Simulate(residues, simulationSettings);
    var backbone = new ApproximatePeptideCompleter(foldedPeptide).GetBackbone();

    var pdbText = PdbSerializer.Serialize(pdbCode, backbone);
    File.WriteAllText(outputFilePath, pdbText);
}
/// <summary>
/// Aligns the first chain of the first model of every PDB file listed for
/// <paramref name="proteinName"/> onto the first listed file, then writes a combined
/// PDB of the models whose alignment error is below the acceptance threshold and a
/// CSV with the error of every file.
/// </summary>
/// <param name="proteinName">
/// Name of the protein; selects the "&lt;proteinName&gt;.csv" list file (one PDB file path
/// per line) and names the output sub-directory.
/// </param>
/// <param name="storeIndividualAlignedPdb">
/// When <c>true</c>, every aligned model is additionally written as its own
/// "pdb&lt;id&gt;.ent" file in the output directory.
/// </param>
/// <exception cref="InvalidOperationException">The list file contains no PDB file paths.</exception>
public void AlignAllModelsOfProtein(string proteinName, bool storeIndividualAlignedPdb)
{
    const string pdbFilePrefix = "pdb";
    const int maxAcceptedErrorInPicometers = 1000;

    var outputDirectory = Path.Combine(@"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\AlignedProteins", proteinName);
    // CreateDirectory is a no-op when the directory already exists.
    Directory.CreateDirectory(outputDirectory);

    var proteinListDirectory = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\ByProtein";
    var pdbListFile = Path.Combine(proteinListDirectory, proteinName + ".csv");
    // Blank lines are not file paths; skip them instead of handing them to the PDB reader.
    var pdbFiles = File.ReadLines(pdbListFile)
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .ToList();
    if (pdbFiles.Count == 0)
    {
        throw new InvalidOperationException($"PDB list file '{pdbListFile}' does not contain any PDB file paths.");
    }

    // The first listed file is the reference all other models are aligned onto.
    var referencePdbFile = pdbFiles[0];
    var firstPeptide = PdbReader.ReadFile(referencePdbFile).Models.First().Chains.First();
    File.Copy(referencePdbFile, Path.Combine(outputDirectory, Path.GetFileName(referencePdbFile)), true);

    var proteinAligner = new ProteinAligner();
    // combinedModels[i] belongs to pdbFiles[i]; the reference has zero error by definition.
    var combinedModels = new List<Peptide> { firstPeptide };
    var modelErrors = new Dictionary<string, UnitValue>
    {
        { referencePdbFile, 0.To(Unit.Meter) }
    };

    foreach (var pdbFile in pdbFiles.Skip(1))
    {
        var peptide = PdbReader.ReadFile(pdbFile).Models.First().Chains.First();
        var proteinAlignerResult = proteinAligner.Align(firstPeptide, peptide);
        var alignmentTransform = proteinAlignerResult.Transformation;

        // Move every positioned atom into the frame of the reference peptide.
        // The transformation is applied to the position expressed in picometers.
        peptide.Molecule.Atoms
            .Where(atom => atom.IsPositioned)
            .ForEach(atom =>
            {
                atom.IsPositionFixed = false;
                atom.Position = alignmentTransform
                    .Apply(atom.Position.In(SIPrefix.Pico, Unit.Meter))
                    .To(SIPrefix.Pico, Unit.Meter);
            });

        // NOTE(review): an invalid alignment is recorded as +infinity, which also enters
        // the median/RMS below; presumably a single invalid model makes the RMS infinite
        // so that the threshold falls back to the fixed cap - confirm this is intended.
        var modelError = proteinAlignerResult.IsTransformationValid
            ? proteinAlignerResult.AveragePositionError
            : double.PositiveInfinity.To(Unit.Meter);
        modelErrors.Add(pdbFile, modelError);
        combinedModels.Add(peptide);

        if (storeIndividualAlignedPdb)
        {
            // Strip only the leading "pdb" of the file name. Removing every occurrence
            // would mangle IDs that themselves contain "pdb" (e.g. "pdb2pdb.ent" -> "2").
            var fileStem = Path.GetFileNameWithoutExtension(pdbFile);
            var pdbId = fileStem.StartsWith(pdbFilePrefix, StringComparison.Ordinal)
                ? fileStem.Substring(pdbFilePrefix.Length)
                : fileStem;
            var repositionedPdb = PdbSerializer.Serialize(pdbId, peptide);
            File.WriteAllText(
                Path.Combine(outputDirectory, $"pdb{pdbId}.ent"),
                repositionedPdb);
        }
    }

    var medianError = modelErrors.Values.Select(x => x.In(SIPrefix.Pico, Unit.Meter)).Median();
    // Root-mean-square of the errors (not a standard deviation around the mean).
    var rmsError = modelErrors.Values
        .Select(x => x.In(SIPrefix.Pico, Unit.Meter))
        .Average(x => x.Square()).Sqrt();
    // Accept models below median + 2 * RMS, but never above the fixed cap.
    var errorThreshold = Math.Min(medianError + 2 * rmsError, maxAcceptedErrorInPicometers);

    var validModels = pdbFiles
        .Select((pdbFile, idx) => new
        {
            PdbFile = pdbFile,
            Model = combinedModels[idx],
            Error = modelErrors[pdbFile].In(SIPrefix.Pico, Unit.Meter)
        })
        .Where(x => x.Error < errorThreshold)
        .Select(x => x.Model)
        .ToArray();

    var combinedPdb = PdbSerializer.Serialize("1234", validModels);
    File.WriteAllText(
        Path.Combine(outputDirectory, "pdb_combined.ent"),
        combinedPdb);
    // One "<pdb file>;<error in picometers>" line per input file.
    File.WriteAllLines(
        Path.Combine(outputDirectory, "averageError.csv"),
        modelErrors.Select(kvp => $"{kvp.Key};{kvp.Value.In(SIPrefix.Pico, Unit.Meter)}"));
}