// Aligns a fixed-length subsequence of the first chain of PDB entry 2bbo onto the
        // corresponding subsequence of PDB entry 1xmj, then writes both structures to the
        // output directory: the first file is copied unchanged, the second is serialized
        // after the alignment transformation has been applied to its positioned atoms.
        public void AlignPdbSubsequences()
        {
            var pdbCode1        = "1xmj";
            var startIndex1     = 48;
            var pdbCode2        = "2bbo";
            var startIndex2     = 60;
            var length          = 40;
            var pdbDirectory    = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned";
            var outputDirectory = @"C:\Temp";

            var pdbFile1 = $@"{pdbDirectory}\pdb{pdbCode1}.ent";
            var pdbFile2 = $@"{pdbDirectory}\pdb{pdbCode2}.ent";

            // Only the first chain of the first model of each file takes part in the alignment.
            var peptide1 = PdbReader.ReadFile(pdbFile1).Models.First().Chains.First();
            var peptide2 = PdbReader.ReadFile(pdbFile2).Models.First().Chains.First();

            var proteinAligner       = new ProteinAligner();
            var proteinAlignerResult = proteinAligner.AlignSubsequence(peptide1, startIndex1, peptide2, startIndex2, length);
            var alignmentTransform   = proteinAlignerResult.Transformation;

            // Move every positioned atom of the second peptide into the frame of the first one.
            foreach (var atom in peptide2.Molecule.Atoms.Where(a => a.IsPositioned))
            {
                atom.IsPositionFixed = false;
                atom.Position        = alignmentTransform.Apply(atom.Position.In(SIPrefix.Pico, Unit.Meter)).To(SIPrefix.Pico, Unit.Meter);
            }
            var repositionedPdb = PdbSerializer.Serialize(pdbCode2, peptide2);

            // Make sure the target directory exists; CreateDirectory does nothing if it already does.
            Directory.CreateDirectory(outputDirectory);

            File.Copy(pdbFile1, Path.Combine(outputDirectory, $@"pdb{pdbCode1}.ent"), true);
            File.WriteAllText(
                Path.Combine(outputDirectory, $@"pdb{pdbCode2}_repositioned_{pdbCode1}_sub{startIndex1}-{startIndex2}-{length}.ent"),
                repositionedPdb);
        }
        /// <summary>
        /// Smoke check: aligns two short, hard-coded amino acid sequences with
        /// <c>SequenceAligner.Align</c> and asserts that a non-null result comes back.
        /// </summary>
        public void Debug()
        {
            var sequence1 = new List<AminoAcidName>
            {
                AminoAcidName.Alanine,
                AminoAcidName.Glutamine,
                AminoAcidName.Glycine,
                AminoAcidName.Isoleucine
            };
            var sequence2 = new List<AminoAcidName>
            {
                AminoAcidName.Histidine,
                AminoAcidName.Glycine,
                AminoAcidName.Isoleucine,
                AminoAcidName.Proline,
                AminoAcidName.Methionine
            };

            // The alignment is done by the static SequenceAligner; no ProteinAligner instance is needed.
            var alignmentResult = SequenceAligner.Align(sequence1, sequence2);

            Assert.That(alignmentResult, Is.Not.Null);
        }
        /// <summary>
        /// Aligns the first chain of the first model of every PDB file listed for a protein onto the
        /// first listed structure. Writes a combined PDB containing the models whose alignment error
        /// is below the outlier threshold, and a CSV file with the error of every listed file.
        /// </summary>
        /// <param name="proteinName">
        /// Name of the protein. Selects the list file "{proteinName}.csv" (one PDB file path per line)
        /// and the name of the output sub-directory.
        /// </param>
        /// <param name="storeIndividualAlignedPdb">
        /// When <c>true</c>, every aligned structure is additionally written as its own PDB file
        /// into the output directory.
        /// </param>
        /// <exception cref="InvalidOperationException">The list file contains no PDB file entries.</exception>
        public void AlignAllModelsOfProtein(string proteinName, bool storeIndividualAlignedPdb)
        {
            var outputDirectory = Path.Combine(@"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\AlignedProteins", proteinName);

            // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
            Directory.CreateDirectory(outputDirectory);

            var proteinListDirectory = @"G:\Projects\HumanGenome\Protein-PDBs\HumanProteins\SingleChain\FullyPositioned\ByProtein";
            var pdbListFile          = Path.Combine(proteinListDirectory, proteinName + ".csv");

            // Skip blank lines so that they are not handed to the PDB reader as file paths.
            var pdbFiles = File.ReadLines(pdbListFile)
                           .Where(line => !string.IsNullOrWhiteSpace(line))
                           .ToList();
            if (pdbFiles.Count == 0)
            {
                throw new InvalidOperationException($"PDB list file '{pdbListFile}' contains no entries.");
            }

            // The first listed structure is the reference that all others are aligned onto.
            var referencePdbFile = pdbFiles[0];
            var firstPeptide     = PdbReader.ReadFile(referencePdbFile).Models.First().Chains.First();

            File.Copy(referencePdbFile, Path.Combine(outputDirectory, Path.GetFileName(referencePdbFile)), true);

            var proteinAligner = new ProteinAligner();
            var combinedModels = new List<Peptide> { firstPeptide };
            var modelErrors    = new Dictionary<string, UnitValue>
            {
                { referencePdbFile, 0.To(Unit.Meter) }
            };

            foreach (var pdbFile in pdbFiles.Skip(1))
            {
                var peptide              = PdbReader.ReadFile(pdbFile).Models.First().Chains.First();
                var proteinAlignerResult = proteinAligner.Align(firstPeptide, peptide);
                var alignmentTransform   = proteinAlignerResult.Transformation;

                // Move every positioned atom into the frame of the reference structure.
                foreach (var atom in peptide.Molecule.Atoms.Where(a => a.IsPositioned))
                {
                    atom.IsPositionFixed = false;
                    atom.Position        = alignmentTransform.Apply(atom.Position.In(SIPrefix.Pico, Unit.Meter)).To(SIPrefix.Pico, Unit.Meter);
                }

                // An invalid transformation is recorded as an infinite error so the model is never selected.
                var modelError = proteinAlignerResult.IsTransformationValid
                    ? proteinAlignerResult.AveragePositionError
                    : double.PositiveInfinity.To(Unit.Meter);
                modelErrors.Add(pdbFile, modelError);
                combinedModels.Add(peptide);

                if (storeIndividualAlignedPdb)
                {
                    var pdbId           = Path.GetFileNameWithoutExtension(pdbFile).Replace("pdb", "");
                    var repositionedPdb = PdbSerializer.Serialize(pdbId, peptide);
                    File.WriteAllText(
                        Path.Combine(outputDirectory, $"pdb{pdbId}.ent"),
                        repositionedPdb);
                }
            }

            // Errors in picometers, in the same order as pdbFiles and combinedModels.
            var errorsInPicometer = pdbFiles
                                    .Select(pdbFile => modelErrors[pdbFile].In(SIPrefix.Pico, Unit.Meter))
                                    .ToList();

            // Only finite errors enter the statistics: a single infinite (invalid) entry would make the
            // root-mean-square infinite and reduce the threshold to the fixed 1000 pm cap.
            // The reference structure always contributes a finite error of 0, so this list is never empty.
            var finiteErrors = errorsInPicometer
                               .Where(error => !double.IsInfinity(error) && !double.IsNaN(error))
                               .ToList();
            var medianError    = finiteErrors.Median();
            var stdError       = finiteErrors.Average(x => x.Square()).Sqrt();
            var errorThreshold = Math.Min(medianError + 2 * stdError, 1000);

            // Infinite and NaN errors never compare below the threshold, so those models are dropped here.
            var validModels = combinedModels
                              .Where((model, idx) => errorsInPicometer[idx] < errorThreshold)
                              .ToArray();

            var combinedPdb = PdbSerializer.Serialize("1234", validModels);

            File.WriteAllText(
                Path.Combine(outputDirectory, "pdb_combined.ent"),
                combinedPdb);

            // Rows follow the order of the list file rather than dictionary enumeration order.
            File.WriteAllLines(
                Path.Combine(outputDirectory, "averageError.csv"),
                pdbFiles.Select((pdbFile, idx) => $"{pdbFile};{errorsInPicometer[idx]}"));
        }