예제 #1
0
 /// <summary>
 ///     Returns the percentage of the given amino acid in the entire UniProt database,
 ///     for an amino acid identified by a name/code string.
 /// </summary>
 /// <param name="aminoAcidCode">
 ///     Amino acid name/code. It is converted with
 ///     <c>AminoAcidConversions.AminoAcidNameToNumber</c> before the lookup.
 /// </param>
 /// <returns>
 ///     The result of the <c>AminoAcidCompositionInPercent</c> overload that accepts the
 ///     converted amino acid number.
 /// </returns>
 /// <exception cref="ArgumentNullException">
 ///     Thrown when <paramref name="aminoAcidCode"/> is null.
 /// </exception>
 public static decimal AminoAcidCompositionInPercent(string aminoAcidCode)
 {
     if (aminoAcidCode != null)
     {
         // Translate the textual code first, then defer to the number-based overload.
         var aminoAcidNumber = AminoAcidConversions.AminoAcidNameToNumber(aminoAcidCode);

         return AminoAcidCompositionInPercent(aminoAcidNumber);
     }

     throw new ArgumentNullException(nameof(aminoAcidCode));
 }
예제 #2
0
        /// <summary>
        ///     Increments the count of the amino acid identified by a single-character code.
        /// </summary>
        /// <param name="aminoAcidCode">
        ///     Amino acid code; must pass <c>ParameterValidation.IsAminoAcidCodeValid</c>.
        /// </param>
        /// <param name="incrementValue">Amount forwarded to the number-based overload (default 1).</param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     Thrown when <paramref name="aminoAcidCode"/> fails validation.
        /// </exception>
        public void IncrementAminoAcidCount(char aminoAcidCode, decimal incrementValue = 1)
        {
            if (ParameterValidation.IsAminoAcidCodeValid(aminoAcidCode))
            {
                // Convert the code to its number and let the number-based overload do the work.
                var aminoAcidNumber = AminoAcidConversions.AminoAcidNameToNumber(aminoAcidCode);

                IncrementAminoAcidCount(aminoAcidNumber, incrementValue);

                return;
            }

            throw new ArgumentOutOfRangeException(nameof(aminoAcidCode));
        }
예제 #3
0
        /// <summary>
        ///     Increments the count of the amino acid identified by a name/code string.
        /// </summary>
        /// <param name="aminoAcidCode">Amino acid name/code; must be neither null nor empty.</param>
        /// <param name="incrementValue">Amount forwarded to the number-based overload (default 1).</param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     Thrown when <paramref name="aminoAcidCode"/> is null or empty.
        /// </exception>
        public void IncrementAminoAcidCount(string aminoAcidCode, decimal incrementValue = 1)
        {
            if (!string.IsNullOrEmpty(aminoAcidCode))
            {
                // Convert the code to its number and let the number-based overload do the work.
                var aminoAcidNumber = AminoAcidConversions.AminoAcidNameToNumber(aminoAcidCode);

                IncrementAminoAcidCount(aminoAcidNumber, incrementValue);

                return;
            }

            throw new ArgumentOutOfRangeException(nameof(aminoAcidCode));
        }
        /// <summary>
        ///     Exports the matrix stored at <c>AminoAcidToAminoAcid[(int)enumAminoAcidGroups]</c> as a
        ///     labelled spreadsheet table.
        /// </summary>
        /// <param name="enumAminoAcidGroups">Selects which matrix is exported and how rows/columns are labelled.</param>
        /// <returns>
        ///     A header row (empty corner cell followed by one label per matrix column), then one row per
        ///     matrix row (label followed by that row's values). For the <c>AminoAcids</c> group the labels
        ///     come from <c>AminoAcidConversions.AminoAcidNumberToCode1L</c>; for every other group they
        ///     are the 1-based index.
        /// </returns>
        public List<SpreadsheetCell[]> SpreadsheetAminoAcidColorGroupsHeatMap(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups)
        {
            // The whole export runs under the matrix lock.
            lock (_lockAminoAcidToAminoAcid)
            {
                var values = AminoAcidToAminoAcid[(int)enumAminoAcidGroups];
                var columnCount = values.GetLength(1);
                var rowCount = values.GetLength(0);
                var labelByCode = enumAminoAcidGroups == AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids;

                var table = new List<SpreadsheetCell[]>();

                // Header: blank corner cell, then one label per column.
                var header = new SpreadsheetCell[columnCount + 1];
                header[0] = new SpreadsheetCell();
                for (var column = 1; column <= columnCount; column++)
                {
                    header[column] = labelByCode
                        ? new SpreadsheetCell(AminoAcidConversions.AminoAcidNumberToCode1L(column))
                        : new SpreadsheetCell(column);
                }
                table.Add(header);

                // Body: row label first, then the matrix values of that row.
                for (var row = 0; row < rowCount; row++)
                {
                    var cells = new SpreadsheetCell[columnCount + 1];

                    cells[0] = labelByCode
                        ? new SpreadsheetCell(AminoAcidConversions.AminoAcidNumberToCode1L(row + 1))
                        : new SpreadsheetCell(row + 1);

                    for (var column = 0; column < columnCount; column++)
                    {
                        cells[column + 1] = new SpreadsheetCell(values[row, column]);
                    }

                    table.Add(cells);
                }

                return table;
            }
        }
예제 #5
0
        /// <summary>
        ///     Builds a motif, position by position, from the amino acids that share properties with the
        ///     residues observed at each position of the given interface sequences.
        /// </summary>
        /// <param name="proteinInterfaceSequenceList">
        ///     Interface sequences; they may differ in length. Positions beyond a sequence's length are
        ///     simply not contributed by that sequence.
        /// </param>
        /// <param name="aminoAcidPropertyMatchType">Passed through to <c>AminoAcidConversions.ListAminoAcidsByProperty</c>.</param>
        /// <param name="matchValue">Passed through to <c>AminoAcidConversions.ListAminoAcidsByProperty</c> (default 0).</param>
        /// <returns>The result of <c>MotifFormatter</c> applied to the list of per-position motif strings.</returns>
        /// <exception cref="ArgumentNullException">
        ///     Thrown when <paramref name="proteinInterfaceSequenceList"/> is null.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        ///     Thrown by <c>Max()</c> when <paramref name="proteinInterfaceSequenceList"/> is empty.
        /// </exception>
        public static string FindProteinInterfaceAminoAcidCommonPropertiesMotif(List <string> proteinInterfaceSequenceList, AminoAcidPropertyMatchType aminoAcidPropertyMatchType, double matchValue = 0)
        {
            if (proteinInterfaceSequenceList == null)
            {
                throw new ArgumentNullException(nameof(proteinInterfaceSequenceList));
            }

            // The motif has one entry per position of the longest sequence.
            var proteinInterfaceLength = proteinInterfaceSequenceList.Select(a => a.Length).Max();

            // Property objects for every amino acid, indexed by (amino acid number - 1).
            var aminoAcidObjects = new AminoAcidProperties <bool> [TotalAminoAcids];

            for (var aaIndex = 0; aaIndex < aminoAcidObjects.Length; aaIndex++)
            {
                aminoAcidObjects[aaIndex] = AminoAcidConversions.AminoAcidNumberToAminoAcidObject(aaIndex + 1);
            }

            var motifs = new List <string>();

            for (int positionIndex = 0; positionIndex < proteinInterfaceLength; positionIndex++)
            {
                string positionMotif = "";

                // Distinct residues seen at this position, taken only from sequences long enough to have it.
                foreach (var aaCode in proteinInterfaceSequenceList.Where(a => a.Length > positionIndex).Select(a => a[positionIndex]).Distinct().ToArray())
                {
                    // Skip gap/placeholder characters and non-standard amino acids.
                    if (!char.IsLetterOrDigit(aaCode) || NonStandardAminoAcids.Contains(aaCode))
                    {
                        continue;
                    }

                    var aminoAcid = aminoAcidObjects[AminoAcidConversions.AminoAcidNameToNumber(aaCode) - 1];

                    var matches = AminoAcidConversions.ListAminoAcidsByProperty(aminoAcid, aminoAcidPropertyMatchType, matchValue);

                    // Append only the matches not already in this position's motif; the filter sees the
                    // motif as it was before this append.
                    positionMotif += string.Join("", matches.Where(a => !positionMotif.Contains(a) && !NonStandardAminoAcids.Contains(a)).ToArray());
                }

                motifs.Add(positionMotif);
            }

            return(MotifFormatter(motifs));
        }
예제 #6
0
        /// <summary>
        ///     Builds the title row: "Protein ID", "Chain ID", one column per amino acid (labelled via
        ///     <c>AminoAcidConversions.AminoAcidNumberToCode1L</c> for numbers 1 to
        ///     <c>AminoAcidTotals.TotalAminoAcids()</c>), and a final "Total" column.
        /// </summary>
        /// <returns>The title cells in column order.</returns>
        public static SpreadsheetCell[] SpreadsheetTitleRow()
        {
            var cells = new List<SpreadsheetCell>
            {
                new SpreadsheetCell("Protein ID"),
                new SpreadsheetCell("Chain ID")
            };

            int aminoAcidCount = AminoAcidTotals.TotalAminoAcids();

            // Amino acid numbers are 1-based.
            for (int number = 1; number <= aminoAcidCount; number++)
            {
                var label = AminoAcidConversions.AminoAcidNumberToCode1L(number);
                cells.Add(new SpreadsheetCell(label));
            }

            cells.Add(new SpreadsheetCell("Total"));

            return cells.ToArray();
        }
        /// <summary>
        ///     Builds the spreadsheet row describing this interacting atom pair.
        /// </summary>
        /// <returns>
        ///     Cells in this order: protein id, pair distance, "-", then for atom 1: chain id, interface id A,
        ///     residue sequence number, atom name, residue code (via <c>AminoAcidNameToCode1L</c>), subgroup
        ///     colour names for the Physicochemical, Hydrophobicity, PdbSum and UniProtKb groups, and the
        ///     X/Y/Z coordinates; then "&lt;--&gt;" followed by the same columns for atom 2 (with interface id B).
        /// </returns>
        public SpreadsheetCell[] SpreadsheetDataRow()
        {
            var firstAtom = InteractingAtomPair.Atom1;
            var firstPoint = PointConversions.AtomPoint3D(firstAtom);
            var secondAtom = InteractingAtomPair.Atom2;
            var secondPoint = PointConversions.AtomPoint3D(secondAtom);

            // Short aliases for the four grouping schemes reported per atom.
            var physicochemical = AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.Physicochemical;
            var hydrophobicity = AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.Hydrophobicity;
            var pdbSum = AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.PdbSum;
            var uniProtKb = AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.UniProtKb;

            // Each colour cell looks up the colour name of the first subgroup the residue belongs to.
            SpreadsheetCell[] cells =
            {
                new SpreadsheetCell(ProteinId),
                new SpreadsheetCell(InteractingAtomPair.Distance),
                new SpreadsheetCell("-"),

                // Atom 1
                new SpreadsheetCell(firstAtom.chainID.FieldValue),
                new SpreadsheetCell(ProteinInterfaceIdA),
                new SpreadsheetCell(firstAtom.resSeq.FieldValue),
                new SpreadsheetCell(firstAtom.name.FieldValue),
                new SpreadsheetCell(AminoAcidConversions.AminoAcidNameToCode1L(firstAtom.resName.FieldValue)),
                new SpreadsheetCell(AminoAcidGroups.AminoAcidGroups.GetSubgroupColorNames(physicochemical)[AminoAcidGroups.AminoAcidGroups.ConvertAminoAcidNameCodeToSubgroupNumbers(physicochemical, firstAtom.resName.FieldValue)[0]]),
                new SpreadsheetCell(AminoAcidGroups.AminoAcidGroups.GetSubgroupColorNames(hydrophobicity)[AminoAcidGroups.AminoAcidGroups.ConvertAminoAcidNameCodeToSubgroupNumbers(hydrophobicity, firstAtom.resName.FieldValue)[0]]),
                new SpreadsheetCell(AminoAcidGroups.AminoAcidGroups.GetSubgroupColorNames(pdbSum)[AminoAcidGroups.AminoAcidGroups.ConvertAminoAcidNameCodeToSubgroupNumbers(pdbSum, firstAtom.resName.FieldValue)[0]]),
                new SpreadsheetCell(AminoAcidGroups.AminoAcidGroups.GetSubgroupColorNames(uniProtKb)[AminoAcidGroups.AminoAcidGroups.ConvertAminoAcidNameCodeToSubgroupNumbers(uniProtKb, firstAtom.resName.FieldValue)[0]]),
                new SpreadsheetCell(firstPoint.X),
                new SpreadsheetCell(firstPoint.Y),
                new SpreadsheetCell(firstPoint.Z),

                new SpreadsheetCell("<-->"),

                // Atom 2
                new SpreadsheetCell(secondAtom.chainID.FieldValue),
                new SpreadsheetCell(ProteinInterfaceIdB),
                new SpreadsheetCell(secondAtom.resSeq.FieldValue),
                new SpreadsheetCell(secondAtom.name.FieldValue),
                new SpreadsheetCell(AminoAcidConversions.AminoAcidNameToCode1L(secondAtom.resName.FieldValue)),
                new SpreadsheetCell(AminoAcidGroups.AminoAcidGroups.GetSubgroupColorNames(physicochemical)[AminoAcidGroups.AminoAcidGroups.ConvertAminoAcidNameCodeToSubgroupNumbers(physicochemical, secondAtom.resName.FieldValue)[0]]),
                new SpreadsheetCell(AminoAcidGroups.AminoAcidGroups.GetSubgroupColorNames(hydrophobicity)[AminoAcidGroups.AminoAcidGroups.ConvertAminoAcidNameCodeToSubgroupNumbers(hydrophobicity, secondAtom.resName.FieldValue)[0]]),
                new SpreadsheetCell(AminoAcidGroups.AminoAcidGroups.GetSubgroupColorNames(pdbSum)[AminoAcidGroups.AminoAcidGroups.ConvertAminoAcidNameCodeToSubgroupNumbers(pdbSum, secondAtom.resName.FieldValue)[0]]),
                new SpreadsheetCell(AminoAcidGroups.AminoAcidGroups.GetSubgroupColorNames(uniProtKb)[AminoAcidGroups.AminoAcidGroups.ConvertAminoAcidNameCodeToSubgroupNumbers(uniProtKb, secondAtom.resName.FieldValue)[0]]),
                new SpreadsheetCell(secondPoint.X),
                new SpreadsheetCell(secondPoint.Y),
                new SpreadsheetCell(secondPoint.Z)
            };

            return cells;
        }
예제 #8
0
        /// <summary>
        ///     Builds a spreadsheet heat map of the entire UniProt sequence database.
        /// </summary>
        /// <returns>
        ///     Rows in this order: a title row; for every <c>EnumAminoAcidGroups</c> value a blank row, a
        ///     section title and the matrix heat map for that group; then the "Original Data - A to Z"
        ///     section (amino acid labels and their counts); and finally, for every group, a blank row, a
        ///     section title, a row of 1-based subgroup numbers and a row of the subgroup counts.
        /// </returns>
        public static List<SpreadsheetCell[]> UniProtHeatMapSpreadsheet()
        {
            var sheet = new List<SpreadsheetCell[]>
            {
                // Spreadsheet title.
                new[] { new SpreadsheetCell("Amino Acid Heat Map - Entire UniProt Database Composition (Converted To Matrix)") }
            };

            // Matrix heat map, one section per grouping scheme.
            foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups groupKind in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
            {
                sheet.Add(new[] { new SpreadsheetCell(string.Empty) });
                sheet.Add(new[] { new SpreadsheetCell("Amino Acid Heat Map - Entire UniProt Database - Acid Groups " + groupKind) });
                sheet.AddRange(AminoAcidCompositionAsMatrix().SpreadsheetAminoAcidColorGroupsHeatMap(groupKind));
            }

            // Original (chain) data for the individual amino acids.
            sheet.Add(new[] { new SpreadsheetCell(string.Empty) });
            sheet.Add(new[] { new SpreadsheetCell("Amino Acid Heat Map - Entire UniProt Database - Original Data - A to Z") });

            AminoAcidChainComposition composition = AminoAcidCompositionAsChain();

            // Label row: blank lead cell, then one label per amino acid number (1-based).
            var labelRow = new List<SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
            for (int number = 1; number <= AminoAcidTotals.TotalAminoAcids(); number++)
            {
                labelRow.Add(new SpreadsheetCell(AminoAcidConversions.AminoAcidNumberToCode1L(number)));
            }
            sheet.Add(labelRow.ToArray());

            // Count row: blank lead cell, then the count of each amino acid.
            var countRow = new List<SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
            for (int index = 0; index < AminoAcidTotals.TotalAminoAcids(); index++)
            {
                countRow.Add(new SpreadsheetCell(composition.AminoAcidGroupsCount[(int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids][index]));
            }
            sheet.Add(countRow.ToArray());

            // Original (chain) data, one section per grouping scheme.
            foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups groupKind in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
            {
                sheet.Add(new[] { new SpreadsheetCell(string.Empty) });
                sheet.Add(new[] { new SpreadsheetCell("Amino Acid Heat Map - Entire UniProt Database - Original Data - Acid Groups " + groupKind) });

                // Subgroup numbers (1-based).
                var subgroupNumberRow = new List<SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
                for (int subgroup = 1; subgroup <= AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(groupKind); subgroup++)
                {
                    subgroupNumberRow.Add(new SpreadsheetCell(subgroup));
                }
                sheet.Add(subgroupNumberRow.ToArray());

                // Subgroup counts.
                var subgroupCountRow = new List<SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
                for (int index = 0; index < AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(groupKind); index++)
                {
                    subgroupCountRow.Add(new SpreadsheetCell(composition.AminoAcidGroupsCount[(int)groupKind][index]));
                }
                sheet.Add(subgroupCountRow.ToArray());
            }

            return sheet;
        }
        /// <summary>
        ///     Checks that <c>AminoAcidConversions.ListAminoAcidsByProperty</c> lists, for each single
        ///     property, the same amino acids that are found by scanning every amino acid object and
        ///     testing that property flag directly. Letter order is ignored.
        /// </summary>
        public void TestListAminoAcidsByProperty()
        {
            // Actual: what the method under test lists for each property on its own.
            var listed = new AminoAcidProperties<string>
            {
                Aliphatic = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Aliphatic = true }),
                Aromatic = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Aromatic = true }),
                Charged = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Charged = true }),
                Hydrophobic = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Hydrophobic = true }),
                Hydroxylic = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Hydroxylic = true }),
                Negative = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Negative = true }),
                Polar = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Polar = true }),
                Positive = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Positive = true }),
                Small = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Small = true }),
                Sulphur = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Sulphur = true }),
                Tiny = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Tiny = true })
            };

            // Printed for information only; not asserted.
            var others = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool>(), AminoAcidPropertyMatchType.AllMatch);

            // Expected: built by walking every amino acid (numbers are 1-based) and reading its flags.
            var expected = new AminoAcidProperties<string>();

            for (var number = 1; number <= AminoAcidTotals.TotalAminoAcids(); number++)
            {
                var aminoAcid = AminoAcidConversions.AminoAcidNumberToAminoAcidObject(number);

                if (aminoAcid.Aliphatic)
                {
                    expected.Aliphatic += aminoAcid.Code1L;
                }
                if (aminoAcid.Aromatic)
                {
                    expected.Aromatic += aminoAcid.Code1L;
                }
                if (aminoAcid.Charged)
                {
                    expected.Charged += aminoAcid.Code1L;
                }
                if (aminoAcid.Hydrophobic)
                {
                    expected.Hydrophobic += aminoAcid.Code1L;
                }
                if (aminoAcid.Hydroxylic)
                {
                    expected.Hydroxylic += aminoAcid.Code1L;
                }
                if (aminoAcid.Negative)
                {
                    expected.Negative += aminoAcid.Code1L;
                }
                if (aminoAcid.Polar)
                {
                    expected.Polar += aminoAcid.Code1L;
                }
                if (aminoAcid.Positive)
                {
                    expected.Positive += aminoAcid.Code1L;
                }
                if (aminoAcid.Small)
                {
                    expected.Small += aminoAcid.Code1L;
                }
                if (aminoAcid.Sulphur)
                {
                    expected.Sulphur += aminoAcid.Code1L;
                }
                if (aminoAcid.Tiny)
                {
                    expected.Tiny += aminoAcid.Code1L;
                }
            }

            Console.WriteLine("Aliphatic: " + listed.Aliphatic);
            Console.WriteLine("Aromatic: " + listed.Aromatic);
            Console.WriteLine("Charged: " + listed.Charged);
            Console.WriteLine("Hydrophobic: " + listed.Hydrophobic);
            Console.WriteLine("Hydroxylic: " + listed.Hydroxylic);
            Console.WriteLine("Negative: " + listed.Negative);
            Console.WriteLine("Polar: " + listed.Polar);
            Console.WriteLine("Positive: " + listed.Positive);
            Console.WriteLine("Small: " + listed.Small);
            Console.WriteLine("Sulphur: " + listed.Sulphur);
            Console.WriteLine("Tiny: " + listed.Tiny);
            Console.WriteLine("Others: " + others);

            // Normalise a letter string so the comparison does not depend on ordering:
            // sort the letters and join them with single spaces.
            Func<string, string> normalise = letters => string.Join(" ", letters.OrderBy(letter => letter).ToArray());

            Assert.AreEqual(normalise(expected.Aliphatic), normalise(listed.Aliphatic));
            Assert.AreEqual(normalise(expected.Aromatic), normalise(listed.Aromatic));
            Assert.AreEqual(normalise(expected.Charged), normalise(listed.Charged));
            Assert.AreEqual(normalise(expected.Hydrophobic), normalise(listed.Hydrophobic));
            Assert.AreEqual(normalise(expected.Hydroxylic), normalise(listed.Hydroxylic));
            Assert.AreEqual(normalise(expected.Negative), normalise(listed.Negative));
            Assert.AreEqual(normalise(expected.Polar), normalise(listed.Polar));
            Assert.AreEqual(normalise(expected.Positive), normalise(listed.Positive));
            Assert.AreEqual(normalise(expected.Small), normalise(listed.Small));
            Assert.AreEqual(normalise(expected.Sulphur), normalise(listed.Sulphur));
            Assert.AreEqual(normalise(expected.Tiny), normalise(listed.Tiny));
        }
예제 #10
0
        static void Main(string[] args)
        {
            // the indexes of data, contacts1 and contacts2 all match

            var data = MultiBindingInterface.LoadAuthorData(@"c:\multibinding\multibinding.csv", @"c:\multibinding\multibinding_homolog_clusters.csv");

            var contactsPartner1 =
                data.Select(
                    a =>
            {
                var x = ProteinBioClass.AtomPair.LoadAtomPairList(@"C:\multibinding\contacts\contacts_pdb" +
                                                                  a.InteractionChainsPdb1.ToUpperInvariant() + ".pdb")

                        .Where(
                    b =>
                    (b.Atom1.chainID.FieldValue.ToUpperInvariant()[0] ==
                     a.InteractionChainsPdb1Chain1
                     &&
                     b.Atom2.chainID.FieldValue.ToUpperInvariant()[0] ==
                     a.InteractionChainsPdb1Chain2)
                    ||
                    (b.Atom1.chainID.FieldValue.ToUpperInvariant()[0] ==
                     a.InteractionChainsPdb1Chain2
                     &&
                     b.Atom2.chainID.FieldValue.ToUpperInvariant()[0] ==
                     a.InteractionChainsPdb1Chain1)).ToList();

                x = x.Select(c =>
                {
                    if (c.Atom1.chainID.FieldValue.ToUpperInvariant()[0] == a.InteractionChainsPdb1Chain2)
                    {
                        c.SwapAtoms();
                    }

                    return(c);
                }).ToList();

                return(x);
            }).ToList();

            var contactsPartner2 =
                data.Select(
                    a =>
            {
                var x = ProteinBioClass.AtomPair.LoadAtomPairList(@"C:\multibinding\contacts\contacts_pdb" +
                                                                  a.InteractionChainsPdb2.ToUpperInvariant() + ".pdb")

                        .Where(
                    b =>
                    (b.Atom1.chainID.FieldValue.ToUpperInvariant()[0] ==
                     a.InteractionChainsPdb2Chain1
                     &&
                     b.Atom2.chainID.FieldValue.ToUpperInvariant()[0] ==
                     a.InteractionChainsPdb2Chain2)
                    ||
                    (b.Atom1.chainID.FieldValue.ToUpperInvariant()[0] ==
                     a.InteractionChainsPdb2Chain2
                     &&
                     b.Atom2.chainID.FieldValue.ToUpperInvariant()[0] ==
                     a.InteractionChainsPdb2Chain1)).ToList();

                x = x.Select(c =>
                {
                    if (c.Atom1.chainID.FieldValue.ToUpperInvariant()[0] == a.InteractionChainsPdb2Chain2)
                    {
                        c.SwapAtoms();
                    }

                    return(c);
                }).ToList();

                return(x);
            }).ToList();

            var interfacePartner1 = contactsPartner1.Select(a =>
            {
                var resSeqChain1 = a.Select(b => int.Parse(b.Atom1.resSeq.FieldValue)).ToList();
                var resSeqChain2 = a.Select(b => int.Parse(b.Atom2.resSeq.FieldValue)).ToList();

                if (resSeqChain1.Count > 0 && resSeqChain2.Count > 0)
                {
                    return(new Tuple <int, int, int, int>(resSeqChain1.Min(), resSeqChain1.Max(), resSeqChain2.Min(),
                                                          resSeqChain2.Max()));
                }
                else
                {
                    return(null);
                }
            }).ToList();


            var interfacePartner2 = contactsPartner2.Select(a =>
            {
                var resSeqChain1 = a.Select(b => int.Parse(b.Atom1.resSeq.FieldValue)).ToList();
                var resSeqChain2 = a.Select(b => int.Parse(b.Atom2.resSeq.FieldValue)).ToList();

                if (resSeqChain1.Count > 0 && resSeqChain2.Count > 0)
                {
                    return(new Tuple <int, int, int, int>(resSeqChain1.Min(), resSeqChain1.Max(), resSeqChain2.Min(),
                                                          resSeqChain2.Max()));
                }
                else
                {
                    return(null);
                }
            }).ToList();

            // var resultData = new List<MultiBindingInterface>();

            for (int index = 0; index < data.Count; index++)
            {
                var d   = data[index];
                var cp1 = contactsPartner1[index];
                var cp2 = contactsPartner2[index];
                var ip1 = interfacePartner1[index];
                var ip2 = interfacePartner2[index];

                if (d == null || cp1 == null || cp2 == null || ip1 == null || ip2 == null)
                {
                    continue;
                }
                if (cp1.Count == 0 || cp2.Count == 0)
                {
                    continue;
                }

                var p1c1_pdb = ProteinBioClass.PdbAtomicChains(@"c:\multibinding\pdb" + d.InteractionChainsPdb1 + ".pdb", new char[] { d.InteractionChainsPdb1Chain1 }, -1, -1, true);
                var p1c2_pdb = ProteinBioClass.PdbAtomicChains(@"c:\multibinding\pdb" + d.InteractionChainsPdb1 + ".pdb", new char[] { d.InteractionChainsPdb1Chain2 }, -1, -1, true);
                var p2c1_pdb = ProteinBioClass.PdbAtomicChains(@"c:\multibinding\pdb" + d.InteractionChainsPdb2 + ".pdb", new char[] { d.InteractionChainsPdb2Chain1 }, -1, -1, true);
                var p2c2_pdb = ProteinBioClass.PdbAtomicChains(@"c:\multibinding\pdb" + d.InteractionChainsPdb2 + ".pdb", new char[] { d.InteractionChainsPdb2Chain2 }, -1, -1, true);

                var p1c1_res_seq = p1c1_pdb.ChainList.First().AtomList.Select(a => int.Parse(a.resSeq.FieldValue)).ToList();
                var p1c2_res_seq = p1c2_pdb.ChainList.First().AtomList.Select(a => int.Parse(a.resSeq.FieldValue)).ToList();
                var p2c1_res_seq = p2c1_pdb.ChainList.First().AtomList.Select(a => int.Parse(a.resSeq.FieldValue)).ToList();
                var p2c2_res_seq = p2c2_pdb.ChainList.First().AtomList.Select(a => int.Parse(a.resSeq.FieldValue)).ToList();

                var cp1a1_res_seq = cp1.Select(a => int.Parse(a.Atom1.resSeq.FieldValue)).ToList();
                var cp1a2_res_seq = cp1.Select(a => int.Parse(a.Atom2.resSeq.FieldValue)).ToList();
                var cp2a1_res_seq = cp2.Select(a => int.Parse(a.Atom1.resSeq.FieldValue)).ToList();
                var cp2a2_res_seq = cp2.Select(a => int.Parse(a.Atom2.resSeq.FieldValue)).ToList();


                var cp1a1_min = cp1a1_res_seq.Min();
                var cp1a1_max = cp1a1_res_seq.Max();
                var cp1a2_min = cp1a2_res_seq.Min();
                var cp1a2_max = cp1a2_res_seq.Max();

                var cp2a1_min = cp2a1_res_seq.Min();
                var cp2a1_max = cp2a1_res_seq.Max();
                var cp2a2_min = cp2a2_res_seq.Min();
                var cp2a2_max = cp2a2_res_seq.Max();

                var cp1a1_best50_min           = int.MinValue;
                var cp1a1_best50_max           = int.MinValue;
                var cp1a1_best50_interactions  = int.MinValue;
                var cp1a1_best50_middle_finder = new List <Tuple <int, int, int> >();

                var interface_target_length = 50;

                for (var x = cp1a1_min - interface_target_length; x <= cp1a1_max; x++)
                {
                    if (Math.Abs(cp1a1_max - cp1a1_min) <= interface_target_length)
                    {
                        cp1a1_best50_min          = cp1a1_min;
                        cp1a1_best50_max          = cp1a1_max;
                        cp1a1_best50_interactions = cp1a1_res_seq.Count;
                        break;
                    }

                    var min = x;
                    var max = x + interface_target_length > cp1a1_max ? cp1a1_max : x + interface_target_length;

                    var best50 = cp1a1_res_seq.Count(a => a >= cp1a1_best50_min && a <= cp1a1_best50_max);

                    if (best50 == cp1a1_best50_interactions)
                    {
                        cp1a1_best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                    }

                    if (cp1a1_best50_interactions == int.MinValue || best50 > cp1a1_best50_interactions)
                    {
                        cp1a1_best50_middle_finder.Clear();
                        cp1a1_best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                        cp1a1_best50_min          = min;
                        cp1a1_best50_max          = max;
                        cp1a1_best50_interactions = best50;
                    }

                    if (x + interface_target_length >= cp1a1_max)
                    {
                        break;
                    }
                }

                if (cp1a1_best50_middle_finder.Count > 2)
                {
                    var middle = cp1a1_best50_middle_finder[cp1a1_best50_middle_finder.Count / 2];
                    cp1a1_best50_min          = middle.Item1;
                    cp1a1_best50_max          = middle.Item2;
                    cp1a1_best50_interactions = middle.Item3;
                }

                var cp1a2_best50_min           = int.MinValue;
                var cp1a2_best50_max           = int.MinValue;
                var cp1a2_best50_interactions  = int.MinValue;
                var cp1a2_best50_middle_finder = new List <Tuple <int, int, int> >();
                for (var x = cp1a2_min - interface_target_length; x <= cp1a2_max; x++)
                {
                    if (Math.Abs(cp1a2_max - cp1a2_min) <= interface_target_length)
                    {
                        cp1a2_best50_min          = cp1a2_min;
                        cp1a2_best50_max          = cp1a2_max;
                        cp1a2_best50_interactions = cp1a2_res_seq.Count;
                        break;
                    }

                    var min = x;
                    var max = x + interface_target_length > cp1a2_max ? cp1a2_max : x + interface_target_length;

                    var best50 = cp1a2_res_seq.Count(a => a >= cp1a2_best50_min && a <= cp1a2_best50_max);

                    if (best50 == cp1a2_best50_interactions)
                    {
                        cp1a2_best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                    }

                    if (cp1a2_best50_interactions == int.MinValue || best50 > cp1a2_best50_interactions)
                    {
                        cp1a2_best50_middle_finder.Clear();
                        cp1a2_best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                        cp1a2_best50_min          = min;
                        cp1a2_best50_max          = max;
                        cp1a2_best50_interactions = best50;
                    }

                    if (x + interface_target_length >= cp1a2_max)
                    {
                        break;
                    }
                }

                if (cp1a2_best50_middle_finder.Count > 2)
                {
                    var middle = cp1a2_best50_middle_finder[cp1a2_best50_middle_finder.Count / 2];
                    cp1a2_best50_min          = middle.Item1;
                    cp1a2_best50_max          = middle.Item2;
                    cp1a2_best50_interactions = middle.Item3;
                }


                var cp2a1_best50_min           = int.MinValue;
                var cp2a1_best50_max           = int.MinValue;
                var cp2a1_best50_interactions  = int.MinValue;
                var cp2a1_best50_middle_finder = new List <Tuple <int, int, int> >();
                for (var x = cp2a1_min - interface_target_length; x <= cp2a1_max; x++)
                {
                    if (Math.Abs(cp2a1_max - cp2a1_min) <= interface_target_length)
                    {
                        cp2a1_best50_min          = cp2a1_min;
                        cp2a1_best50_max          = cp2a1_max;
                        cp2a1_best50_interactions = cp2a1_res_seq.Count;
                        break;
                    }
                    var min = x;
                    var max = x + interface_target_length > cp2a1_max ? cp2a1_max : x + interface_target_length;

                    var best50 = cp2a1_res_seq.Count(a => a >= cp2a1_best50_min && a <= cp2a1_best50_max);

                    if (best50 == cp2a1_best50_interactions)
                    {
                        cp2a1_best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                    }

                    if (cp2a1_best50_interactions == int.MinValue || best50 > cp2a1_best50_interactions)
                    {
                        cp2a1_best50_middle_finder.Clear();
                        cp2a1_best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                        cp2a1_best50_min          = min;
                        cp2a1_best50_max          = max;
                        cp2a1_best50_interactions = best50;
                    }

                    if (x + interface_target_length >= cp2a1_max)
                    {
                        break;
                    }
                }

                if (cp2a1_best50_middle_finder.Count > 2)
                {
                    var middle = cp2a1_best50_middle_finder[cp2a1_best50_middle_finder.Count / 2];
                    cp2a1_best50_min          = middle.Item1;
                    cp2a1_best50_max          = middle.Item2;
                    cp2a1_best50_interactions = middle.Item3;
                }

                var cp2a2_best50_min           = int.MinValue;
                var cp2a2_best50_max           = int.MinValue;
                var cp2a2_best50_interactions  = int.MinValue;
                var cp2a2_best50_middle_finder = new List <Tuple <int, int, int> >();
                for (var x = cp2a2_min - interface_target_length; x <= cp2a2_max; x++)
                {
                    if (Math.Abs(cp2a2_max - cp2a2_min) <= interface_target_length)
                    {
                        cp2a2_best50_min          = cp2a2_min;
                        cp2a2_best50_max          = cp2a2_max;
                        cp2a2_best50_interactions = cp2a2_res_seq.Count;
                        break;
                    }
                    var min = x;
                    var max = x + interface_target_length > cp2a2_max ? cp2a2_max : x + interface_target_length;

                    var best50 = cp2a2_res_seq.Count(a => a >= cp2a2_best50_min && a <= cp2a2_best50_max);

                    if (best50 == cp2a2_best50_interactions)
                    {
                        cp2a2_best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                    }

                    if (cp2a2_best50_interactions == int.MinValue || best50 > cp2a2_best50_interactions)
                    {
                        cp2a2_best50_middle_finder.Clear();
                        cp2a2_best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                        cp2a2_best50_min          = min;
                        cp2a2_best50_max          = max;
                        cp2a2_best50_interactions = best50;
                    }

                    if (x + interface_target_length >= cp2a2_max)
                    {
                        break;
                    }
                }

                if (cp2a2_best50_middle_finder.Count > 2)
                {
                    var middle = cp2a2_best50_middle_finder[cp2a2_best50_middle_finder.Count / 2];
                    cp2a2_best50_min          = middle.Item1;
                    cp2a2_best50_max          = middle.Item2;
                    cp2a2_best50_interactions = middle.Item3;
                }

                var cp1a1_interface = string.Join("", p1c1_pdb.ChainList.First().AtomList.Where(a =>
                {
                    var l = int.Parse(a.resSeq.FieldValue);
                    return(l >= cp1a1_min && l <= cp1a1_max);
                }).OrderBy(c => int.Parse(c.resSeq.FieldValue)).Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                var cp1a2_interface = string.Join("", p1c2_pdb.ChainList.First().AtomList.Where(a =>
                {
                    var l = int.Parse(a.resSeq.FieldValue);
                    return(l >= cp1a2_min && l <= cp1a2_max);
                }).OrderBy(c => int.Parse(c.resSeq.FieldValue)).Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                var cp2a1_interface = string.Join("", p2c1_pdb.ChainList.First().AtomList.Where(a =>
                {
                    var l = int.Parse(a.resSeq.FieldValue);
                    return(l >= cp2a1_min && l <= cp2a1_max);
                }).OrderBy(c => int.Parse(c.resSeq.FieldValue)).Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                var cp2a2_interface = string.Join("", p2c2_pdb.ChainList.First().AtomList.Where(a =>
                {
                    var l = int.Parse(a.resSeq.FieldValue);
                    return(l >= cp2a2_min && l <= cp2a2_max);
                }).OrderBy(c => int.Parse(c.resSeq.FieldValue)).Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                var cp1a1_interface_interactions = new string('_', cp1a1_interface.Length);
                cp1a1_interface_interactions = string.Join("", cp1a1_interface_interactions.Select((a, i) => cp1a1_res_seq.Contains(i + cp1a1_min) ? "X" : "_").ToList());

                var cp1a2_interface_interactions = new string('_', cp1a2_interface.Length);
                cp1a2_interface_interactions = string.Join("", cp1a2_interface_interactions.Select((a, i) => cp1a2_res_seq.Contains(i + cp1a2_min) ? "X" : "_").ToList());

                var cp2a1_interface_interactions = new string('_', cp2a1_interface.Length);
                cp2a1_interface_interactions = string.Join("", cp2a1_interface_interactions.Select((a, i) => cp2a1_res_seq.Contains(i + cp2a1_min) ? "X" : "_").ToList());

                var cp2a2_interface_interactions = new string('_', cp2a2_interface.Length);
                cp2a2_interface_interactions = string.Join("", cp2a2_interface_interactions.Select((a, i) => cp2a2_res_seq.Contains(i + cp2a2_min) ? "X" : "_").ToList());

                var cp1a1_interactions = cp1a1_interface_interactions.Count(a => a == 'X');
                var cp1a2_interactions = cp1a2_interface_interactions.Count(a => a == 'X');
                var cp2a1_interactions = cp2a1_interface_interactions.Count(a => a == 'X');
                var cp2a2_interactions = cp2a2_interface_interactions.Count(a => a == 'X');

                var cp1a1_best50_interface = string.Join("", p1c1_pdb.ChainList.First().AtomList.Where(a =>
                {
                    var l = int.Parse(a.resSeq.FieldValue);
                    return(l >= cp1a1_best50_min && l <= cp1a1_best50_max);
                }).OrderBy(c => int.Parse(c.resSeq.FieldValue)).Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                var cp1a2_best50_interface = string.Join("", p1c2_pdb.ChainList.First().AtomList.Where(a =>
                {
                    var l = int.Parse(a.resSeq.FieldValue);
                    return(l >= cp1a2_best50_min && l <= cp1a2_best50_max);
                }).OrderBy(c => int.Parse(c.resSeq.FieldValue)).Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                var cp2a1_best50_interface = string.Join("", p2c1_pdb.ChainList.First().AtomList.Where(a =>
                {
                    var l = int.Parse(a.resSeq.FieldValue);
                    return(l >= cp2a1_best50_min && l <= cp2a1_best50_max);
                }).OrderBy(c => int.Parse(c.resSeq.FieldValue)).Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                var cp2a2_best50_interface = string.Join("", p2c2_pdb.ChainList.First().AtomList.Where(a =>
                {
                    var l = int.Parse(a.resSeq.FieldValue);
                    return(l >= cp2a2_best50_min && l <= cp2a2_best50_max);
                }).OrderBy(c => int.Parse(c.resSeq.FieldValue)).Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                var cp1a1_best50_interface_interactions = new string('_', cp1a1_best50_interface.Length);
                cp1a1_best50_interface_interactions = string.Join("", cp1a1_best50_interface_interactions.Select((a, i) => cp1a1_res_seq.Contains(i + cp1a1_best50_min) ? "X" : "_").ToList());

                var cp1a2_best50_interface_interactions = new string('_', cp1a2_best50_interface.Length);
                cp1a2_best50_interface_interactions = string.Join("", cp1a2_best50_interface_interactions.Select((a, i) => cp1a2_res_seq.Contains(i + cp1a2_best50_min) ? "X" : "_").ToList());

                var cp2a1_best50_interface_interactions = new string('_', cp2a1_best50_interface.Length);
                cp2a1_best50_interface_interactions = string.Join("", cp2a1_best50_interface_interactions.Select((a, i) => cp2a1_res_seq.Contains(i + cp2a1_best50_min) ? "X" : "_").ToList());

                var cp2a2_best50_interface_interactions = new string('_', cp2a2_best50_interface.Length);
                cp2a2_best50_interface_interactions = string.Join("", cp2a2_best50_interface_interactions.Select((a, i) => cp2a2_res_seq.Contains(i + cp2a2_best50_min) ? "X" : "_").ToList());

                d.Pdb1Chain1InterfaceStart    = ip1.Item1;
                d.Pdb1Chain1InterfaceEnd      = ip1.Item2;
                d.Pdb1Chain1TotalInteractions = cp1a1_interactions;
                d.Pdb1Chain1InterfaceSequence = cp1a1_interface;
                d.Pdb1Chain1InterfaceMask     = cp1a1_interface_interactions;

                d.Pdb1Chain1Best50InterfaceStart    = cp1a1_best50_min;
                d.Pdb1Chain1Best50InterfaceEnd      = cp1a1_best50_max;
                d.Pdb1Chain1Best50TotalInteractions = cp1a1_best50_interactions;
                d.Pdb1Chain1Best50InterfaceSequence = cp1a1_best50_interface;
                d.Pdb1Chain1Best50InterfaceMask     = cp1a1_best50_interface_interactions;

                d.Pdb1Chain2InterfaceStart    = ip1.Item3;
                d.Pdb1Chain2InterfaceEnd      = ip1.Item4;
                d.Pdb1Chain2TotalInteractions = cp1a2_interactions;
                d.Pdb1Chain2InterfaceSequence = cp1a2_interface;
                d.Pdb1Chain2InterfaceMask     = cp1a2_interface_interactions;

                d.Pdb1Chain2Best50InterfaceStart    = cp1a2_best50_min;
                d.Pdb1Chain2Best50InterfaceEnd      = cp1a2_best50_max;
                d.Pdb1Chain2Best50TotalInteractions = cp1a2_best50_interactions;
                d.Pdb1Chain2Best50InterfaceSequence = cp1a2_best50_interface;
                d.Pdb1Chain2Best50InterfaceMask     = cp1a2_best50_interface_interactions;

                d.Pdb2Chain1InterfaceStart    = ip2.Item1;
                d.Pdb2Chain1InterfaceEnd      = ip2.Item2;
                d.Pdb2Chain1TotalInteractions = cp2a1_interactions;
                d.Pdb2Chain1InterfaceSequence = cp2a1_interface;
                d.Pdb2Chain1InterfaceMask     = cp2a1_interface_interactions;

                d.Pdb2Chain1Best50InterfaceStart    = cp2a1_best50_min;
                d.Pdb2Chain1Best50InterfaceEnd      = cp2a1_best50_max;
                d.Pdb2Chain1Best50TotalInteractions = cp2a1_best50_interactions;
                d.Pdb2Chain1Best50InterfaceSequence = cp2a1_best50_interface;
                d.Pdb2Chain1Best50InterfaceMask     = cp2a1_best50_interface_interactions;

                d.Pdb2Chain2InterfaceStart    = ip2.Item3;
                d.Pdb2Chain2InterfaceEnd      = ip2.Item4;
                d.Pdb2Chain2TotalInteractions = cp2a2_interactions;
                d.Pdb2Chain2InterfaceSequence = cp2a2_interface;
                d.Pdb2Chain2InterfaceMask     = cp2a2_interface_interactions;

                d.Pdb2Chain2Best50InterfaceStart    = cp2a2_best50_min;
                d.Pdb2Chain2Best50InterfaceEnd      = cp2a2_best50_max;
                d.Pdb2Chain2Best50TotalInteractions = cp2a2_best50_interactions;
                d.Pdb2Chain2Best50InterfaceSequence = cp2a2_best50_interface;
                d.Pdb2Chain2Best50InterfaceMask     = cp2a2_best50_interface_interactions;
            }

            var output = data.Select(a => a.ToString()).ToList();

            output.Insert(0, MultiBindingInterface.Header());
            File.WriteAllLines(@"c:\multibinding\MultiBinding_parsed_results.csv", output);
            return;
        }
예제 #11
0
        /// <summary>
        ///     Loads the interaction records from c:\pdb\new_data_set.csv, reads the pre-computed atomic contacts of
        ///     both partner structures of every record and, for each of the four chains involved, reports the full
        ///     interface span, the "best 50" window (the residue window holding the most contacts), the residue
        ///     sequence of that window and an interaction mask. The result is written to
        ///     c:\pdb\new_data_set_results.csv, one CSV row per record preceded by a header row.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Width of the "best" window: a window covers the residue numbers [start, start + 50] inclusive.
            const int interfaceTargetLength = 50;

            // The indexes of data, contactsPartner1 and contactsPartner2 all match.
            var data = MultiBindingInterface.Load(@"c:\pdb\new_data_set.csv");

            var contactsPartner1 = data.Select(a =>
            {
                // Keep only the contacts between the two chains of interest, in either orientation.
                var pairs = FindAtomicContacts.AtomPair.LoadAtomPairList(@"C:\pdb\new_data_set\contacts\contacts_" +
                                                                         a.InteractionChainsPdb1.ToLowerInvariant() + ".pdb")
                    .Where(b =>
                        (ChainIdEquals(b.Atom1.chainID.FieldValue, a.InteractionChainsPdb1Chain1) &&
                         ChainIdEquals(b.Atom2.chainID.FieldValue, a.InteractionChainsPdb1Chain2))
                        ||
                        (ChainIdEquals(b.Atom1.chainID.FieldValue, a.InteractionChainsPdb1Chain2) &&
                         ChainIdEquals(b.Atom2.chainID.FieldValue, a.InteractionChainsPdb1Chain1)))
                    .ToList();

                // Normalise the orientation so that Atom1 always belongs to chain 1 and Atom2 to chain 2.
                return pairs.Select(c =>
                {
                    if (ChainIdEquals(c.Atom1.chainID.FieldValue, a.InteractionChainsPdb1Chain2))
                    {
                        c.SwapAtoms();
                    }

                    return c;
                }).ToList();
            }).ToList();

            var contactsPartner2 = data.Select(a =>
            {
                // Keep only the contacts between the two chains of interest, in either orientation.
                var pairs = FindAtomicContacts.AtomPair.LoadAtomPairList(@"C:\pdb\new_data_set\contacts\contacts_" +
                                                                         a.InteractionChainsPdb2.ToLowerInvariant() + ".pdb")
                    .Where(b =>
                        (ChainIdEquals(b.Atom1.chainID.FieldValue, a.InteractionChainsPdb2Chain1) &&
                         ChainIdEquals(b.Atom2.chainID.FieldValue, a.InteractionChainsPdb2Chain2))
                        ||
                        (ChainIdEquals(b.Atom1.chainID.FieldValue, a.InteractionChainsPdb2Chain2) &&
                         ChainIdEquals(b.Atom2.chainID.FieldValue, a.InteractionChainsPdb2Chain1)))
                    .ToList();

                // Normalise the orientation so that Atom1 always belongs to chain 1 and Atom2 to chain 2.
                return pairs.Select(c =>
                {
                    if (ChainIdEquals(c.Atom1.chainID.FieldValue, a.InteractionChainsPdb2Chain2))
                    {
                        c.SwapAtoms();
                    }

                    return c;
                }).ToList();
            }).ToList();

            var resultData = new List<string>
            {
                string.Join(",", new string[]
                {
                    "super family",

                    "partner 1 pdb id",
                    "partner 1 chain id 1", "p1c1 interface start", "p1c1 interface end", "p1c1 best 50 start", "p1c1 best 50 end", "p1c1 best 50 interactions", "p1c1 interface seq", "p1c1 interface mask",
                    "partner 1 chain id 2", "p1c2 interface start", "p1c2 interface end", "p1c2 best 50 start", "p1c2 best 50 end", "p1c2 best 50 interactions", "p1c2 interface seq", "p1c2 interface mask",
                    "partner 2 pdb id",
                    "partner 2 chain id 1", "p2c1 interface start", "p2c1 interface end", "p2c1 best 50 start", "p2c1 best 50 end", "p2c1 best 50 interactions", "p2c1 interface seq", "p2c1 interface mask",
                    "partner 2 chain id 2", "p2c2 interface start", "p2c2 interface end", "p2c2 best 50 start", "p2c2 best 50 end", "p2c2 best 50 interactions", "p2c2 interface seq", "p2c2 interface mask",
                })
            };

            for (var index = 0; index < data.Count; index++)
            {
                var d   = data[index];
                var cp1 = contactsPartner1[index];
                var cp2 = contactsPartner2[index];

                // Records without contacts on either partner have no interface to describe.
                if (d == null || cp1 == null || cp2 == null || cp1.Count == 0 || cp2.Count == 0)
                {
                    continue;
                }

                // NOTE(review): the meaning of the trailing (-1, -1, true) arguments is not visible here - confirm against PdbAtomicChains.
                var p1c1_pdb = FindAtomicContacts.PdbAtomicChains(@"c:\pdb\new_data_set\" + d.InteractionChainsPdb1 + ".pdb", new string[] { d.InteractionChainsPdb1Chain1 }, -1, -1, true);
                var p1c2_pdb = FindAtomicContacts.PdbAtomicChains(@"c:\pdb\new_data_set\" + d.InteractionChainsPdb1 + ".pdb", new string[] { d.InteractionChainsPdb1Chain2 }, -1, -1, true);
                var p2c1_pdb = FindAtomicContacts.PdbAtomicChains(@"c:\pdb\new_data_set\" + d.InteractionChainsPdb2 + ".pdb", new string[] { d.InteractionChainsPdb2Chain1 }, -1, -1, true);
                var p2c2_pdb = FindAtomicContacts.PdbAtomicChains(@"c:\pdb\new_data_set\" + d.InteractionChainsPdb2 + ".pdb", new string[] { d.InteractionChainsPdb2Chain2 }, -1, -1, true);

                // Residue numbers taking part in a contact, one entry per contact pair (duplicates are intended:
                // they make the window score a count of contacts rather than of distinct residues).
                var cp1a1_res_seq = cp1.Select(a => ParseResidueNumber(a.Atom1.resSeq.FieldValue)).ToList();
                var cp1a2_res_seq = cp1.Select(a => ParseResidueNumber(a.Atom2.resSeq.FieldValue)).ToList();
                var cp2a1_res_seq = cp2.Select(a => ParseResidueNumber(a.Atom1.resSeq.FieldValue)).ToList();
                var cp2a2_res_seq = cp2.Select(a => ParseResidueNumber(a.Atom2.resSeq.FieldValue)).ToList();

                var p1c1_columns = BuildChainColumns(p1c1_pdb.ChainList.First().AtomList, atom => atom.resSeq.FieldValue, atom => atom.resName.FieldValue, cp1a1_res_seq, interfaceTargetLength);
                var p1c2_columns = BuildChainColumns(p1c2_pdb.ChainList.First().AtomList, atom => atom.resSeq.FieldValue, atom => atom.resName.FieldValue, cp1a2_res_seq, interfaceTargetLength);
                var p2c1_columns = BuildChainColumns(p2c1_pdb.ChainList.First().AtomList, atom => atom.resSeq.FieldValue, atom => atom.resName.FieldValue, cp2a1_res_seq, interfaceTargetLength);
                var p2c2_columns = BuildChainColumns(p2c2_pdb.ChainList.First().AtomList, atom => atom.resSeq.FieldValue, atom => atom.resName.FieldValue, cp2a2_res_seq, interfaceTargetLength);

                var row = new List<string> { d.DomainSuperFamily, d.InteractionChainsPdb1, d.InteractionChainsPdb1Chain1 };
                row.AddRange(p1c1_columns);
                row.Add(d.InteractionChainsPdb1Chain2);
                row.AddRange(p1c2_columns);
                row.Add(d.InteractionChainsPdb2);
                row.Add(d.InteractionChainsPdb2Chain1);
                row.AddRange(p2c1_columns);
                row.Add(d.InteractionChainsPdb2Chain2);
                row.AddRange(p2c2_columns);

                resultData.Add(string.Join(",", row));
            }

            File.WriteAllLines(@"c:\pdb\new_data_set_results.csv", resultData);
        }

        /// <summary>
        ///     Compares two chain identifiers ignoring case.
        /// </summary>
        private static bool ChainIdEquals(string chainId1, string chainId2)
        {
            return string.Equals(chainId1, chainId2, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        ///     Parses a residue sequence number independently of the current culture, so negative residue numbers
        ///     are read correctly on machines whose culture uses a different minus sign.
        /// </summary>
        private static int ParseResidueNumber(string text)
        {
            return int.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        ///     Produces the seven per-chain CSV columns: interface start, interface end, best window start,
        ///     best window end, best window interaction count, best window sequence and best window mask.
        /// </summary>
        /// <typeparam name="TAtom">Atom record type of the chain.</typeparam>
        /// <param name="atoms">Atom records of the chain.</param>
        /// <param name="resSeqOf">Returns the residue sequence number text of an atom record.</param>
        /// <param name="resNameOf">Returns the residue name of an atom record.</param>
        /// <param name="contactResSeq">Residue number of every contact on this chain; must not be empty.</param>
        /// <param name="targetLength">Width of the best window, see <see cref="FindBestInterfaceWindow" />.</param>
        /// <returns>The seven column values, already formatted as text.</returns>
        private static string[] BuildChainColumns<TAtom>(IEnumerable<TAtom> atoms, Func<TAtom, string> resSeqOf, Func<TAtom, string> resNameOf, List<int> contactResSeq, int targetLength)
        {
            var interfaceStart = contactResSeq.Min();
            var interfaceEnd   = contactResSeq.Max();
            var best           = FindBestInterfaceWindow(contactResSeq, interfaceStart, interfaceEnd, targetLength);

            // One-letter code of every atom record whose residue lies inside the best window, in residue order.
            var sequence = string.Join("", atoms.Where(atom =>
            {
                var resSeq = ParseResidueNumber(resSeqOf(atom));
                return resSeq >= best.Item1 && resSeq <= best.Item2;
            }).OrderBy(atom => ParseResidueNumber(resSeqOf(atom))).Select(atom => AminoAcidConversions.AminoAcidNameToCode1L(resNameOf(atom))).ToList());

            var mask    = BuildInteractionMask(sequence.Length, best.Item1, contactResSeq);
            var culture = System.Globalization.CultureInfo.InvariantCulture;

            return new string[]
            {
                interfaceStart.ToString(culture),
                interfaceEnd.ToString(culture),
                best.Item1.ToString(culture),
                best.Item2.ToString(culture),
                best.Item3.ToString(culture),
                sequence,
                mask
            };
        }

        /// <summary>
        ///     Finds the residue window [start, start + targetLength] that contains the most contacts.
        ///     An interface that already fits the target length is returned whole. When more than two windows tie
        ///     for the best score, the middle one of the tied windows (in scan order) is returned.
        /// </summary>
        /// <param name="contactResSeq">Residue number of every contact on the chain.</param>
        /// <param name="interfaceStart">Smallest residue number in <paramref name="contactResSeq" />.</param>
        /// <param name="interfaceEnd">Largest residue number in <paramref name="contactResSeq" />.</param>
        /// <param name="targetLength">Distance between the first and the last residue number of a window.</param>
        /// <returns>Window start, window end and number of contacts inside the window.</returns>
        private static Tuple<int, int, int> FindBestInterfaceWindow(List<int> contactResSeq, int interfaceStart, int interfaceEnd, int targetLength)
        {
            if (Math.Abs(interfaceEnd - interfaceStart) <= targetLength)
            {
                return Tuple.Create(interfaceStart, interfaceEnd, contactResSeq.Count);
            }

            Tuple<int, int, int> best = null;
            var tiedWindows = new List<Tuple<int, int, int>>();

            // The scan starts one window width before the interface and stops once a window reaches its end.
            for (var windowStart = interfaceStart - targetLength; windowStart <= interfaceEnd; windowStart++)
            {
                var windowEnd = Math.Min(windowStart + targetLength, interfaceEnd);

                // Score the candidate window itself (the previous code scored the current best window instead).
                var interactions = contactResSeq.Count(resSeq => resSeq >= windowStart && resSeq <= windowEnd);
                var candidate    = Tuple.Create(windowStart, windowEnd, interactions);

                if (best == null || interactions > best.Item3)
                {
                    tiedWindows.Clear();
                    tiedWindows.Add(candidate);
                    best = candidate;
                }
                else if (interactions == best.Item3)
                {
                    tiedWindows.Add(candidate);
                }

                if (windowStart + targetLength >= interfaceEnd)
                {
                    break;
                }
            }

            if (tiedWindows.Count > 2)
            {
                best = tiedWindows[tiedWindows.Count / 2];
            }

            return best;
        }

        /// <summary>
        ///     Builds a mask of the given length in which position i is 'X' when residue number
        ///     <paramref name="firstResSeq" /> + i takes part in a contact and '_' otherwise.
        /// </summary>
        /// <param name="length">Length of the mask (the length of the interface sequence it annotates).</param>
        /// <param name="firstResSeq">Residue number represented by the first mask position.</param>
        /// <param name="contactResSeq">Residue number of every contact on the chain.</param>
        /// <returns>The mask string.</returns>
        private static string BuildInteractionMask(int length, int firstResSeq, IEnumerable<int> contactResSeq)
        {
            // NOTE(review): position i is mapped to residue firstResSeq + i, which assumes the sequence holds exactly
            // one character per consecutive residue number - confirm against what PdbAtomicChains returns.
            var contacts = new HashSet<int>(contactResSeq);
            var mask     = new char[length];

            for (var i = 0; i < length; i++)
            {
                mask[i] = contacts.Contains(firstResSeq + i) ? 'X' : '_';
            }

            return new string(mask);
        }
예제 #12
0
        /// <summary>
        ///     Builds one <see cref="Sequence"/> per chain of <paramref name="pdb"/>. Each residue's one-letter code is placed
        ///     at the array position derived from its residue sequence number; positions without a residue are padded.
        /// </summary>
        /// <remarks>
        ///     The chains of <paramref name="pdb"/> are modified in place: atoms outside the requested residue ranges and
        ///     surplus duplicate atoms that carry insertion codes are removed from the atom lists.
        /// </remarks>
        /// <param name="pdb">The structure whose chains are converted.</param>
        /// <param name="pdbId">Identifier written into each sequence name as ">pdbId:chainId".</param>
        /// <param name="chainIdWhiteList">Chain ids that <paramref name="startResSeq"/> and <paramref name="endResSeq"/> refer to, position by position. Chains not listed here are left untrimmed.</param>
        /// <param name="padMissingBool">When false, <paramref name="outsidePaddingChar"/> is trimmed from both ends of each sequence.</param>
        /// <param name="startResSeq">First residue number to keep for the chain at the same position in <paramref name="chainIdWhiteList"/>.</param>
        /// <param name="endResSeq">Last residue number to keep for the chain at the same position in <paramref name="chainIdWhiteList"/>.</param>
        /// <param name="outsidePaddingChar">Padding used before the first and after the last residue of a chain.</param>
        /// <param name="insidePaddingChar">Padding used for missing residues between the first and last residue of a chain.</param>
        /// <returns>One sequence per chain, in chain list order. A chain without any atoms yields an empty sequence.</returns>
        public static List <Sequence> LoadStructureFile(ProteinBioClass.StructureChainListContainer pdb, string pdbId = "", char[] chainIdWhiteList = null, bool padMissingBool = true, int[] startResSeq = null, int[] endResSeq = null, char outsidePaddingChar = ' ', char insidePaddingChar = 'X')
        {
            var result = new List <Sequence>();

            // A residue numbered 0 anywhere in the structure switches every chain to zero-based array positions.
            var zeroBased = pdb.ChainList.Any(a => a.AtomList.Any(b => Int32.Parse(b.resSeq.FieldValue) == 0));

            if ((chainIdWhiteList != null && chainIdWhiteList.Length > 0) && (startResSeq != null && startResSeq.Length > 0) && (endResSeq != null && endResSeq.Length > 0))
            {
                for (int chainIndex = 0; chainIndex < pdb.ChainList.Count; chainIndex++)
                {
                    var chainId = pdb.ChainList[chainIndex].ChainId;

                    var whiteListIndex = Array.FindIndex(chainIdWhiteList, a => a == chainId);

                    if (whiteListIndex < 0)
                    {
                        // No residue range was supplied for this chain, so there is nothing to trim it to.
                        continue;
                    }

                    var chainStartResSeq = startResSeq[whiteListIndex];
                    var chainEndResSeq   = endResSeq[whiteListIndex];

                    pdb.ChainList[chainIndex].AtomList = pdb.ChainList[chainIndex].AtomList.Where(a =>
                    {
                        var atomResSeq = Int32.Parse(a.resSeq.FieldValue);
                        return(atomResSeq >= chainStartResSeq && atomResSeq <= chainEndResSeq);
                    }).ToList();
                }
            }

            for (int index = 0; index < pdb.ChainList.Count; index++)
            {
                // Collect the surplus copies of atoms that appear more than once because of insertion codes.
                var suplAtoms = new List <ATOM_Record>();
                foreach (var atom in pdb.ChainList[index].AtomList)
                {
                    if (String.IsNullOrWhiteSpace(atom.iCode.FieldValue))
                    {
                        continue;
                    }

                    var matches = pdb.ChainList[index].AtomList.Where(a => a.resSeq.FieldValue == atom.resSeq.FieldValue && a.resName.FieldValue == atom.resName.FieldValue && a.name.FieldValue == atom.name.FieldValue).ToList();

                    if (matches.Count <= 1)
                    {
                        continue;
                    }

                    // Prefer to keep the copy without an insertion code; otherwise keep the first copy.
                    var noIcode = matches.FindIndex(a => string.IsNullOrWhiteSpace(a.iCode.FieldValue));

                    if (noIcode > -1)
                    {
                        suplAtoms.AddRange(matches.Where((a, i) => i != noIcode).ToList());
                    }
                    else
                    {
                        suplAtoms.AddRange(matches.Skip(1).ToList());
                    }
                }

                pdb.ChainList[index].AtomList.RemoveAll(a => suplAtoms.Contains(a));

                var chain = pdb.ChainList[index];

                if (chain.AtomList.Count == 0)
                {
                    // Min/Max below throw on an empty list; emit an empty sequence so the result stays aligned with the chain list.
                    result.Add(new Sequence(">" + pdbId + ":" + chain.ChainId, ""));
                    continue;
                }

                var first = chain.AtomList.Min(a => Int32.Parse(a.resSeq.FieldValue));
                var last  = chain.AtomList.Max(a => Int32.Parse(a.resSeq.FieldValue));

                // Negative residue numbers are shifted up so that every residue maps to a non-negative array position.
                var resSeqOffset = first < 0 ? Math.Abs(first) + 1 : 0;

                // Array position of a residue = residue number + indexShift.
                var indexShift = resSeqOffset - (zeroBased ? 0 : 1);

                var chainLength = last + (zeroBased ? 1 : 0) + resSeqOffset;

                var chainAa     = new char[chainLength];
                var chainAaMask = new bool[chainLength];

                for (var i = 0; i < chainAa.Length; i++)
                {
                    chainAa[i] = outsidePaddingChar;
                }

                foreach (var atom in chain.AtomList)
                {
                    var aa = atom.resName.FieldValue;
                    if (String.IsNullOrWhiteSpace(aa))
                    {
                        aa = "" + insidePaddingChar;
                    }

                    var res = Int32.Parse(atom.resSeq.FieldValue) + indexShift;

                    var aa1l = AminoAcidConversions.AminoAcidNameToCode1L(aa);
                    chainAa[res]     = aa1l[0];
                    chainAaMask[res] = true;
                }

                // Residues missing between the first and the last residue get the inside padding character.
                for (var i = first; i <= last; i++)
                {
                    var res = i + indexShift;

                    if (!chainAaMask[res])
                    {
                        chainAa[res] = insidePaddingChar;
                    }
                }

                var chainAaStr = String.Join("", chainAa);
                if (!padMissingBool)
                {
                    chainAaStr = chainAaStr.Trim(outsidePaddingChar);
                }

                result.Add(new Sequence(">" + pdbId + ":" + chain.ChainId, chainAaStr));
            }

            return(result);
        }
 /// <summary>
 ///     Converts both amino acid names/codes to their numbers and forwards them, together with
 ///     <paramref name="incrementValue"/>, to the numeric overload of IncrementAminoAcidCount.
 /// </summary>
 /// <param name="aminoAcidCodeA">Name or code of the first amino acid, as accepted by AminoAcidConversions.AminoAcidNameToNumber.</param>
 /// <param name="aminoAcidCodeB">Name or code of the second amino acid, as accepted by AminoAcidConversions.AminoAcidNameToNumber.</param>
 /// <param name="incrementValue">Value passed on to the numeric overload; defaults to 1.</param>
 public void IncrementAminoAcidCount(string aminoAcidCodeA, string aminoAcidCodeB, decimal incrementValue = 1.0m)
 {
     var aminoAcidNumberA = AminoAcidConversions.AminoAcidNameToNumber(aminoAcidCodeA);
     var aminoAcidNumberB = AminoAcidConversions.AminoAcidNameToNumber(aminoAcidCodeB);

     IncrementAminoAcidCount(aminoAcidNumberA, aminoAcidNumberB, incrementValue);
 }
예제 #14
0
        /// <summary>
        ///     Builds the interface report for the ds96ub homolog data set.
        ///     For every chain of every PDB file in the data folder the contacts with each other chain are loaded, the
        ///     window of residues holding the most contacts is selected, and the resulting interfaces are grouped by
        ///     homolog cluster and by interface similarity before being written to a CSV file.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            var pdbFolder = @"C:\ds96ub_homologs\";

            var homologClusterData = FindHomologsCluster.FindHomologsCluster.HomologClusterData.Load(@"c:\ds96ub_homologs\ds96ub_homologs_0.7.csv");

            var pdbFiles = Directory.GetFiles(pdbFolder, "*.pdb", SearchOption.TopDirectoryOnly);

            var pdbIdList = pdbFiles.Select(ProteinBioClass.PdbIdFromPdbFilename).ToList();

            // only ca-atoms and endmdls (the length check protects the column look-ups on truncated lines)
            var pdbAtomsText =
                pdbFiles.Select(
                    a =>
                    File.ReadAllLines(a)
                    .Where(b => (b.StartsWith("ATOM ") && b.Length > 14 && b[13] == 'C' && b[14] == 'A') || b.StartsWith("ENDMDL "))
                    .ToList()).ToList();

            // only first nmr model
            pdbAtomsText = pdbAtomsText.Select(a =>
            {
                var x = a.FindIndex(b => b.StartsWith("ENDMDL "));

                // x is the position of the first ENDMDL line, so exactly x atom lines precede it
                return(x == -1 ? a : a.GetRange(0, x));
            }).ToList();

            var pdbAtoms = pdbAtomsText.Select(a => a.Select(b => new ATOM_Record(b)).ToList()).ToList();

            // get list of unique chain ids per pdb file (same order as pdbIdList)
            var pdbChainIds = pdbAtoms.Select(a => a.Select(b => char.ToUpperInvariant(b.chainID.FieldValue[0])).Distinct().ToList()).ToList();

            var pdbIdChainIdList = new List <Tuple <string, char> >();

            for (var i = 0; i < pdbIdList.Count; i++)
            {
                pdbIdChainIdList.AddRange(pdbChainIds[i].Select(chainId => new Tuple <string, char>(pdbIdList[i], chainId)));
            }
            pdbIdChainIdList = pdbIdChainIdList.Distinct().ToList();

            // for each chain: the contacts involving that chain, oriented so that Atom1 is always on the chain itself
            var pdbContacts =
                pdbIdChainIdList.Select(a =>
            {
                var x =
                    ProteinBioClass.AtomPair.LoadAtomPairList(@"C:\ds96ub_homologs\contacts\contacts_pdb" + a.Item1.ToUpperInvariant() + ".pdb")
                    .Where(b => char.ToUpperInvariant(b.Atom1.chainID.FieldValue[0]) == a.Item2 || char.ToUpperInvariant(b.Atom2.chainID.FieldValue[0]) == a.Item2)
                    .Select(c =>
                {
                    if (char.ToUpperInvariant(c.Atom1.chainID.FieldValue[0]) != a.Item2)
                    {
                        c.SwapAtoms();
                    }

                    return(c);
                }).ToList();

                return(x);
            }).ToList();

            // res min, res max, best min, best max, interface aa, interface mask
            var pdbInterfaces = new List <Ds93UbInterface>();

            var interface_target_length = 50;

            for (int index = 0; index < pdbContacts.Count; index++)
            {
                var pdbId   = pdbIdChainIdList[index].Item1;
                var chainId = pdbIdChainIdList[index].Item2;

                var pdbContact = pdbContacts[index];

                if (pdbContact.Count == 0)
                {
                    continue;
                }

                var contactChains = pdbContact.Where(a => char.ToUpperInvariant(a.Atom2.chainID.FieldValue[0]) != chainId).Select(a => char.ToUpperInvariant(a.Atom2.chainID.FieldValue[0])).Distinct().ToList();

                foreach (var contactChain in contactChains)
                {
                    var pdbContactsResSeqIds =
                        pdbContact.Where(a => char.ToUpperInvariant(a.Atom1.chainID.FieldValue[0]) == chainId &&
                                         char.ToUpperInvariant(a.Atom2.chainID.FieldValue[0]) == contactChain)
                        .Select(a => int.Parse(a.Atom1.resSeq.FieldValue))
                        .ToList();

                    var res_seq     = pdbContactsResSeqIds;
                    var res_seq_set = new HashSet <int>(res_seq);
                    var min_res_seq = pdbContactsResSeqIds.Min();
                    var max_res_seq = pdbContactsResSeqIds.Max();

                    var best50_min           = int.MinValue;
                    var best50_max           = int.MinValue;
                    var best50_interactions  = int.MinValue;
                    var best50_middle_finder = new List <Tuple <int, int, int> >();

                    if (Math.Abs(max_res_seq - min_res_seq) <= interface_target_length)
                    {
                        // the whole interface already fits into the target length
                        best50_min          = min_res_seq;
                        best50_max          = max_res_seq;
                        best50_interactions = res_seq.Count;
                    }
                    else
                    {
                        // slide a window over the interface and remember the window(s) holding the most contacts
                        for (var x = min_res_seq - interface_target_length; x <= max_res_seq; x++)
                        {
                            var min = x;
                            var max = x + interface_target_length > max_res_seq ? max_res_seq : x + interface_target_length;

                            // count the contacts inside the current window (not inside the previous best window)
                            var best50 = res_seq.Count(a => a >= min && a <= max);

                            if (best50 == best50_interactions)
                            {
                                best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                            }

                            if (best50_interactions == int.MinValue || best50 > best50_interactions)
                            {
                                best50_middle_finder.Clear();
                                best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                                best50_min          = min;
                                best50_max          = max;
                                best50_interactions = best50;
                            }

                            // stop once the window has reached the last contact residue
                            if (x + interface_target_length >= max_res_seq)
                            {
                                break;
                            }
                        }
                    }

                    // several equally good windows: take the one in the middle
                    if (best50_middle_finder.Count > 2)
                    {
                        var middle = best50_middle_finder[best50_middle_finder.Count / 2];
                        best50_min          = middle.Item1;
                        best50_max          = middle.Item2;
                        best50_interactions = middle.Item3;
                    }

                    var best50_interface_atoms = pdbAtoms[pdbIdList.IndexOf(pdbId)].Where(a =>
                    {
                        var l = int.Parse(a.resSeq.FieldValue);
                        var c = char.ToUpperInvariant(a.chainID.FieldValue[0]);
                        return(c == chainId && l >= best50_min && l <= best50_max);
                    }).ToList();

                    best50_interface_atoms = best50_interface_atoms.OrderBy(c => int.Parse(c.resSeq.FieldValue)).ToList();

                    var best50_interface = string.Join("", best50_interface_atoms.Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                    // one mask character per interface atom, taken from that atom's own residue number so that
                    // numbering gaps (or a window starting before the first residue) cannot shift the mask
                    var best50_mask = string.Join("",
                                                  best50_interface_atoms.Select(a => res_seq_set.Contains(int.Parse(a.resSeq.FieldValue)) ? "X" : "_").ToList());

                    pdbInterfaces.Add(new Ds93UbInterface(pdbId, chainId, contactChain, min_res_seq, max_res_seq, best50_min,
                                                          best50_max, best50_interactions, best50_interface, best50_mask, -1, "", "", 0, -1, "", "", 0));
                }
            }

            var homologClusterIndexes = homologClusterData.Select(a => a.ClusterIndex).Distinct().ToList();

            var homologClusters = homologClusterIndexes.Select(a => homologClusterData.Where(b => b.ClusterIndex == a).ToList()).ToList();

            // same order as homologClusterIndexes
            var pdbInterfacesSorted = homologClusters.Select(a => pdbInterfaces.Where(b => a.Any(c => c.PdbId == b.PdbId && (char.ToUpperInvariant(c.ChainId) == b.ChainId1 || char.ToUpperInvariant(c.ChainId) == b.ChainId2))).ToList()).ToList();

            var outputData = new List <string>();

            for (var clusterPosition = 0; clusterPosition < homologClusterIndexes.Count; clusterPosition++)
            {
                var clusterIndex = homologClusterIndexes[clusterPosition];

                // look the cluster up by list position; the cluster index values need not be 1..N in order
                var cluster = pdbInterfacesSorted[clusterPosition];

                // currently, cluster is a list of chain1-->chain2 interfaces ... so the 'chain2' interface needs adding to the record
                foreach (var inf1 in cluster)
                {
                    var partner =
                        cluster.Where(a => a != inf1 && a.PdbId == inf1.PdbId && inf1.ChainId2 == a.ChainId1)
                        .OrderByDescending(
                            a => InterfaceOverlap(inf1.MinResSeq, inf1.MaxResSeq, a.MinResSeq, a.MaxResSeq))
                        .ToList();

                    var first = partner.FirstOrDefault();
                    if (first != null)
                    {
                        inf1.Partner1InterfaceAminoAcids       = first.InterfaceAminoAcids;
                        inf1.Partner1InterfaceInteractionsMask = first.InterfaceInteractionsMask;
                        inf1.Partner1InterfaceOverlap          = InterfaceOverlap(inf1.MinResSeq, inf1.MaxResSeq, first.MinResSeq, first.MaxResSeq);
                    }

                    var second = partner.ElementAtOrDefault(1);
                    if (second != null)
                    {
                        inf1.Partner2InterfaceAminoAcids       = second.InterfaceAminoAcids;
                        inf1.Partner2InterfaceInteractionsMask = second.InterfaceInteractionsMask;
                        inf1.Partner2InterfaceOverlap          = InterfaceOverlap(inf1.MinResSeq, inf1.MaxResSeq, second.MinResSeq, second.MaxResSeq);
                    }
                }

                cluster = cluster.Where(a => a.Partner1InterfaceOverlap > 0 || a.Partner2InterfaceOverlap > 0).ToList();

                cluster = cluster.Where(a => a.InterfaceAminoAcids.Length >= 5).ToList();

                var clusterInterfaces = cluster.Select(a => a.InterfaceAminoAcids).ToList();

                // group every interface together with its most similar other interface
                var homologInterfaces = new List <List <string> >();
                foreach (var inf1 in clusterInterfaces)
                {
                    var    highest_score = decimal.MinValue;
                    string highest_inf   = null;

                    foreach (var inf2 in clusterInterfaces)
                    {
                        if (inf1 == inf2)
                        {
                            continue;
                        }

                        var score = ProteinBioClass.AlignedSequenceSimilarityPercentage(inf1, inf2, ProteinBioClass.AlignmentType.NMW);
                        if (score.Score > highest_score)
                        {
                            highest_score = score.Score;
                            highest_inf   = inf2;
                        }
                    }

                    // highest_inf stays null when there is no differing interface to compare with; never store null in a group
                    var y = homologInterfaces.FirstOrDefault(a => a.Contains(inf1) || (highest_inf != null && a.Contains(highest_inf)));
                    if (y != null)
                    {
                        if (!y.Contains(inf1))
                        {
                            y.Add(inf1);
                        }
                        if (highest_inf != null && !y.Contains(highest_inf))
                        {
                            y.Add(highest_inf);
                        }
                    }
                    else
                    {
                        var z = new List <string>();
                        z.Add(inf1);
                        if (highest_inf != null)
                        {
                            z.Add(highest_inf);
                        }
                        homologInterfaces.Add(z);
                    }
                }

                foreach (var c in cluster)
                {
                    c.Partner1ClusterIndex = homologInterfaces.FindIndex(b => b.Contains(c.Partner1InterfaceAminoAcids));
                    c.Partner2ClusterIndex = homologInterfaces.FindIndex(b => b.Contains(c.Partner2InterfaceAminoAcids));
                }

                for (int index = 0; index < homologInterfaces.Count; index++)
                {
                    var homologInterface = homologInterfaces[index];

                    var cluster2 =
                        cluster.Where(a => homologInterface.Contains(a.InterfaceAminoAcids))
                        .OrderBy(a => a.Partner1ClusterIndex)
                        .ThenBy(a => a.Partner2ClusterIndex)
                        .ThenBy(a => a.InterfaceAminoAcids)
                        .ThenBy(a => a.Partner1InterfaceAminoAcids)
                        .ThenBy(a => a.Partner2InterfaceAminoAcids)
                        .ToList();

                    // keep one record per distinct (interface, partner 1, partner 2) combination
                    var partners =
                        cluster2.Select(
                            a =>
                            new Tuple <string, string, string>(a.InterfaceAminoAcids, a.Partner1InterfaceAminoAcids,
                                                               a.Partner2InterfaceAminoAcids)).Distinct();

                    cluster2 =
                        partners.Select(
                            a =>
                            cluster2.FirstOrDefault(
                                b =>
                                b.InterfaceAminoAcids == a.Item1 && b.Partner1InterfaceAminoAcids == a.Item2 &&
                                b.Partner2InterfaceAminoAcids == a.Item3)).ToList();

                    outputData.Add("cluster " + clusterIndex + "." + index);
                    outputData.AddRange(cluster2.Select(a => a.ToString()).ToList());
                    outputData.Add("");
                }
            }

            File.WriteAllLines(@"c:\ds96ub_homologs\ds96ub_homologs_interfaces.csv", outputData);
        }
예제 #15
0
        /// <summary>
        ///     Converts the given amino acid name or code to its number and reports whether
        ///     IsAminoAcidNumberValid accepts that number.
        /// </summary>
        /// <param name="aminoAcidNameOrCode">Amino acid name or code, as accepted by AminoAcidConversions.AminoAcidNameToNumber.</param>
        /// <returns>The result of IsAminoAcidNumberValid for the converted number.</returns>
        public static bool IsAminoAcidCodeValid(string aminoAcidNameOrCode) =>
            IsAminoAcidNumberValid(AminoAcidConversions.AminoAcidNameToNumber(aminoAcidNameOrCode));
        /// <summary>
        ///     Walks through amino acid numbers 1 to 26 and, for each property group, prints how many amino acids
        ///     belong to the group followed by their concatenated one-letter codes.
        /// </summary>
        public void TestAminoAcids()
        {
            // One label per tallied property; the order here is the order of the printed lines.
            var labels = new[]
            {
                "Aliphatic: ",
                "Aromatic: ",
                "Charged: ",
                "Hydrophobic: ",
                "Hydroxylic: ",
                "Negative: ",
                "Polar: ",
                "Positive: ",
                "Small: ",
                "Sulphur: ",
                "Tiny: ",
            };

            var counts = new int[labels.Length];
            var codes  = new string[labels.Length];

            for (var group = 0; group < codes.Length; group++)
            {
                codes[group] = "";
            }

            for (var aminoAcidNumber = 1; aminoAcidNumber <= 26; aminoAcidNumber++)
            {
                var aminoAcid = AminoAcidConversions.AminoAcidNumberToAminoAcidObject(aminoAcidNumber);

                // Same order as the labels above.
                bool[] memberships =
                {
                    aminoAcid.Aliphatic,
                    aminoAcid.Aromatic,
                    aminoAcid.Charged,
                    aminoAcid.Hydrophobic,
                    aminoAcid.Hydroxylic,
                    aminoAcid.Negative,
                    aminoAcid.Polar,
                    aminoAcid.Positive,
                    aminoAcid.Small,
                    aminoAcid.Sulphur,
                    aminoAcid.Tiny,
                };

                for (var group = 0; group < memberships.Length; group++)
                {
                    if (!memberships[group])
                    {
                        continue;
                    }

                    counts[group]++;
                    codes[group] += aminoAcid.Code1L;
                }
            }

            for (var group = 0; group < labels.Length; group++)
            {
                Console.WriteLine(labels[group] + counts[group] + " " + codes[group]);
            }
        }
예제 #17
0
 /// <summary>
 ///     Converts the amino acid code to its number and returns the result of
 ///     ConvertAminoAcidNumberToSubgroupNumbers for that number within the given grouping.
 /// </summary>
 /// <param name="enumAminoAcidGroups">The grouping passed on to ConvertAminoAcidNumberToSubgroupNumbers.</param>
 /// <param name="aminoAcidNameCode">Amino acid code, as accepted by AminoAcidConversions.AminoAcidNameToNumber.</param>
 /// <returns>The subgroup numbers returned by ConvertAminoAcidNumberToSubgroupNumbers.</returns>
 public static int[] ConvertAminoAcidNameCodeToSubgroupNumbers(EnumAminoAcidGroups enumAminoAcidGroups, char aminoAcidNameCode)
 {
     var aminoAcidNumber = AminoAcidConversions.AminoAcidNameToNumber(aminoAcidNameCode);

     return ConvertAminoAcidNumberToSubgroupNumbers(enumAminoAcidGroups, aminoAcidNumber);
 }
        /// <summary>
        ///     Lays out motif profile records as a spreadsheet.
        ///     The sheet starts with a header row. Each record, ordered by descending parsed TotalFound, then contributes
        ///     an empty row, a record header row, one row per profile position and a final "Average" row.
        ///     Profile values are formatted with two decimals.
        /// </summary>
        /// <param name="motifProfileSpreadsheetRecordList">The records to write.</param>
        /// <returns>The rows converted to a two-dimensional array by ConvertTypes.StringJagged2DArrayTo2DArray.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="motifProfileSpreadsheetRecordList"/> is null.</exception>
        public static string[,] Spreadsheet(List <MotifProfileSpreadsheetRecord> motifProfileSpreadsheetRecordList)
        {
            if (motifProfileSpreadsheetRecordList == null)
            {
                throw new ArgumentNullException(nameof(motifProfileSpreadsheetRecordList));
            }

            var result = new List <string[]>();

            var sheetHeader = new List <string>()
            {
                "Motif Name",
                "Motif Source",
                "Direction",
                "Total Found",
                //"Total Found In Heterodimers",
                //"Total Found In Homodimers",
                "Profile Position",
            };

            sheetHeader.AddRange(AminoAcidConversions.AminoAcidCodeArray1L());

            // The header is not modified below, so the column positions are looked up once instead of once per cell.
            var profilePositionColumn = sheetHeader.IndexOf("Profile Position");
            var firstAminoAcidColumn  = profilePositionColumn + 1;

            result.Add(sheetHeader.ToArray());

            foreach (var record in motifProfileSpreadsheetRecordList.OrderByDescending(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.TotalFound)))
            {
                // empty row separating the records
                result.Add(new string[] { });

                var recordHeader = new List <string>()
                {
                    record.MotifName,
                    record.MotifSource,
                    record.Direction,
                    record.TotalFound,
                    //record.TotalFoundInHeterodimers,
                    //record.TotalFoundInHomodimers,
                    "",
                };

                recordHeader.AddRange(AminoAcidConversions.AminoAcidCodeArray1L());

                result.Add(recordHeader.ToArray());

                for (var positionIndex = 0; positionIndex < record.AminoAcidProfile.Length; positionIndex++)
                {
                    var row = new string[sheetHeader.Count];

                    // positions are shown one-based
                    row[profilePositionColumn] = "" + (positionIndex + 1);

                    for (var aaIndex = 0; aaIndex < record.AminoAcidProfile[positionIndex].Length; aaIndex++)
                    {
                        row[firstAminoAcidColumn + aaIndex] = $"{record.AminoAcidProfile[positionIndex][aaIndex]:0.00}";
                    }

                    result.Add(row);
                }

                var rowAverage = new string[sheetHeader.Count];

                rowAverage[profilePositionColumn] = "Average";

                for (var aaIndex = 0; aaIndex < record.AverageProfile.Length; aaIndex++)
                {
                    rowAverage[firstAminoAcidColumn + aaIndex] = $"{record.AverageProfile[aaIndex]:0.00}";
                }

                result.Add(rowAverage);
            }

            return(ConvertTypes.StringJagged2DArrayTo2DArray(result.ToArray()));
        }
        /// <summary>
        ///     Builds a motif profile record from a list of protein interfaces: for every sequence position the
        ///     percentage of each amino acid, plus the overall percentage of each amino acid across all positions.
        /// </summary>
        /// <param name="vectorProteinInterfaceWholeList">The interfaces to profile.</param>
        /// <returns>
        ///     The populated record (profiles, "Fwd"/"Rev"/"Mix" direction and total count), or null when the list is
        ///     null or empty.
        /// </returns>
        public static MotifProfileSpreadsheetRecord Record(List <VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList)
        {
            if (vectorProteinInterfaceWholeList == null || vectorProteinInterfaceWholeList.Count == 0)
            {
                return(null);
            }

            // The profile has one row per position of the longest interface.
            var longestInterface = vectorProteinInterfaceWholeList.Max(v => v.ProteinInterfaceAminoAcids1L().Length);

            var aminoAcidCount = AminoAcidTotals.TotalAminoAcids();

            var profile = new MotifProfileSpreadsheetRecord();
            profile.AminoAcidProfile = new decimal[longestInterface][];
            profile.AverageProfile   = new decimal[aminoAcidCount];
            profile.TotalFound       = "" + vectorProteinInterfaceWholeList.Count;

            var forwardCount = vectorProteinInterfaceWholeList.Count(v => !v.ReversedSequence);
            var reverseCount = vectorProteinInterfaceWholeList.Count - forwardCount;

            profile.Direction = forwardCount > 0 && reverseCount == 0
                                    ? "Fwd"
                                    : (forwardCount == 0 && reverseCount > 0 ? "Rev" : "Mix");

            for (var row = 0; row < profile.AminoAcidProfile.Length; row++)
            {
                profile.AminoAcidProfile[row] = new decimal[aminoAcidCount];
            }

            // Tally every amino acid, both per position and overall.
            foreach (var proteinInterface in vectorProteinInterfaceWholeList)
            {
                var position = 0;

                foreach (var aminoAcid in proteinInterface.ProteinInterfaceAminoAcids1L())
                {
                    // amino acid numbers are one-based, profile columns zero-based
                    var column = AminoAcidConversions.AminoAcidNameToNumber(aminoAcid) - 1;

                    profile.AminoAcidProfile[position][column] += 1;
                    profile.AverageProfile[column] += 1;

                    position++;
                }
            }

            // Turn the per-position tallies into percentages of that position's total.
            foreach (var positionRow in profile.AminoAcidProfile)
            {
                var rowTotal = positionRow.Sum();

                for (var column = 0; column < aminoAcidCount; column++)
                {
                    positionRow[column] = (positionRow[column] / rowTotal) * 100;
                }
            }

            // Turn the overall tallies into percentages; an all-zero tally stays at zero.
            var grandTotal = profile.AverageProfile.Sum();

            for (var column = 0; column < aminoAcidCount; column++)
            {
                if (grandTotal != 0)
                {
                    profile.AverageProfile[column] = (profile.AverageProfile[column] / grandTotal) * 100;
                }
                else
                {
                    profile.AverageProfile[column] = 0;
                }
            }

            return(profile);
        }
예제 #20
0
        /// <summary>
        ///     Load protein interface data from the PDB file based on a list of already detected protein interfaces.
        ///     The detected protein interfaces may be missing data such as other atoms or residues which are also in the
        ///     protein interface but were not directly interacting, so every residue sequence index between an interface's
        ///     minimum and maximum index is looked up and classified.
        ///     The positions and lengths of the protein interfaces are also calculated.
        /// </summary>
        /// <param name="pdbFilename">
        ///     Path of the PDB file. It must exist; it is re-read (at most once per call) when a residue cannot be found in
        ///     <paramref name="pdbFileChains" />.
        /// </param>
        /// <param name="pdbIdChainIdList">
        ///     Optional map from PDB id to chain ids. When it holds an entry for this file's PDB id, those chain ids are
        ///     passed to the fallback PDB load; otherwise null is passed. May be null.
        /// </param>
        /// <param name="pdbFileChains">
        ///     The already loaded chains, searched first for each residue sequence index and used to compute each residue's
        ///     <c>ArrayMemberIndex</c>.
        /// </param>
        /// <param name="singularAaToAaInteractions">Interaction data; must not be null or empty.</param>
        /// <param name="proteinInterfacesClusteringResult">Clustering result per chain and stage; must not be null or empty.</param>
        /// <param name="detectedBestStages">For each chain index, the index of the clustering stage to use; must not be null or empty.</param>
        /// <param name="interactionBetweenProteinInterfacesContainer">Interactions used to classify each residue.</param>
        /// <returns>One entry per non-empty cluster of the selected stage of every chain, in chain order.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="pdbFilename" /> is null/empty/whitespace, or <paramref name="singularAaToAaInteractions" />,
        ///     <paramref name="proteinInterfacesClusteringResult" /> or <paramref name="detectedBestStages" /> is null or empty.
        /// </exception>
        /// <exception cref="FileNotFoundException"><paramref name="pdbFilename" /> does not exist.</exception>
        public static List<ProteinInterfaceSequenceAndPositionData> AnalyseProteinInterfacesSequenceAndPositionData(
            string pdbFilename,
            Dictionary<string, List<string>> pdbIdChainIdList,
            ProteinChainListContainer pdbFileChains,
            ProteinChainListContainer singularAaToAaInteractions,
            ClusteringFullResultListContainer proteinInterfacesClusteringResult,
            int[] detectedBestStages,
            InteractionBetweenProteinInterfacesListContainer interactionBetweenProteinInterfacesContainer)
        {
            if (string.IsNullOrWhiteSpace(pdbFilename))
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilename));
            }

            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
            {
                throw new ArgumentOutOfRangeException(nameof(singularAaToAaInteractions));
            }

            if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(proteinInterfacesClusteringResult))
            {
                throw new ArgumentOutOfRangeException(nameof(proteinInterfacesClusteringResult));
            }

            if (ParameterValidation.IsIntArrayNullOrEmpty(detectedBestStages))
            {
                throw new ArgumentOutOfRangeException(nameof(detectedBestStages));
            }

            // ProteinInterfaces are clusters with non-proteinInterfaces removed.

            const string placeholder1L = "_";
            const string placeholder3L = "___";

            var    result      = new List<ProteinInterfaceSequenceAndPositionData>();
            string proteinId   = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);
            int    totalChains = proteinInterfacesClusteringResult.ChainList.Count;

            // Fallback chains loaded from the PDB file. The load arguments never change during this call, so the file
            // is parsed lazily and at most once instead of once per residue missing from pdbFileChains.
            ProteinChainListContainer fallbackPdbFileChains       = null;
            bool                      fallbackPdbFileChainsLoaded = false;

            for (int chainIndex = 0; chainIndex < totalChains; chainIndex++)
            {
                int    stageIndex    = detectedBestStages[chainIndex];
                string chainIdLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(chainIndex);

                List<ClusteringFullResultListContainer.Chain.Stage.Cluster> proteinInterfaceList = proteinInterfacesClusteringResult.ChainList[chainIndex].StageList[stageIndex].ClusterList;

                // Interface indexes (and id letters) are assigned after empty clusters have been dropped.
                List<ClusteringFullResultListContainer.Chain.Stage.Cluster> nonEmptyProteinInterfaceList = proteinInterfaceList.Where(a => a != null && a.AtomIndexList != null && a.AtomIndexList.Count > 0).ToList();

                // loop through each proteinInterface
                for (int proteinInterfaceIndex = 0; proteinInterfaceIndex < nonEmptyProteinInterfaceList.Count; proteinInterfaceIndex++)
                {
                    ClusteringFullResultListContainer.Chain.Stage.Cluster proteinInterface = nonEmptyProteinInterfaceList[proteinInterfaceIndex];

                    // Find min and max residue sequence index value in the proteinInterface
                    MinMax proteinInterfaceResidueSequenceIndexes = MinMaxResidueSequenceIndex(proteinInterface, singularAaToAaInteractions, chainIndex);
                    int    minResidueSequenceIndex = proteinInterfaceResidueSequenceIndexes.Min;
                    int    maxResidueSequenceIndex = proteinInterfaceResidueSequenceIndexes.Max;
                    int    proteinInterfaceLength  = CalculateProteinInterfaceLength(minResidueSequenceIndex, maxResidueSequenceIndex);

                    string proteinInterfaceIdLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(proteinInterfaceIndex);

                    var proteinInterfacePositionData = new ProteinInterfaceSequenceAndPositionData
                    {
                        FullProteinInterfaceId = new FullProteinInterfaceId(proteinId, chainIndex, proteinInterfaceIndex, minResidueSequenceIndex, maxResidueSequenceIndex),
                        ChainIdLetter          = chainIdLetter,

                        ProteinInterfaceIdLetter = proteinInterfaceIdLetter,

                        StartPosition          = minResidueSequenceIndex,
                        EndPosition            = maxResidueSequenceIndex,
                        ProteinInterfaceLength = proteinInterfaceLength
                    };
                    proteinInterfacePositionData.AminoAcidSequenceAllResidueSequenceIndexes = new ProteinInterfaceAminoAcidMetaData[proteinInterfacePositionData.ProteinInterfaceLength];

                    // Every sequence string starts empty and receives exactly one entry per residue below.
                    proteinInterfacePositionData.AminoAcidSequenceAll1L             = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L = "";

                    proteinInterfacePositionData.AminoAcidSequenceAll3L             = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L = "";

                    for (int residueSequenceIndex = minResidueSequenceIndex; residueSequenceIndex <= maxResidueSequenceIndex; residueSequenceIndex++)
                    {
                        /* questions
                         * 1. does this residue interact with another residue which is also part of a proteinInterface?
                         * 2. if not, does this residue interact at all?
                         */

                        var proteinInterfaceAminoAcidMetaData = new ProteinInterfaceAminoAcidMetaData();

                        // NOTE(review): the slot index is the current length of AminoAcidSequenceAll1L, which assumes each
                        // residue appends exactly one character to it - confirm AminoAcidNameToCode1L always yields one.
                        proteinInterfacePositionData.AminoAcidSequenceAllResidueSequenceIndexes[proteinInterfacePositionData.AminoAcidSequenceAll1L.Length] = proteinInterfaceAminoAcidMetaData;

                        ATOM_Record foundAtomInsidePdbFile = AtomSearchMethods.FindAtomInsidePdbFileChain(pdbFileChains, chainIndex, residueSequenceIndex);

                        if (foundAtomInsidePdbFile == null)
                        {
                            // Non-CA atom is loaded here in case of missing CA atom to find the AA code for the resSeq index
                            if (!fallbackPdbFileChainsLoaded)
                            {
                                List<string> chainIds = null;

                                if (pdbIdChainIdList != null)
                                {
                                    pdbIdChainIdList.TryGetValue(proteinId, out chainIds);
                                }

                                string[] chainIdList = chainIds != null ? chainIds.ToArray() : null;

                                fallbackPdbFileChains       = ProteinDataBankFileOperations.PdbAtomicChains(pdbFilename, chainIdList, -1, -1, false);
                                fallbackPdbFileChainsLoaded = true;
                            }

                            foundAtomInsidePdbFile = AtomSearchMethods.FindAtomInsidePdbFileChain(fallbackPdbFileChains, chainIndex, residueSequenceIndex);
                        }

                        proteinInterfaceAminoAcidMetaData.PdbResidueSequenceIndex          = residueSequenceIndex;
                        // NOTE(review): presumably -1 when the atom is missing or came from the fallback load - confirm.
                        proteinInterfaceAminoAcidMetaData.ArrayMemberIndex                 = pdbFileChains.ChainList[chainIndex].AtomList.IndexOf(foundAtomInsidePdbFile);
                        proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions = new bool[proteinInterfaceLength];

                        if (foundAtomInsidePdbFile == null)
                        {
                            // No atom for this residue sequence index anywhere: fill every sequence with placeholders.
                            proteinInterfacePositionData.AminoAcidSequenceAll1L             += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L  += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                            proteinInterfacePositionData.AminoAcidSequenceAll3L             += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L  += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                            proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.NoInteractionFound;
                            proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames1L += placeholder1L;
                            proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames3L += placeholder3L;
                            continue;
                        }

                        // One-letter code and 3-character (underscore padded) name of this residue, computed once.
                        var    residueCode1L = AminoAcidConversions.AminoAcidNameToCode1L(foundAtomInsidePdbFile.resName.FieldValue);
                        string residueName3L = foundAtomInsidePdbFile.resName.FieldValue.PadRight(3, '_');

                        proteinInterfacePositionData.AminoAcidSequenceAll1L += residueCode1L;
                        proteinInterfacePositionData.AminoAcidSequenceAll3L += residueName3L;

                        List<ATOM_Record> foundAtomInteractingWithAnotherProteinInterface = AtomSearchMethods.FindAtomInteractingWithOtherProteinInterfaces(foundAtomInsidePdbFile, interactionBetweenProteinInterfacesContainer, FindAtomInteractingWithAnotherProteinInterfaceOptions.FindAtomsInteractingWithOtherProteinInterfaces);
                        List<ATOM_Record> foundAtomInteractingWithNonProteinInterface     = AtomSearchMethods.FindAtomInteractingWithOtherProteinInterfaces(foundAtomInsidePdbFile, interactionBetweenProteinInterfacesContainer, FindAtomInteractingWithAnotherProteinInterfaceOptions.FindAtomsInteractingWithNonProteinInterfaces);

                        proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions = AminoAcidInteractionVector(singularAaToAaInteractions, proteinInterfacesClusteringResult, detectedBestStages, interactionBetweenProteinInterfacesContainer, chainIndex, proteinInterfaceIndex, residueSequenceIndex);

                        proteinInterfaceAminoAcidMetaData.ResidueName1L = residueCode1L;
                        proteinInterfaceAminoAcidMetaData.ResidueName3L = residueName3L;

                        // Record the names of the partner residues, separately for interface and non-interface partners.
                        if (foundAtomInteractingWithAnotherProteinInterface != null)
                        {
                            foreach (ATOM_Record atom in foundAtomInteractingWithAnotherProteinInterface)
                            {
                                proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionResidueNames1L += AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue);
                                proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionResidueNames3L += atom.resName.FieldValue.PadRight(3, '_');
                            }
                        }

                        if (foundAtomInteractingWithNonProteinInterface != null)
                        {
                            foreach (ATOM_Record atom in foundAtomInteractingWithNonProteinInterface)
                            {
                                proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames1L += AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue);
                                proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames3L += atom.resName.FieldValue.PadRight(3, '_');
                            }
                        }

                        bool interactsWithProteinInterface    = foundAtomInteractingWithAnotherProteinInterface != null && foundAtomInteractingWithAnotherProteinInterface.Count > 0;
                        bool interactsWithNonProteinInterface = foundAtomInteractingWithNonProteinInterface != null && foundAtomInteractingWithNonProteinInterface.Count > 0;

                        if (interactsWithProteinInterface)
                        {
                            // An interface interaction takes precedence: the "outside only" sequence gets a placeholder
                            // even when the residue also interacts with non-interface residues.
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += residueCode1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L  += residueCode1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += residueName3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L  += residueName3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                            proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.InteractionWithAnotherProteinInterface;

                            if (interactsWithNonProteinInterface)
                            {
                                proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType |= ProteinInterfaceInteractionType.InteractionWithNonProteinInterface;
                            }
                        }
                        else if (interactsWithNonProteinInterface)
                        {
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += residueCode1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L  += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += residueCode1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += residueName3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L  += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += residueName3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                            proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.InteractionWithNonProteinInterface;
                        }
                        else
                        {
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L  += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += residueCode1L;

                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L  += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += residueName3L;

                            proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.NoInteractionFound;
                        }
                    }

                    result.Add(proteinInterfacePositionData);
                }
            }

            return result;
        }