Пример #1
0
        /// <summary>
        /// Reads the interaction data set, and for every row that has atomic contacts for both
        /// partners writes one CSV line describing, per chain, the contact residue range, the
        /// best fixed-length window of contacts, the amino-acid sequence of that window and a
        /// mask marking which residues in the window take part in a contact.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Maximum span (max resSeq - min resSeq) of the window searched for on each chain.
            const int interfaceTargetLength = 50;

            var data = MultiBindingInterface.Load(@"c:\pdb\new_data_set.csv");

            var resultData = new List<string>();

            resultData.Add(string.Join(",", new string[]
            {
                "super family",

                "partner 1 pdb id",
                "partner 1 chain id 1", "p1c1 interface start", "p1c1 interface end", "p1c1 best 50 start", "p1c1 best 50 end", "p1c1 best 50 interactions", "p1c1 interface seq", "p1c1 interface mask",
                // The original header repeated "p1c1 interface end" here; this column holds the p1c2 value.
                "partner 1 chain id 2", "p1c2 interface start", "p1c2 interface end", "p1c2 best 50 start", "p1c2 best 50 end", "p1c2 best 50 interactions", "p1c2 interface seq", "p1c2 interface mask",
                "partner 2 pdb id",
                "partner 2 chain id 1", "p2c1 interface start", "p2c1 interface end", "p2c1 best 50 start", "p2c1 best 50 end", "p2c1 best 50 interactions", "p2c1 interface seq", "p2c1 interface mask",
                "partner 2 chain id 2", "p2c2 interface start", "p2c2 interface end", "p2c2 best 50 start", "p2c2 best 50 end", "p2c2 best 50 interactions", "p2c2 interface seq", "p2c2 interface mask",
            }));

            foreach (var d in data)
            {
                if (d == null)
                {
                    continue;
                }

                // Item1 = contact residue numbers on chain 1, Item2 = on chain 2 (same length, pairwise).
                var contacts1 = LoadOrientedContactResidues(
                    @"C:\pdb\new_data_set\contacts\contacts_" + d.InteractionChainsPdb1.ToLowerInvariant() + ".pdb",
                    d.InteractionChainsPdb1Chain1,
                    d.InteractionChainsPdb1Chain2);

                var contacts2 = LoadOrientedContactResidues(
                    @"C:\pdb\new_data_set\contacts\contacts_" + d.InteractionChainsPdb2.ToLowerInvariant() + ".pdb",
                    d.InteractionChainsPdb2Chain1,
                    d.InteractionChainsPdb2Chain2);

                // A row is only reported when both partners have at least one contact.
                if (contacts1.Item1.Count == 0 || contacts2.Item1.Count == 0)
                {
                    continue;
                }

                var pdb1File = @"c:\pdb\new_data_set\" + d.InteractionChainsPdb1 + ".pdb";
                var pdb2File = @"c:\pdb\new_data_set\" + d.InteractionChainsPdb2 + ".pdb";

                var row = new List<string>();
                row.Add(d.DomainSuperFamily);
                row.Add(d.InteractionChainsPdb1);
                row.AddRange(DescribeChainInterface(pdb1File, d.InteractionChainsPdb1Chain1, contacts1.Item1, interfaceTargetLength));
                row.AddRange(DescribeChainInterface(pdb1File, d.InteractionChainsPdb1Chain2, contacts1.Item2, interfaceTargetLength));
                row.Add(d.InteractionChainsPdb2);
                row.AddRange(DescribeChainInterface(pdb2File, d.InteractionChainsPdb2Chain1, contacts2.Item1, interfaceTargetLength));
                row.AddRange(DescribeChainInterface(pdb2File, d.InteractionChainsPdb2Chain2, contacts2.Item2, interfaceTargetLength));

                resultData.Add(string.Join(",", row));
            }

            File.WriteAllLines(@"c:\pdb\new_data_set_results.csv", resultData);
        }

        /// <summary>
        /// Loads the atom pairs from a contacts file, keeps only pairs that connect the two given
        /// chains (in either order, case-insensitive), orients each pair so that Atom1 is on
        /// <paramref name="chain1"/>, and returns the residue sequence numbers of both sides.
        /// </summary>
        /// <param name="contactsFile">Path of the contacts file to load.</param>
        /// <param name="chain1">Chain id expected on the Atom1 side after orientation.</param>
        /// <param name="chain2">Chain id expected on the Atom2 side after orientation.</param>
        /// <returns>
        /// Item1: resSeq of Atom1 for every kept pair; Item2: resSeq of Atom2 for the same pairs.
        /// Both lists always have the same length.
        /// </returns>
        private static Tuple<List<int>, List<int>> LoadOrientedContactResidues(string contactsFile, string chain1, string chain2)
        {
            var first  = chain1.ToUpperInvariant();
            var second = chain2.ToUpperInvariant();

            var side1 = new List<int>();
            var side2 = new List<int>();

            foreach (var pair in FindAtomicContacts.AtomPair.LoadAtomPairList(contactsFile))
            {
                var atom1Chain = pair.Atom1.chainID.FieldValue.ToUpperInvariant();
                var atom2Chain = pair.Atom2.chainID.FieldValue.ToUpperInvariant();

                var connectsChains = (atom1Chain == first && atom2Chain == second) ||
                                     (atom1Chain == second && atom2Chain == first);
                if (!connectsChains)
                {
                    continue;
                }

                // Same orientation rule as before: swap whenever Atom1 sits on the second chain.
                if (atom1Chain == second)
                {
                    pair.SwapAtoms();
                }

                side1.Add(int.Parse(pair.Atom1.resSeq.FieldValue));
                side2.Add(int.Parse(pair.Atom2.resSeq.FieldValue));
            }

            return Tuple.Create(side1, side2);
        }

        /// <summary>
        /// Finds the residue window spanning at most <paramref name="targetLength"/> that contains
        /// the most contact residues.
        /// </summary>
        /// <param name="contactResSeq">Residue numbers of the contacts (one entry per contact); must not be empty.</param>
        /// <param name="targetLength">Window span, i.e. window end minus window start.</param>
        /// <returns>Item1: window start, Item2: window end, Item3: number of contacts inside the window.</returns>
        private static Tuple<int, int, int> FindBestContactWindow(List<int> contactResSeq, int targetLength)
        {
            var lowest  = contactResSeq.Min();
            var highest = contactResSeq.Max();

            // All contacts already fit inside one window: report the exact contact range.
            if (highest - lowest <= targetLength)
            {
                return Tuple.Create(lowest, highest, contactResSeq.Count);
            }

            var bestHits = int.MinValue;
            var tiedWindows = new List<Tuple<int, int, int>>();

            // Slide from the window that ends on the lowest contact to the one that ends on the highest.
            for (var start = lowest - targetLength; start + targetLength <= highest; start++)
            {
                var windowStart = start;
                var windowEnd   = start + targetLength;

                // Count the contacts inside the CURRENT window. The previous code counted against
                // the best window found so far, so each window was scored with a stale value.
                var hits = contactResSeq.Count(r => r >= windowStart && r <= windowEnd);

                if (hits > bestHits)
                {
                    tiedWindows.Clear();
                    bestHits = hits;
                }

                if (hits == bestHits)
                {
                    tiedWindows.Add(Tuple.Create(windowStart, windowEnd, hits));
                }
            }

            // With more than two equally good windows take the middle one, otherwise the first
            // (tie-breaking rule kept from the original code).
            return tiedWindows.Count > 2 ? tiedWindows[tiedWindows.Count / 2] : tiedWindows[0];
        }

        /// <summary>
        /// Produces the eight CSV columns for one chain: chain id, contact start, contact end,
        /// best-window start, best-window end, contacts in best window, window sequence, window mask.
        /// </summary>
        /// <param name="pdbFile">Path of the PDB file that contains the chain.</param>
        /// <param name="chainId">Chain id to load from the PDB file.</param>
        /// <param name="contactResSeq">Residue numbers on this chain that take part in a contact; must not be empty.</param>
        /// <param name="targetLength">Window span passed to <see cref="FindBestContactWindow"/>.</param>
        /// <returns>The eight column values, in output order.</returns>
        private static string[] DescribeChainInterface(string pdbFile, string chainId, List<int> contactResSeq, int targetLength)
        {
            var window = FindBestContactWindow(contactResSeq, targetLength);
            var windowMin = window.Item1;
            var windowMax = window.Item2;

            // The trailing arguments (-1, -1, true) are passed exactly as the original call did;
            // their meaning is defined by FindAtomicContacts.PdbAtomicChains.
            var pdb = FindAtomicContacts.PdbAtomicChains(pdbFile, new string[] { chainId }, -1, -1, true);

            var windowAtoms = pdb.ChainList.First().AtomList
                              .Select(a => new { ResSeq = int.Parse(a.resSeq.FieldValue), ResName = a.resName.FieldValue })
                              .Where(a => a.ResSeq >= windowMin && a.ResSeq <= windowMax)
                              .OrderBy(a => a.ResSeq)
                              .ToList();

            var sequence = string.Join("", windowAtoms.Select(a => "" + AminoAcidConversions.AminoAcidNameToCode1L(a.ResName)));

            // Build the mask from each atom's own residue number rather than from its position in
            // the string, so the mask stays aligned with the sequence when residue numbering has gaps.
            // NOTE(review): assumes AminoAcidNameToCode1L yields one character per atom - confirm.
            var contactSet = new HashSet<int>(contactResSeq);
            var mask = string.Join("", windowAtoms.Select(a => contactSet.Contains(a.ResSeq) ? "X" : "_"));

            return new string[]
            {
                chainId,
                "" + contactResSeq.Min(),
                "" + contactResSeq.Max(),
                "" + windowMin,
                "" + windowMax,
                "" + window.Item3,
                sequence,
                mask,
            };
        }
Пример #2
0
        static void Main(string[] args)
        {
            var pdbFolder = @"C:\ds96ub_homologs\";

            var homologClusterData =
                FindHomologsCluster.FindHomologsCluster.HomologClusterData.Load(
                    @"c:\ds96ub_homologs\ds96ub_homologs_0.7.csv");

            var pdbFiles = Directory.GetFiles(pdbFolder, "*.pdb", SearchOption.TopDirectoryOnly);

            var pdbIdList = pdbFiles.Select(FindAtomicContacts.PdbIdFromPdbFilename).ToList();

            // only ca-atoms, ters and endmdls
            var pdbAtomsText =
                pdbFiles.Select(
                    a =>
                    File.ReadAllLines(a)
                    .Where(b => (b.StartsWith("ATOM ") && b[13] == 'C' && b[14] == 'A') || /*b.StartsWith("TER ") ||*/ b.StartsWith("ENDMDL "))
                    .ToList()).ToList();

            // only first nmr model
            pdbAtomsText = pdbAtomsText.Select(a =>
            {
                var x = a.FindIndex(b => b.StartsWith("ENDMDL "));
                return(x == -1 ? a : a.GetRange(0, x - 1));
            }).ToList();

            var pdbAtoms = pdbAtomsText.Select(a => a.Select(b => new ATOM_Record(b)).ToList()).ToList();

            // get list of unique chain ids
            var pdbChainIds = pdbAtoms.Select((a, i) => a.Select(b => char.ToUpperInvariant(b.chainID.FieldValue[0])).ToList()).Distinct().ToList();

            var pdbIdChainIdList = new List <Tuple <string, char> >();

            for (var i = 0; i < pdbIdList.Count; i++)
            {
                pdbIdChainIdList.AddRange(pdbChainIds[i].Select(chainId => new Tuple <string, char>(pdbIdList[i], chainId)));
            }
            pdbIdChainIdList = pdbIdChainIdList.Distinct().ToList();

            // for each chain
            var pdbContacts =
                pdbIdChainIdList.Select(a =>
            {
                var x =
                    FindAtomicContacts.AtomPair.LoadAtomPairList(@"C:\ds96ub_homologs\contacts\contacts_pdb" + a.Item1.ToUpperInvariant() + ".pdb")
                    .Where(b => char.ToUpperInvariant(b.Atom1.chainID.FieldValue[0]) == a.Item2 || char.ToUpperInvariant(b.Atom2.chainID.FieldValue[0]) == a.Item2)
                    .Select(c =>
                {
                    if (char.ToUpperInvariant(c.Atom1.chainID.FieldValue[0]) != a.Item2)
                    {
                        c.SwapAtoms();
                    }

                    return(c);
                }).ToList();

                return(x);
            }).ToList();



            // res min, res max, best min, best max, interface aa, interface mask
            var pdbInterfaces = new List <Ds96UbInterface>();

            var interface_target_length = 50;


            for (int index = 0; index < pdbContacts.Count; index++)
            {
                var pdbId   = pdbIdChainIdList[index].Item1;
                var chainId = pdbIdChainIdList[index].Item2;

                var pdbContact = pdbContacts[index];

                if (pdbContact.Count == 0)
                {
                    continue;
                }

                var contactChains = pdbContact.Where(a => char.ToUpperInvariant(a.Atom2.chainID.FieldValue[0]) != chainId).Select(a => char.ToUpperInvariant(a.Atom2.chainID.FieldValue[0])).Distinct().ToList();

                foreach (var contactChain in contactChains)
                {
                    var pdbContactsResSeqIds =
                        pdbContact.Where(a => char.ToUpperInvariant(a.Atom1.chainID.FieldValue[0]) == chainId &&
                                         char.ToUpperInvariant(a.Atom2.chainID.FieldValue[0]) == contactChain)
                        .Select(a => int.Parse(a.Atom1.resSeq.FieldValue))
                        .ToList();


                    var res_seq     = pdbContactsResSeqIds;
                    var min_res_seq = pdbContactsResSeqIds.Min();
                    var max_res_seq = pdbContactsResSeqIds.Max();

                    var best50_min           = int.MinValue;
                    var best50_max           = int.MinValue;
                    var best50_interactions  = int.MinValue;
                    var best50_middle_finder = new List <Tuple <int, int, int> >();
                    for (var x = min_res_seq - interface_target_length; x <= max_res_seq; x++)
                    {
                        if (Math.Abs(max_res_seq - min_res_seq) <= interface_target_length)
                        {
                            best50_min          = min_res_seq;
                            best50_max          = max_res_seq;
                            best50_interactions = res_seq.Count;
                            break;
                        }

                        var min = x;
                        var max = x + interface_target_length > max_res_seq ? max_res_seq : x + interface_target_length;

                        var best50 = res_seq.Count(a => a >= best50_min && a <= best50_max);

                        if (best50 == best50_interactions)
                        {
                            best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                        }

                        if (best50_interactions == int.MinValue || best50 > best50_interactions)
                        {
                            best50_middle_finder.Clear();
                            best50_middle_finder.Add(new Tuple <int, int, int>(min, max, best50));
                            best50_min          = min;
                            best50_max          = max;
                            best50_interactions = best50;
                        }

                        if (x + interface_target_length >= max)
                        {
                            break;
                        }
                    }

                    if (best50_middle_finder.Count > 2)
                    {
                        var middle = best50_middle_finder[best50_middle_finder.Count / 2];
                        best50_min          = middle.Item1;
                        best50_max          = middle.Item2;
                        best50_interactions = middle.Item3;
                    }

                    var best50_interface_atoms = pdbAtoms[pdbIdList.IndexOf(pdbId)].Where(a =>
                    {
                        var l = int.Parse(a.resSeq.FieldValue);
                        var c = char.ToUpperInvariant(a.chainID.FieldValue[0]);
                        return(c == chainId && l >= best50_min && l <= best50_max);
                    }).ToList();

                    best50_interface_atoms = best50_interface_atoms.OrderBy(c => int.Parse(c.resSeq.FieldValue)).ToList();

                    var best50_interface = string.Join("", best50_interface_atoms.Select(b => AminoAcidConversions.AminoAcidNameToCode1L(b.resName.FieldValue)).ToList());

                    var best50_mask = new string('_', best50_interface.Length);
                    best50_mask = string.Join("",
                                              best50_mask.Select((a, i) => res_seq.Contains(i + best50_min) ? "X" : "_").ToList());

                    pdbInterfaces.Add(new Ds96UbInterface(pdbId, chainId, contactChain, min_res_seq, max_res_seq, best50_min,
                                                          best50_max, best50_interactions, best50_interface, best50_mask, -1, "", "", 0, -1, "", "", 0));
                }
            }

            var homologClusterIndexes = homologClusterData.Select(a => a.ClusterIndex).Distinct().ToList();

            var homologClusters = homologClusterIndexes.Select(a => homologClusterData.Where(b => b.ClusterIndex == a).ToList()).ToList();

            var pdbInterfacesSorted = homologClusters.Select(a => pdbInterfaces.Where(b => a.Any(c => c.PdbId == b.PdbId && (char.ToUpperInvariant(c.ChainId[0]) == b.ChainId1 || char.ToUpperInvariant(c.ChainId[0]) == b.ChainId2))).ToList()).ToList();

            var outputData = new List <string>();


            foreach (var clusterIndex in homologClusterIndexes)
            {
                var cluster = pdbInterfacesSorted[clusterIndex - 1];

                // currently, cluster is a list of chain1-->chain2 interfaces ... so the 'chain2' interface needs adding to the record



                foreach (var inf1 in cluster)
                {
                    var partner =
                        cluster.Where(a => a != inf1 && a.PdbId == inf1.PdbId && inf1.ChainId2 == a.ChainId1)
                        .OrderByDescending(
                            a => InterfaceOverlap(inf1.MinResSeq, inf1.MaxResSeq, a.MinResSeq, a.MaxResSeq))
                        .ToList();

                    var first = partner.FirstOrDefault();
                    if (first != null)
                    {
                        inf1.Partner1InterfaceAminoAcids       = first.InterfaceAminoAcids;
                        inf1.Partner1InterfaceInteractionsMask = first.InterfaceInteractionsMask;
                        inf1.Partner1InterfaceOverlap          = InterfaceOverlap(inf1.MinResSeq, inf1.MaxResSeq, first.MinResSeq, first.MaxResSeq);
                    }

                    var second = partner.ElementAtOrDefault(1);
                    if (second != null)
                    {
                        inf1.Partner2InterfaceAminoAcids       = second.InterfaceAminoAcids;
                        inf1.Partner2InterfaceInteractionsMask = second.InterfaceInteractionsMask;
                        inf1.Partner2InterfaceOverlap          = InterfaceOverlap(inf1.MinResSeq, inf1.MaxResSeq, second.MinResSeq, second.MaxResSeq);
                    }
                }

                cluster = cluster.Where(a => a.Partner1InterfaceOverlap > 0 || a.Partner2InterfaceOverlap > 0).ToList();

                /*
                 * var partners =
                 *  foreach (var inf2 in cluster)
                 *  {
                 *      if (inf1.PdbId!=inf2.PdbId) continue;
                 *
                 *      if (inf1==inf2) continue;
                 *
                 *      if (!(inf1.ChainId1==inf2.ChainId2 || inf1.ChainId2==inf2.ChainId1)) continue;
                 *
                 *      //
                 *      var overlap = InterfaceOverlap(inf1.MinResSeq, inf1.MaxResSeq, inf2.MinResSeq, inf2.MaxResSeq);
                 *
                 *      if (overlap > 0)
                 *      {
                 *          if (overlap > inf1.Partner1InterfaceOverlap)
                 *          {
                 *              inf1.Partner1InterfaceOverlap = overlap;
                 *              inf1.Partner1InterfaceAminoAcids = inf2.InterfaceAminoAcids;
                 *              inf1.Partner1InterfaceInteractionsMask = inf2.InterfaceInteractionsMask;
                 *          }
                 *
                 *          if (overlap > inf2.Partner1InterfaceOverlap)
                 *          {
                 *              inf2.Partner1InterfaceOverlap = overlap;
                 *              inf2.Partner1InterfaceAminoAcids = inf1.InterfaceAminoAcids;
                 *              inf2.Partner1InterfaceInteractionsMask = inf1.InterfaceInteractionsMask;
                 *          }
                 *      }
                 *  }
                 * }
                 */

                //var interfaces = cluster.Select(a => a.InterfaceAminoAcids).ToList();
                //interfaces = interfaces.Where(a => interfaces.Count(b => b == a) > 1).ToList();

                //cluster = cluster.Where(a => a.InterfaceAminoAcids.Length >= 5 && cluster.Count(b => b.InterfaceAminoAcids == a.InterfaceAminoAcids) > 1).ToList();
                cluster = cluster.Where(a => a.InterfaceAminoAcids.Length >= 5).ToList();

                var clusterInterfaces = cluster.Select(a => a.InterfaceAminoAcids).ToList();

                var homologInterfaces = new List <List <string> >();
                foreach (var inf1 in clusterInterfaces)
                {
                    var    highest_score = decimal.MinValue;
                    string highest_inf   = null;

                    foreach (var inf2 in clusterInterfaces)
                    {
                        if (inf1 == inf2)
                        {
                            continue;
                        }

                        var score = FindAtomicContacts.AlignedSequenceSimilarityPercentage(inf1, inf2, FindAtomicContacts.AlignmentType.NMW);
                        if (score.Item1 > highest_score)
                        {
                            highest_score = score.Item1;
                            highest_inf   = inf2;
                        }
                    }
                    var y = homologInterfaces.FirstOrDefault(a => a.Contains(inf1) || a.Contains(highest_inf));
                    if (y != null)
                    {
                        if (!y.Contains(inf1))
                        {
                            y.Add(inf1);
                        }
                        if (!y.Contains(highest_inf))
                        {
                            y.Add(highest_inf);
                        }
                    }
                    else
                    {
                        var z = new List <string>();
                        z.Add(inf1);
                        z.Add(highest_inf);
                        homologInterfaces.Add(z);
                    }
                }

                foreach (var c in cluster)
                {
                    c.Partner1ClusterIndex = homologInterfaces.FindIndex(b => b.Contains(c.Partner1InterfaceAminoAcids));
                    c.Partner2ClusterIndex = homologInterfaces.FindIndex(b => b.Contains(c.Partner2InterfaceAminoAcids));
                }

                for (int index = 0; index < homologInterfaces.Count; index++)
                {
                    var homologInterface = homologInterfaces[index];



                    var cluster2 =
                        cluster.Where(a => homologInterface.Contains(a.InterfaceAminoAcids)
                                      )
                        .OrderBy(a => a.Partner1ClusterIndex)
                        .ThenBy(a => a.Partner2ClusterIndex)
                        .ThenBy(a => a.InterfaceAminoAcids)
                        .ThenBy(a => a.Partner1InterfaceAminoAcids)
                        .ThenBy(a => a.Partner2InterfaceAminoAcids)
                        .ToList();

                    var partners =
                        cluster2.Select(
                            a =>
                            new Tuple <string, string, string>(a.InterfaceAminoAcids, a.Partner1InterfaceAminoAcids,
                                                               a.Partner2InterfaceAminoAcids)).Distinct();

                    cluster2 =
                        partners.Select(
                            a =>
                            cluster2.FirstOrDefault(
                                b =>
                                b.InterfaceAminoAcids == a.Item1 && b.Partner1InterfaceAminoAcids == a.Item2 &&
                                b.Partner2InterfaceAminoAcids == a.Item3)).ToList();

                    outputData.Add("cluster " + clusterIndex + "." + index);
                    outputData.AddRange(cluster2.Select(a => a.ToString()).ToList());
                    outputData.Add("");
                }
            }

            File.WriteAllLines(@"c:\ds96ub_homologs\ds96ub_homologs_interfaces.csv", outputData);//pdbInterfaces.Select(a=>a.ToString()).ToList());
        }