Пример #1
0
        public void TestUpgma()
        {
            //decimal[,] distanceMatrix1 =
            //{
            //    { 0, 0, 0, 0, 0 },
            //    { 0, 0, 1, 1, 1 },
            //    { 0, 1, 0, 2, 2 },
            //    { 0, 1, 2, 0, 3 },
            //    { 0, 1, 2, 3, 0 },
            //};

            //var result = UpgmaClustering.Upgma(distanceMatrix1);

            decimal[,] distanceMatrix2 =
            {
                {  0, 19, 27,  8, 33, 18, 13 },
                { 19,  0, 31, 18, 36,  1, 13 },
                { 27, 31,  0, 26, 41, 32, 29 },
                {  8, 18, 26,  0, 31, 17, 14 },
                { 33, 36, 41, 31,  0, 35, 28 },
                { 18,  1, 32, 17, 35,  0, 12 },
                { 13, 13, 29, 14, 28, 12,  0 }
            };

            var names = new List <string>();

            for (var i = 0; i < distanceMatrix2.GetLength(0); i++)
            {
                names.Add("" + i);
            }


            List <string> vectorNames     = new List <string>();
            var           maxVectorLength = distanceMatrix2.GetLength(0) > distanceMatrix2.GetLength(1) ? distanceMatrix2.GetLength(0) : distanceMatrix2.GetLength(1);

            for (var i = 0; i < maxVectorLength; i++)
            {
                vectorNames.Add(SpreadsheetFileHandler.AlphabetLetterRollOver(i) + i);
            }

            List <List <UpgmaNode> > nodeList;
            List <List <string> >    treeList;

            var minimumOutputTreeLeafs = 1;

            List <string> finalTreeLeafOrderList;

            UpgmaClustering.Upgma(distanceMatrix2, vectorNames, minimumOutputTreeLeafs, out nodeList, out treeList, out finalTreeLeafOrderList);

            CheckUpgmaDistanceConsistency(nodeList[nodeList.Count - 1]);

            List <string> treeLeafOrderList;
            var           tree = Newick.NewickTreeFormat(nodeList[nodeList.Count - 1].ToList <GenericNode>(), names, out treeLeafOrderList, minimumOutputTreeLeafs);

            Console.WriteLine(tree);
        }
Пример #2
0
        public void SaveToFile(string saveFilename, int numberProteinInterfacesRequired = -1)
        {
            var rowList = new List <SpreadsheetCell[]> {
                SpreadsheetColumnHeadersRow()
            };

            rowList.AddRange(SpreadsheetDataRowList(numberProteinInterfacesRequired));

            SpreadsheetFileHandler.SaveSpreadsheet(saveFilename, null, rowList);
        }
Пример #3
0
        public static void SaveUniProtSpreadsheet(string saveFilename, ProgressActionSet progressActionSet)
        {
            if (saveFilename == null)
            {
                throw new ArgumentNullException(nameof(saveFilename));
            }
            List <SpreadsheetCell[]> spreadsheet = UniProtHeatMapSpreadsheet();

            string[] savedFiles = SpreadsheetFileHandler.SaveSpreadsheet(saveFilename, null, spreadsheet);

            foreach (string savedFile in savedFiles)
            {
                ProgressActionSet.Report("Saved: " + savedFile, progressActionSet);
            }
        }
        /// <summary>
        ///     Save to disk a list of sequences in CSV/TSV format.
        /// </summary>
        /// <param name="sequences"></param>
        /// <param name="filename"></param>
        public static string[] SaveSequencesAsSpreadsheet(List <ISequence> sequences, string filename, bool tsvFormat = false, bool xlsxFormat = true)
        {
            if (sequences == null || sequences.Count == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(sequences));
            }

            if (string.IsNullOrWhiteSpace(filename))
            {
                throw new ArgumentOutOfRangeException(nameof(filename));
            }

            if (!tsvFormat && !xlsxFormat)
            {
                throw new ArgumentOutOfRangeException(nameof(tsvFormat), tsvFormat, "No file formats were selected");
            }

            var headerColumnsRow = new[]
            {
                new SpreadsheetCell("PDB ID"),
                new SpreadsheetCell("Chain"),
                new SpreadsheetCell("Sequence"),
            };

            var rowList = new List <SpreadsheetCell[]>();

            rowList.Add(headerColumnsRow);

            foreach (ISequence sequence in sequences)
            {
                SequenceIdSplit.SequenceIdToPdbIdAndChainIdResult id = SequenceIdSplit.SequenceIdToPdbIdAndChainId(sequence.ID);

                var row = new[]
                {
                    new SpreadsheetCell(id.PdbId),
                    new SpreadsheetCell(id.ChainId),
                    new SpreadsheetCell(sequence.ConvertToString()),
                };

                rowList.Add(row);
            }

            string[] filesSavedStrings = SpreadsheetFileHandler.SaveSpreadsheet(filename, null, rowList, null, tsvFormat, xlsxFormat);

            return(filesSavedStrings);
        }
        public static string[] SaveVectorDistanceMatrixSpreadsheet(string saveFilename, List <VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList, double[,] vectorDistanceMatrix)
        {
            if (saveFilename == null)
            {
                throw new ArgumentNullException(nameof(saveFilename));
            }
            if (vectorProteinInterfaceWholeList == null)
            {
                throw new ArgumentNullException(nameof(vectorProteinInterfaceWholeList));
            }
            if (vectorDistanceMatrix == null)
            {
                throw new ArgumentNullException(nameof(vectorDistanceMatrix));
            }

            var spreadsheet = new SpreadsheetCell[vectorDistanceMatrix.GetLength(0) + 1, vectorDistanceMatrix.GetLength(1) + 1];

            for (var indexX = 0; indexX < vectorDistanceMatrix.GetLength(0); indexX++)
            {
                spreadsheet[0, indexX + 1] = new SpreadsheetCell(vectorProteinInterfaceWholeList[indexX].FullProteinInterfaceId.ToString());
            }

            spreadsheet[0, 0] = new SpreadsheetCell("");

            for (var indexY = 0; indexY < vectorDistanceMatrix.GetLength(1); indexY++)
            {
                spreadsheet[indexY + 1, 0] = new SpreadsheetCell(vectorProteinInterfaceWholeList[indexY].FullProteinInterfaceId.ToString());
            }

            for (var indexX = 0; indexX < vectorDistanceMatrix.GetLength(0); indexX++)
            {
                for (var indexY = 0; indexY < vectorDistanceMatrix.GetLength(1); indexY++)
                {
                    spreadsheet[indexX + 1, indexY + 1] = new SpreadsheetCell(vectorDistanceMatrix[indexX, indexY]);
                }
            }

            var savedFiles = SpreadsheetFileHandler.SaveSpreadsheet(saveFilename, null, spreadsheet);

            return(savedFiles);
        }
        public static List <string> RemoveNonProteinInterfaceRecords(string pdbFilename, List <ProteinInterfaceId> proteinInterfaceIdList)
        {
            var proteinDataBankFile = new ProteinDataBankFormat.ProteinDataBankFile(pdbFilename);

            var result = new List <string>();

            var foundEndModel = false;
            var levelModel    = 0;

            for (var proteinDataBankFileRecordIndex = 0; proteinDataBankFileRecordIndex < proteinDataBankFile.Count; proteinDataBankFileRecordIndex++)
            {
                var record = proteinDataBankFile.NextRecord();

                if (record == null)
                {
                    continue;
                }

                if (record.GetType() == typeof(MODEL_Record))
                {
                    levelModel++;
                }
                else if (record.GetType() == typeof(ENDMDL_Record))
                {
                    foundEndModel = true;
                    levelModel--;
                }
                else if (record.GetType() == typeof(ATOM_Record))
                {
                    var atom = (ATOM_Record)record;

                    var atomResSeq = ProteinDataBankFileOperations.NullableTryParseInt32(atom.resSeq.FieldValue);

                    if (atomResSeq != null)
                    {
                        var atomChain = atom.chainID.FieldValue;

                        if (!foundEndModel && proteinInterfaceIdList.Any(s => SpreadsheetFileHandler.AlphabetLetterRollOver(s.ChainId) == atomChain && atomResSeq >= s.FirstPosition && atomResSeq <= s.LastPosition))
                        {
                            result.Add(record.ColumnFormatLine);
                        }
                    }
                }
                else if (record.GetType() == typeof(HETATM_Record))
                {
                    var hetatm = (HETATM_Record)record;

                    var atomResSeq = ProteinDataBankFileOperations.NullableTryParseInt32(hetatm.resSeq.FieldValue);

                    if (atomResSeq == null)
                    {
                        continue;
                    }
                    var atomChain = hetatm.chainID.FieldValue;

                    if (!foundEndModel && proteinInterfaceIdList.Any(s => SpreadsheetFileHandler.AlphabetLetterRollOver(s.ChainId) == atomChain && atomResSeq >= s.FirstPosition && atomResSeq <= s.LastPosition))
                    {
                        result.Add(record.ColumnFormatLine);
                    }
                }
                else if (record.GetType() == typeof(LINK_Record))
                {
                    var link = (LINK_Record)record;

                    var atomResSeq1 = ProteinDataBankFileOperations.NullableTryParseInt32(link.resSeq1.FieldValue);
                    var atomResSeq2 = ProteinDataBankFileOperations.NullableTryParseInt32(link.resSeq2.FieldValue);

                    if (atomResSeq1 == null || atomResSeq2 == null)
                    {
                        continue;
                    }

                    if (!foundEndModel && proteinInterfaceIdList.Any(s => ((atomResSeq1 >= s.FirstPosition && atomResSeq1 <= s.LastPosition) && (atomResSeq2 >= s.FirstPosition && atomResSeq2 <= s.LastPosition)) ||
                                                                     ((atomResSeq2 >= s.FirstPosition && atomResSeq2 <= s.LastPosition) && (atomResSeq1 >= s.FirstPosition && atomResSeq1 <= s.LastPosition))))
                    {
                        result.Add(record.ColumnFormatLine);
                    }
                }
                else if (record.GetType() == typeof(ANISOU_Record))
                {
                    var anisou = (ANISOU_Record)record;

                    var atomResSeq = ProteinDataBankFileOperations.NullableTryParseInt32(anisou.resSeq.FieldValue);

                    if (atomResSeq == null)
                    {
                        continue;
                    }

                    var atomChain = anisou.chainID.FieldValue;

                    if (!foundEndModel && proteinInterfaceIdList.Any(s => SpreadsheetFileHandler.AlphabetLetterRollOver(s.ChainId) == atomChain && atomResSeq >= s.FirstPosition && atomResSeq <= s.LastPosition))
                    {
                        result.Add(record.ColumnFormatLine);
                    }
                }
                else if (record.GetType() == typeof(TER_Record))
                {
                    var ter = (TER_Record)record;

                    var atomChain = ter.chainID.FieldValue;

                    if (!foundEndModel && proteinInterfaceIdList.Any(s => SpreadsheetFileHandler.AlphabetLetterRollOver(s.ChainId) == atomChain))
                    {
                        result.Add(record.ColumnFormatLine);
                    }
                }
                else
                {
                    if (!foundEndModel || levelModel == 0)
                    {
                        result.Add(record.ColumnFormatLine);
                    }
                }
            }

            return(result);
        }
        /// <summary>
        ///     This method iterates through the provided FASTA files creating separate calculated outputs for each of them.
        /// </summary>
        /// <param name="fastaFiles">The FASTA files to process.</param>
        /// <param name="pdbFilesFolders">The locations where PDB files may be found.</param>
        /// <param name="spreadsheetSaveFilenameTemplate">A template filename to save the outputs.</param>
        /// <param name="saveTsv"></param>
        /// <param name="saveXl"></param>
        /// <param name="cancellationToken"></param>
        /// <param name="progressActionSet"></param>
        /// <param name="fileExistsOptions"></param>
        public static void MakeHomodimerStatisticsSpreadsheetsAndOutputFiles(decimal maxAtomInterationDistance, string[] fastaFiles, string[] pdbFilesFolders, string spreadsheetSaveFilenameTemplate, bool saveTsv, bool saveXl, CancellationToken cancellationToken, ProgressActionSet progressActionSet = null, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            if (fastaFiles == null || fastaFiles.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(fastaFiles));
            }

            if (pdbFilesFolders == null || pdbFilesFolders.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilesFolders));
            }

            if (!saveTsv && !saveXl)
            {
                throw new ArgumentOutOfRangeException(nameof(saveTsv));
            }

            for (int fastaFileNumber = 0; fastaFileNumber < fastaFiles.Length; fastaFileNumber++)
            {
                string fastaFilename = fastaFiles[fastaFileNumber];

                if (string.IsNullOrWhiteSpace(fastaFilename))
                {
                    continue;
                }

                ProgressActionSet.Report("Attempting to open file: " + fastaFilename, progressActionSet);

                List <ISequence> sequences = SequenceFileHandler.LoadSequenceFile(fastaFilename, StaticValues.MolNameProteinAcceptedValues);

                var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences);

                if ((sequences == null) || (sequences.Count == 0))
                {
                    ProgressActionSet.Report("Error could not load file: " + fastaFilename, progressActionSet);
                    continue;
                }
                ProgressActionSet.Report("Loaded " + sequences.Count + " sequences from file: " + fastaFilename, progressActionSet);

                List <string> pdbIdList      = FilterProteins.SequenceListToPdbIdList(sequences);
                string        appendFilename = FileAndPathMethods.FullPathToFilename(fastaFilename);

                ProgressActionSet.Report("Creating spreadsheets...", progressActionSet);
                Stopwatch stopwatch       = Stopwatch.StartNew();
                var       spreadsheetList = MakeHomodimerStatisticsSpreadsheetsList(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);
                stopwatch.Stop();
                ProgressActionSet.Report("Finished calculating spreadsheet data [Elapsed: " + stopwatch.Elapsed.ToString(@"dd\:hh\:mm\:ss") + "]", progressActionSet);

                if (cancellationToken.IsCancellationRequested)
                {
                    //UserProteinInterfaceOperations.ProgressBarReset(progressBar, 0, 100, 0);
                    ////UserProteinInterfaceOperations.LabelEstimatedTimeRemainingUpdate(estimatedTimeRemaining, 0, 1, 1);

                    ProgressActionSet.StartAction(100, progressActionSet);
                    ProgressActionSet.ProgressAction(100, progressActionSet);
                    ProgressActionSet.FinishAction(false, progressActionSet);
                    ProgressActionSet.Report("Cancelled.", progressActionSet);
                    break;
                }


                for (int spreadsheetIndex = 0; spreadsheetIndex < spreadsheetList.Count; spreadsheetIndex++)
                {
                    var spreadsheet = spreadsheetList[spreadsheetIndex];

                    if (cancellationToken.IsCancellationRequested)
                    {
                        break;
                    }



                    // Remove the first row (which has the name for use in a worksheet title, not currently used)
                    var sheetName       = spreadsheet[0][0].CellData;
                    var spreadsheetName = spreadsheet[1][0].CellData;
                    spreadsheet.RemoveAt(0);

                    // "c:/dResults/Results - %date% %time% - %fasta_filename% - %spreadsheet_name%.tsv"
                    string saveFilename = spreadsheetSaveFilenameTemplate;

                    saveFilename = saveFilename.Replace("%spreadsheet_name%", spreadsheetName);
                    saveFilename = saveFilename.Replace("%fasta_filename%", appendFilename);
                    saveFilename = saveFilename.Replace("%date%", DateTime.Now.ToString("yyyy-MM-dd"));
                    saveFilename = saveFilename.Replace("%time%", DateTime.Now.ToString("HH.mm.ss"));
                    saveFilename = saveFilename.Replace("%batch_number%", ""); //string.Empty + (fastaFileNumber + 1));
                    saveFilename = saveFilename.Replace("%batch_letter%", ""); //SpreadsheetFileHandler.AlphabetLetterRollOver(spreadsheetIndex));

                    sheetName = sheetName.Replace("%spreadsheet_name%", spreadsheetName);
                    sheetName = sheetName.Replace("%fasta_filename%", appendFilename);
                    sheetName = sheetName.Replace("%date%", DateTime.Now.ToString("yyyy-MM-dd"));
                    sheetName = sheetName.Replace("%time%", DateTime.Now.ToString("HH.mm.ss"));
                    sheetName = sheetName.Replace("%batch_number%", ""); //string.Empty + (fastaFileNumber + 1));
                    sheetName = sheetName.Replace("%batch_letter%", ""); //SpreadsheetFileHandler.AlphabetLetterRollOver(spreadsheetIndex));


                    //var tsvFilename = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".tsv");

                    var xlFilename = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".xlsx");

                    var savedFiles = SpreadsheetFileHandler.SaveSpreadsheet(xlFilename.FullName, new[] { sheetName }, spreadsheet, null, saveTsv, saveXl, fileExistsOptions);

                    ProgressActionSet.ReportFilesSaved(savedFiles, progressActionSet);
                }
            }

            ProgressActionSet.Report("Finished processing files.", progressActionSet);
        }
Пример #8
0
        public static List <AminoAcidDistributionSpreadsheetRecord> PatternDistributionSpreadsheetRecords(List <VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList, List <string> pdbIdList, List <ISequence> seqList, int vectorType)
        {
            if (vectorProteinInterfaceWholeList == null)
            {
                throw new ArgumentNullException(nameof(vectorProteinInterfaceWholeList));
            }
            if (pdbIdList == null)
            {
                throw new ArgumentNullException(nameof(pdbIdList));
            }
            if (seqList == null)
            {
                throw new ArgumentNullException(nameof(seqList));
            }

            var result = new List <AminoAcidDistributionSpreadsheetRecord>();

            // patterns

            var patternProteinDictionary                        = new Dictionary <string, AminoAcidChainComposition>();
            var patternProteinInterfaceDictionary               = new Dictionary <string, AminoAcidChainComposition>();
            var patternProteinInterfaceInteractionDictionary    = new Dictionary <string, AminoAcidChainComposition>();
            var patternProteinInterfaceNonInteractionDictionary = new Dictionary <string, AminoAcidChainComposition>();

            foreach (var vectorProteinInterfaceWhole in vectorProteinInterfaceWholeList)
            {
                var interactionBools = vectorProteinInterfaceWhole.InteractionBools();

                string pattern;
                if (vectorType >= 0 && vectorType <= 3)
                {
                    pattern = vectorProteinInterfaceWhole.VectorString(vectorType);
                }
                else if (vectorType == 4)
                {
                    pattern = vectorProteinInterfaceWhole.SecondaryStructure;
                }
                else if (vectorType == 5)
                {
                    pattern = "ProteinInterface Length " + vectorProteinInterfaceWhole.ProteinInterfaceLength;
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(vectorType));
                }

                if (!patternProteinDictionary.ContainsKey(pattern))
                {
                    patternProteinDictionary.Add(pattern, new AminoAcidChainComposition());
                }
                if (!patternProteinInterfaceDictionary.ContainsKey(pattern))
                {
                    patternProteinInterfaceDictionary.Add(pattern, new AminoAcidChainComposition());
                }
                if (!patternProteinInterfaceInteractionDictionary.ContainsKey(pattern))
                {
                    patternProteinInterfaceInteractionDictionary.Add(pattern, new AminoAcidChainComposition());
                }
                if (!patternProteinInterfaceNonInteractionDictionary.ContainsKey(pattern))
                {
                    patternProteinInterfaceNonInteractionDictionary.Add(pattern, new AminoAcidChainComposition());
                }

                var aminoAcids1L = vectorProteinInterfaceWhole.ProteinInterfaceAminoAcids1L();

                for (int index = 0; index < aminoAcids1L.Length; index++)
                {
                    var c = aminoAcids1L[index];

                    patternProteinInterfaceDictionary[pattern].IncrementAminoAcidCount(c);

                    if (interactionBools[index])
                    {
                        patternProteinInterfaceInteractionDictionary[pattern].IncrementAminoAcidCount(c);
                    }
                    else
                    {
                        patternProteinInterfaceNonInteractionDictionary[pattern].IncrementAminoAcidCount(c);
                    }
                }

                patternProteinDictionary[pattern].NumberSamples++;
                patternProteinInterfaceDictionary[pattern].NumberSamples++;
                patternProteinInterfaceInteractionDictionary[pattern].NumberSamples++;
                patternProteinInterfaceNonInteractionDictionary[pattern].NumberSamples++;


                var bsSeqList = seqList.Where(a => SequenceIdSplit.SequenceIdToPdbIdAndChainId(a.ID).PdbId == vectorProteinInterfaceWhole.FullProteinInterfaceId.ProteinId && SequenceIdSplit.SequenceIdToPdbIdAndChainId(a.ID).ChainId == SpreadsheetFileHandler.AlphabetLetterRollOver(vectorProteinInterfaceWhole.FullProteinInterfaceId.ChainId)).ToList();
                foreach (var chain in bsSeqList)
                {
                    var seq = chain.ConvertToString();
                    foreach (var c in seq)
                    {
                        patternProteinDictionary[pattern].IncrementAminoAcidCount(c);
                    }
                }
            }

            foreach (var kvp in patternProteinDictionary)
            {
                var recordAllComposition = new AminoAcidDistributionSpreadsheetRecord();
                var recordProteinInterfaceComposition               = new AminoAcidDistributionSpreadsheetRecord();
                var recordProteinInterfaceInteractionComposition    = new AminoAcidDistributionSpreadsheetRecord();
                var recordProteinInterfaceNonInteractionComposition = new AminoAcidDistributionSpreadsheetRecord();

                result.Add(recordAllComposition);
                result.Add(recordProteinInterfaceComposition);
                result.Add(recordProteinInterfaceInteractionComposition);
                result.Add(recordProteinInterfaceNonInteractionComposition);

                recordAllComposition.Pattern = kvp.Key;
                recordProteinInterfaceComposition.Pattern               = kvp.Key;
                recordProteinInterfaceInteractionComposition.Pattern    = kvp.Key;
                recordProteinInterfaceNonInteractionComposition.Pattern = kvp.Key;

                var vectorTypeStr = VectorProteinInterfaceWhole.VectorStringDescription(vectorType);

                recordAllComposition.Type = vectorTypeStr;
                recordProteinInterfaceComposition.Type               = vectorTypeStr;
                recordProteinInterfaceInteractionComposition.Type    = vectorTypeStr;
                recordProteinInterfaceNonInteractionComposition.Type = vectorTypeStr;

                recordAllComposition.Metric = "Protein";
                recordProteinInterfaceComposition.Metric               = "ProteinInterface";
                recordProteinInterfaceInteractionComposition.Metric    = "ProteinInterface interactions";
                recordProteinInterfaceNonInteractionComposition.Metric = "ProteinInterface non interactions";

                // number of samples
                recordAllComposition.NumberOfSamples = kvp.Value.NumberSamples;
                recordProteinInterfaceComposition.NumberOfSamples               = patternProteinInterfaceDictionary[kvp.Key].NumberSamples;
                recordProteinInterfaceInteractionComposition.NumberOfSamples    = patternProteinInterfaceInteractionDictionary[kvp.Key].NumberSamples;
                recordProteinInterfaceNonInteractionComposition.NumberOfSamples = patternProteinInterfaceNonInteractionDictionary[kvp.Key].NumberSamples;

                // number of amino acids
                recordAllComposition.TotalAminoAcids = kvp.Value.AminoAcidGroupsCount[(int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids].Sum();
                recordProteinInterfaceComposition.TotalAminoAcids               = patternProteinInterfaceDictionary[kvp.Key].AminoAcidGroupsCount[(int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids].Sum();
                recordProteinInterfaceInteractionComposition.TotalAminoAcids    = patternProteinInterfaceInteractionDictionary[kvp.Key].AminoAcidGroupsCount[(int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids].Sum();
                recordProteinInterfaceNonInteractionComposition.TotalAminoAcids = patternProteinInterfaceNonInteractionDictionary[kvp.Key].AminoAcidGroupsCount[(int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids].Sum();

                var allPercentage = AminoAcidChainComposition.ConvertToPercentage(kvp.Value);
                var proteinInterfacePercentage               = AminoAcidChainComposition.ConvertToPercentage(patternProteinInterfaceDictionary[kvp.Key]);
                var proteinInterfaceInteractionPercentage    = AminoAcidChainComposition.ConvertToPercentage(patternProteinInterfaceInteractionDictionary[kvp.Key]);
                var proteinInterfaceNonInteractionPercentage = AminoAcidChainComposition.ConvertToPercentage(patternProteinInterfaceNonInteractionDictionary[kvp.Key]);

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    var groupItemsTotal = AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(enumAminoAcidGroups);

                    for (var groupItemIndex = 0; groupItemIndex < groupItemsTotal; groupItemIndex++)
                    {
                        recordAllComposition.Distribution[(int)enumAminoAcidGroups][groupItemIndex] = allPercentage.AminoAcidGroupsCount[(int)enumAminoAcidGroups][groupItemIndex];
                        recordProteinInterfaceComposition.Distribution[(int)enumAminoAcidGroups][groupItemIndex]               = proteinInterfacePercentage.AminoAcidGroupsCount[(int)enumAminoAcidGroups][groupItemIndex];
                        recordProteinInterfaceInteractionComposition.Distribution[(int)enumAminoAcidGroups][groupItemIndex]    = proteinInterfaceInteractionPercentage.AminoAcidGroupsCount[(int)enumAminoAcidGroups][groupItemIndex];
                        recordProteinInterfaceNonInteractionComposition.Distribution[(int)enumAminoAcidGroups][groupItemIndex] = proteinInterfaceNonInteractionPercentage.AminoAcidGroupsCount[(int)enumAminoAcidGroups][groupItemIndex];
                    }
                }
            }

            return(result);
        }
Пример #9
0
        /// <summary>
        ///     Load proteinInterface data from the PDB file based on a list of already detected proteinInterfaces.
        ///     The detected proteinInterfaces may be missing data such as other atoms or residues which are also in the proteinInterface but were not
        ///     directly interacting.
        ///     The positions and lengths of the proteinInterfaces are also calculated.
        /// </summary>
        /// <param name="pdbFilename"></param>
        /// <param name="pdbFileChains"></param>
        /// <param name="singularAaToAaInteractions"></param>
        /// <param name="proteinInterfacesClusteringResult"></param>
        /// <param name="detectedBestStages"></param>
        /// <param name="interactionBetweenProteinInterfacesContainer"></param>
        /// <returns></returns>
        public static List <ProteinInterfaceSequenceAndPositionData> AnalyseProteinInterfacesSequenceAndPositionData(
            string pdbFilename,
            Dictionary <string, List <string> > pdbIdChainIdList,
            ProteinChainListContainer pdbFileChains,
            ProteinChainListContainer singularAaToAaInteractions,
            ClusteringFullResultListContainer proteinInterfacesClusteringResult,
            int[] detectedBestStages,
            InteractionBetweenProteinInterfacesListContainer interactionBetweenProteinInterfacesContainer)
        {
            if (string.IsNullOrWhiteSpace(pdbFilename))
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilename));
            }

            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
            {
                throw new ArgumentOutOfRangeException(nameof(singularAaToAaInteractions));
            }

            if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(proteinInterfacesClusteringResult))
            {
                throw new ArgumentOutOfRangeException(nameof(proteinInterfacesClusteringResult));
            }

            if (ParameterValidation.IsIntArrayNullOrEmpty(detectedBestStages))
            {
                throw new ArgumentOutOfRangeException(nameof(detectedBestStages));
            }

            // ProteinInterfaces are clusters with non-proteinInterfaces removed.

            var    result      = new List <ProteinInterfaceSequenceAndPositionData>();
            string proteinId   = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);
            int    totalChains = proteinInterfacesClusteringResult.ChainList.Count;

            for (int chainIndex = 0; chainIndex < totalChains; chainIndex++)
            {
                int    stageIndex    = detectedBestStages[chainIndex];
                string chainIdLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(chainIndex);

                List <ClusteringFullResultListContainer.Chain.Stage.Cluster> proteinInterfaceList = proteinInterfacesClusteringResult.ChainList[chainIndex].StageList[stageIndex].ClusterList;

                List <ClusteringFullResultListContainer.Chain.Stage.Cluster> nonEmptyProteinInterfaceList = proteinInterfaceList.Where(a => a != null && a.AtomIndexList != null && a.AtomIndexList.Count > 0).ToList();

                // loop through each proteinInterface
                for (int proteinInterfaceIndex = 0; proteinInterfaceIndex < nonEmptyProteinInterfaceList.Count; proteinInterfaceIndex++)
                {
                    ClusteringFullResultListContainer.Chain.Stage.Cluster proteinInterface = nonEmptyProteinInterfaceList[proteinInterfaceIndex];

                    // Find min and max residue sequence index value in the proteinInterface

                    MinMax proteinInterfaceResidueSequenceIndexes = MinMaxResidueSequenceIndex(proteinInterface, singularAaToAaInteractions, chainIndex);
                    int    proteinInterfaceLength = CalculateProteinInterfaceLength(proteinInterfaceResidueSequenceIndexes.Min, proteinInterfaceResidueSequenceIndexes.Max);

                    string proteinInterfaceIdLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(proteinInterfaceIndex);

                    var proteinInterfacePositionData = new ProteinInterfaceSequenceAndPositionData
                    {
                        FullProteinInterfaceId = new FullProteinInterfaceId(proteinId, chainIndex, proteinInterfaceIndex, proteinInterfaceResidueSequenceIndexes.Min, proteinInterfaceResidueSequenceIndexes.Max),
                        ChainIdLetter          = chainIdLetter,

                        ProteinInterfaceIdLetter = proteinInterfaceIdLetter,

                        StartPosition          = proteinInterfaceResidueSequenceIndexes.Min,
                        EndPosition            = proteinInterfaceResidueSequenceIndexes.Max,
                        ProteinInterfaceLength = CalculateProteinInterfaceLength(proteinInterfaceResidueSequenceIndexes.Min, proteinInterfaceResidueSequenceIndexes.Max)
                    };
                    proteinInterfacePositionData.AminoAcidSequenceAllResidueSequenceIndexes = new ProteinInterfaceAminoAcidMetaData[proteinInterfacePositionData.ProteinInterfaceLength];

                    proteinInterfacePositionData.AminoAcidSequenceAll1L             = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L = "";

                    proteinInterfacePositionData.AminoAcidSequenceAll3L             = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L = "";
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L = "";

                    //int foundAtomCount = 0;

                    const string placeholder1L = "_";
                    const string placeholder3L = "___";

                    for (int residueSequenceIndex = proteinInterfaceResidueSequenceIndexes.Min; residueSequenceIndex <= proteinInterfaceResidueSequenceIndexes.Max; residueSequenceIndex++)
                    {
                        /* questions
                         * 1. does this reside interact with another reside which is also part of a proteinInterface?
                         * 2. if not, does this reside interact at all?
                         */

                        var proteinInterfaceAminoAcidMetaData = new ProteinInterfaceAminoAcidMetaData();
                        proteinInterfacePositionData.AminoAcidSequenceAllResidueSequenceIndexes[proteinInterfacePositionData.AminoAcidSequenceAll1L.Length] = proteinInterfaceAminoAcidMetaData;

                        ATOM_Record foundAtomInsidePdbFile = AtomSearchMethods.FindAtomInsidePdbFileChain(pdbFileChains, chainIndex, residueSequenceIndex);

                        if (foundAtomInsidePdbFile == null)
                        {
                            // Non-CA atom is loaded here in case of missing CA atom to find the AA code for the resSeq index
                            var chainIdList = pdbIdChainIdList != null ? (pdbIdChainIdList.ContainsKey(proteinId) ? pdbIdChainIdList[proteinId].ToArray() : null) : null;

                            ProteinChainListContainer pdbFileChains2 = ProteinDataBankFileOperations.PdbAtomicChains(pdbFilename, chainIdList, -1, -1, false);
                            foundAtomInsidePdbFile = AtomSearchMethods.FindAtomInsidePdbFileChain(pdbFileChains2, chainIndex, residueSequenceIndex);
                        }

                        proteinInterfaceAminoAcidMetaData.PdbResidueSequenceIndex          = residueSequenceIndex;
                        proteinInterfaceAminoAcidMetaData.ArrayMemberIndex                 = pdbFileChains.ChainList[chainIndex].AtomList.IndexOf(foundAtomInsidePdbFile);
                        proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions = new bool[proteinInterfaceLength];


                        if (foundAtomInsidePdbFile != null)
                        {
                            proteinInterfacePositionData.AminoAcidSequenceAll1L += AminoAcidConversions.AminoAcidNameToCode1L(foundAtomInsidePdbFile.resName.FieldValue);

                            proteinInterfacePositionData.AminoAcidSequenceAll3L += foundAtomInsidePdbFile.resName.FieldValue.PadRight(3, '_');
                        }
                        else
                        {
                            proteinInterfacePositionData.AminoAcidSequenceAll1L             += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L  += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                            proteinInterfacePositionData.AminoAcidSequenceAll3L             += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L  += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                            proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.NoInteractionFound;
                            proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames1L += placeholder1L;
                            proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames3L += placeholder3L;
                            continue;
                        }

                        List <ATOM_Record> foundAtomInteractingWithAnotherProteinInterface = AtomSearchMethods.FindAtomInteractingWithOtherProteinInterfaces(foundAtomInsidePdbFile, interactionBetweenProteinInterfacesContainer, FindAtomInteractingWithAnotherProteinInterfaceOptions.FindAtomsInteractingWithOtherProteinInterfaces);
                        List <ATOM_Record> foundAtomInteractingWithNonProteinInterface     = AtomSearchMethods.FindAtomInteractingWithOtherProteinInterfaces(foundAtomInsidePdbFile, interactionBetweenProteinInterfacesContainer, FindAtomInteractingWithAnotherProteinInterfaceOptions.FindAtomsInteractingWithNonProteinInterfaces);

                        proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions = AminoAcidInteractionVector(singularAaToAaInteractions, proteinInterfacesClusteringResult, detectedBestStages, interactionBetweenProteinInterfacesContainer, chainIndex, proteinInterfaceIndex, residueSequenceIndex);

                        proteinInterfaceAminoAcidMetaData.ResidueName1L = AminoAcidConversions.AminoAcidNameToCode1L(foundAtomInsidePdbFile.resName.FieldValue);
                        proteinInterfaceAminoAcidMetaData.ResidueName3L = foundAtomInsidePdbFile.resName.FieldValue.PadRight(3, '_');

                        if (foundAtomInteractingWithAnotherProteinInterface != null)
                        {
                            foreach (ATOM_Record atom in foundAtomInteractingWithAnotherProteinInterface)
                            {
                                proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionResidueNames1L += AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue);
                                proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionResidueNames3L += atom.resName.FieldValue.PadRight(3, '_');
                            }
                        }

                        if (foundAtomInteractingWithNonProteinInterface != null)
                        {
                            foreach (ATOM_Record atom in foundAtomInteractingWithNonProteinInterface)
                            {
                                proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames1L += AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue);
                                proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames3L += atom.resName.FieldValue.PadRight(3, '_');
                            }
                        }

                        if (foundAtomInteractingWithAnotherProteinInterface != null && foundAtomInteractingWithAnotherProteinInterface.Count > 0)
                        {
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += AminoAcidConversions.AminoAcidNameToCode1L(foundAtomInsidePdbFile.resName.FieldValue);
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L  += AminoAcidConversions.AminoAcidNameToCode1L(foundAtomInsidePdbFile.resName.FieldValue);
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += foundAtomInsidePdbFile.resName.FieldValue.PadRight(3, '_');
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L  += foundAtomInsidePdbFile.resName.FieldValue.PadRight(3, '_');
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                            proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.InteractionWithAnotherProteinInterface;

                            if (foundAtomInteractingWithNonProteinInterface != null && foundAtomInteractingWithNonProteinInterface.Count > 0)
                            {
                                proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType |= ProteinInterfaceInteractionType.InteractionWithNonProteinInterface;
                            }
                        }
                        else if (foundAtomInteractingWithNonProteinInterface != null && foundAtomInteractingWithNonProteinInterface.Count > 0)
                        {
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += AminoAcidConversions.AminoAcidNameToCode1L(foundAtomInsidePdbFile.resName.FieldValue);
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L  += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += AminoAcidConversions.AminoAcidNameToCode1L(foundAtomInsidePdbFile.resName.FieldValue);
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += foundAtomInsidePdbFile.resName.FieldValue.PadRight(3, '_');
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L  += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += foundAtomInsidePdbFile.resName.FieldValue.PadRight(3, '_');
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                            proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.InteractionWithNonProteinInterface;
                        }
                        else
                        {
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L  += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += AminoAcidConversions.AminoAcidNameToCode1L(foundAtomInsidePdbFile.resName.FieldValue);

                            proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L  += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                            proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += foundAtomInsidePdbFile.resName.FieldValue.PadRight(3, '_');

                            proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.NoInteractionFound;
                        }
                    }

                    result.Add(proteinInterfacePositionData);
                }
            }

            return(result);
        }
Пример #10
0
        /// <summary>
        ///
        /// </summary>
        /// <param name="cancellationToken"></param>
        /// <param name="pdbFilename"></param>
        /// <param name="pdbIdChainIdList"></param>
        /// <param name="pdbFileChains"></param>
        /// <param name="singularAaToAaInteractions"></param>
        /// <param name="fullClusteringResult"></param>
        /// <returns></returns>
        public static ProteinInterfaceAnalysisResultData AnalyseProteinInterfaces(
            CancellationToken cancellationToken,
            decimal maxAtomInterationDistance,
            decimal minimumProteinInterfaceDensity,
            string pdbFilename,
            Dictionary <string, List <string> > pdbIdChainIdList,
            ProteinChainListContainer pdbFileChains,
            ProteinChainListContainer singularAaToAaInteractions,
            ClusteringFullResultListContainer fullClusteringResult)
        {
            if (ParameterValidation.IsLoadFilenameInvalid(pdbFilename))
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilename));
            }

            if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
            {
                throw new ArgumentOutOfRangeException(nameof(singularAaToAaInteractions));
            }

            if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(fullClusteringResult))
            {
                throw new ArgumentOutOfRangeException(nameof(fullClusteringResult));
            }

            string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

            List <List <int> > chainStageProteinInterfaceCount;

            // Find how many proteinInterfaces at each stage.
            ClusteringFullResultListContainer proteinInterfacesClusteringResult = DetectProteinInterfaces(proteinId, singularAaToAaInteractions, fullClusteringResult, out chainStageProteinInterfaceCount, ClusteringProteinInterfaceDensityDetectionOptions.ResidueSequenceIndex, minimumProteinInterfaceDensity);

            // Find the last stage having required number of proteinInterfaces.
            int[] detectedBestClusterStagesIndexes = ProteinInterfaceTreeOptimalStageDetection.FindFinalProteinInterfaceStageIndexes(singularAaToAaInteractions, fullClusteringResult, proteinInterfacesClusteringResult, chainStageProteinInterfaceCount);

            int totalChains = singularAaToAaInteractions.ChainList.Count;

            var interactionProteinInterfaceClusteringHierarchyDataList = new List <InteractionProteinInterfaceClusteringHierarchyData>();

            int[] numberProteinInterfacesPerChain = FindNumberProteinInterfacesPerChain(proteinInterfacesClusteringResult, detectedBestClusterStagesIndexes);

            for (int chainIndex = 0; chainIndex < totalChains; chainIndex++)
            {
                int stageIndex = detectedBestClusterStagesIndexes[chainIndex];

                string chainIdLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(chainIndex);

                var interactionProteinInterfaceClusteringHierarchyData = new InteractionProteinInterfaceClusteringHierarchyData(proteinId, chainIdLetter, numberProteinInterfacesPerChain[chainIndex], stageIndex + 1, fullClusteringResult.ChainList[chainIndex].StageList.Count);

                interactionProteinInterfaceClusteringHierarchyDataList.Add(interactionProteinInterfaceClusteringHierarchyData);
            }

            InteractionBetweenProteinInterfacesListContainer interactionBetweenProteinInterfacesContainer = CrossProteinInterfaceInteractions.FindInteractionsBetweenAnyProteinInterfaces(cancellationToken, maxAtomInterationDistance, pdbFilename, pdbIdChainIdList, pdbFileChains, singularAaToAaInteractions, fullClusteringResult, proteinInterfacesClusteringResult, detectedBestClusterStagesIndexes);

            List <ProteinInterfaceSequenceAndPositionData> analyseProteinInterfacesSequenceAndPositionData = AnalyseProteinInterfacesSequenceAndPositionData(pdbFilename, pdbIdChainIdList, pdbFileChains, singularAaToAaInteractions, proteinInterfacesClusteringResult, detectedBestClusterStagesIndexes, interactionBetweenProteinInterfacesContainer);

            var result = new ProteinInterfaceAnalysisResultData(
                detectedBestClusterStagesIndexes,
                proteinInterfacesClusteringResult,
                interactionProteinInterfaceClusteringHierarchyDataList,
                interactionBetweenProteinInterfacesContainer,
                analyseProteinInterfacesSequenceAndPositionData
                );

            return(result);
        }
Пример #11
0
 public override string ToString()
 {
     return(this.ProteinId + "_" + SpreadsheetFileHandler.AlphabetLetterRollOver(this.ChainId) + "_" + SpreadsheetFileHandler.AlphabetLetterRollOver(this.ProteinInterfaceId));
 }
        /*
         * private static int[] LastPdbChainResidueIndexes(string pdbFilename)
         * {
         *  //var result = new Dictionary<string,int>();
         *  var pdbFile = new ProteinDataBankFile(pdbFilename, new []{ ATOM_Record.ATOM_Field.FieldName });
         *
         *  //var x = ProteinDataBankFileOperations.PdbAtomAcidList();
         *
         *  var atomList = pdbFile.ProteinDataBankFileRecordList.Where(a => a.GetType() == typeof (ATOM_Record)).Select(a=>(ATOM_Record)a).ToList();
         *
         *  var chainIdList = atomList.Select(a=>a.chainID.FieldValue.ToUpperInvariant()).Distinct().ToList();
         *
         *  var result = new int[chainIdList.Count];
         *
         *  for (int index = 0; index < chainIdList.Count; index++)
         *  {
         *      var chainId = chainIdList[index];
         *      var maxResidueIndex = atomList.Where(a => a.chainID.FieldValue.ToUpperInvariant() == chainId).Select(a => int.Parse(a.resSeq.FieldValue)).Max();
         *
         *      result[index] = maxResidueIndex;
         *  }
         *
         *  return result;
         * }
         */

        private static VectorProteinInterfaceWhole MakeVectorProteinInterfaceWhole(string pdbFilename, ProteinInterfaceSequenceAndPositionData proteinInterfaceSequenceAndPositionData, bool reversedSequence, bool reversedInteractions)
        {
            if (pdbFilename == null)
            {
                throw new ArgumentNullException(nameof(pdbFilename));
            }
            if (proteinInterfaceSequenceAndPositionData == null)
            {
                throw new ArgumentNullException(nameof(proteinInterfaceSequenceAndPositionData));
            }

            ProteinInterfaceAminoAcidMetaData[] proteinInterfaceAminoAcidMetaDataArray = proteinInterfaceSequenceAndPositionData.AminoAcidSequenceAllResidueSequenceIndexes;

            var vectorProteinInterfaceWhole = new VectorProteinInterfaceWhole
            {
                FullProteinInterfaceId    = new FullProteinInterfaceId(proteinInterfaceSequenceAndPositionData.FullProteinInterfaceId),
                ProteinInterfaceLength    = proteinInterfaceSequenceAndPositionData.ProteinInterfaceLength,
                FirstResidueSequenceIndex = proteinInterfaceSequenceAndPositionData.StartPosition,
                LastResidueSequenceIndex  = proteinInterfaceSequenceAndPositionData.EndPosition,
                ReversedInteractions      = reversedInteractions,
                ReversedSequence          = reversedSequence,
            };

            //vectorProteinInterfaceWhole.FullSequenceLength = LastPdbChainResidueIndexes(pdbFilename)[vectorProteinInterfaceWhole.FullProteinInterfaceId.ChainId];

            vectorProteinInterfaceWhole.SecondaryStructure = ProteinInterfaceSecondaryStructureLoader.ProteinInterfaceSecondaryStructure(pdbFilename, SpreadsheetFileHandler.AlphabetLetterRollOver(vectorProteinInterfaceWhole.FullProteinInterfaceId.ChainId), vectorProteinInterfaceWhole.FirstResidueSequenceIndex, vectorProteinInterfaceWhole.LastResidueSequenceIndex, vectorProteinInterfaceWhole.ReversedSequence);

            for (int proteinInterfaceAminoAcidMetaDataArrayIndex = 0; proteinInterfaceAminoAcidMetaDataArrayIndex < proteinInterfaceAminoAcidMetaDataArray.Length; proteinInterfaceAminoAcidMetaDataArrayIndex++)
            {
                ProteinInterfaceAminoAcidMetaData proteinInterfaceAminoAcidMetaData = proteinInterfaceAminoAcidMetaDataArray[proteinInterfaceAminoAcidMetaDataArrayIndex];

                var vectorProteinInterfacePart = new VectorProteinInterfacePart(proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions.Length)
                {
                    FullProteinInterfaceId = new FullProteinInterfaceId(proteinInterfaceSequenceAndPositionData.FullProteinInterfaceId),
                    ResidueId               = proteinInterfaceAminoAcidMetaDataArrayIndex,
                    SourceAminoAcid1L       = proteinInterfaceAminoAcidMetaData.ResidueName1L,
                    SourceAminoAcid3L       = proteinInterfaceAminoAcidMetaData.ResidueName3L,
                    InteractionAminoAcids1L = proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionResidueNames1L,
                    InteractionNonProteinInterfaceAminoAcids1L = proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames1L,
                    InteractionFlagBools = new bool[proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions.Length]
                };

                vectorProteinInterfaceWhole.VectorProteinInterfacePartList.Add(vectorProteinInterfacePart);

                Array.Copy(proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions, vectorProteinInterfacePart.InteractionFlagBools, proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions.Length);

                if (reversedInteractions)
                {
                    Array.Reverse(vectorProteinInterfacePart.InteractionFlagBools);
                }

                vectorProteinInterfacePart.InteractionToNonProteinInterface = proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType.HasFlag(ProteinInterfaceInteractionType.InteractionWithNonProteinInterface);
            }

            if (vectorProteinInterfaceWhole.ReversedSequence)
            {
                vectorProteinInterfaceWhole.VectorProteinInterfacePartList.Reverse();
            }

            return(vectorProteinInterfaceWhole);
        }