Ejemplo n.º 1
0
        /// <summary>
        ///     Finds every PDB ID that does not occur exactly <paramref name="numberOfChainsRequired" /> times in the
        ///     (non-distinct) PDB ID list derived from <paramref name="sequenceList" />, and passes those IDs to
        ///     <c>RemoveSequences</c> together with the original sequence list.
        /// </summary>
        /// <param name="cancellationToken">Token handed to the worker tasks and to <c>RemoveSequences</c>.</param>
        /// <param name="sequenceList">The sequences to filter. Must not be null or empty.</param>
        /// <param name="numberOfChainsRequired">The exact number of times a PDB ID has to occur to be kept.</param>
        /// <param name="progressActionSet">Receives progress updates. Must not be null, despite the default value.</param>
        /// <returns>The list returned by <c>RemoveSequences</c>.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="sequenceList" /> is null or empty.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="progressActionSet" /> is null.</exception>
        public static List <ISequence> RemoveSequencesWithIncorrectNumberOfChains(CancellationToken cancellationToken, List <ISequence> sequenceList, int numberOfChainsRequired = 2, ProgressActionSet progressActionSet = null)
        {
            if (sequenceList == null || sequenceList.Count == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(sequenceList));
            }

            if (progressActionSet == null)
            {
                throw new ArgumentNullException(nameof(progressActionSet));
            }

            var pdbIdListNotDistinct = FilterProteins.SequenceListToPdbIdList(sequenceList, false);

            // Group the IDs once so that each occurrence count is a lookup instead of a full scan of the list.
            // ToLookup (unlike a dictionary) also accepts a null key, matching the old "a == b" comparison.
            var pdbIdOccurrences = pdbIdListNotDistinct.ToLookup(pdbId => pdbId);

            ProgressActionSet.StartAction(pdbIdListNotDistinct.Count, progressActionSet);

            var workDivision = new WorkDivision <List <string> >(pdbIdListNotDistinct.Count);

            for (var threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++)
            {
                var localThreadIndex = threadIndex;

                Task <List <string> > task = Task.Run(() =>
                {
                    var taskResult = new List <string>();

                    // Clamp the slice to the list bounds so an out-of-range slice is simply empty.
                    var firstIndex = Math.Max(0, workDivision.ThreadFirstIndex[localThreadIndex]);
                    var lastIndex  = Math.Min(pdbIdListNotDistinct.Count - 1, workDivision.ThreadLastIndex[localThreadIndex]);

                    for (var pdbIdIndex = firstIndex; pdbIdIndex <= lastIndex; pdbIdIndex++)
                    {
                        var pdbId = pdbIdListNotDistinct[pdbIdIndex];

                        workDivision.IncrementItemsCompleted(1);
                        ProgressActionSet.ProgressAction(1, progressActionSet);
                        ProgressActionSet.EstimatedTimeRemainingAction(workDivision.StartTicks, workDivision.ItemsCompleted, workDivision.ItemsToProcess, progressActionSet);

                        // Every occurrence of an offending ID is reported (duplicates included), as before.
                        if (pdbIdOccurrences[pdbId].Count() != numberOfChainsRequired)
                        {
                            taskResult.Add(pdbId);
                        }
                    }

                    return(taskResult);
                }, cancellationToken);

                workDivision.TaskList.Add(task);
            }

            workDivision.WaitAllTasks();

            var sequencesWithIncorrectNumberOfChains = new List <string>();

            // The status checks must come before Result: reading Result on a canceled or faulted task throws.
            foreach (var task in workDivision.TaskList.Where(t => t != null && t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null))
            {
                sequencesWithIncorrectNumberOfChains.AddRange(task.Result);
            }

            var result = RemoveSequences(cancellationToken, sequenceList, sequencesWithIncorrectNumberOfChains);

            ProgressActionSet.FinishAction(true, progressActionSet);

            return(result);
        }
Ejemplo n.º 2
0
        /// <summary>
        ///     Scans the white-listed PDB files found in <paramref name="pdbFolders" /> and returns the distinct PDB IDs
        ///     whose chain count, as reported by <c>ProteinDataBankFileOperations.PdbAtomicChainsCount</c>, differs from
        ///     <paramref name="numberChainsRequired" />. The work is split over several tasks by a <c>WorkDivision</c>.
        /// </summary>
        /// <param name="cancellationToken">Checked before each file; a worker stops its slice once cancellation is requested.</param>
        /// <param name="pdbFolders">Folders to search for PDB files. Must not be null or empty.</param>
        /// <param name="pdbIdList">White list of PDB IDs to examine. Must not be null or empty, despite the default value.</param>
        /// <param name="pdbIdChainIdList">Optional map from PDB ID to chain IDs; the entry for a file (if any) is handed to the chain counter.</param>
        /// <param name="numberChainsRequired">The chain count a structure must have to be left out of the result.</param>
        /// <param name="progressActionSet">Receives progress updates. Must not be null, despite the default value.</param>
        /// <param name="totalThreads">Passed to the <c>WorkDivision</c> constructor.</param>
        /// <returns>Distinct PDB IDs of the structures whose chain count did not match.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="pdbFolders" /> or <paramref name="pdbIdList" /> is null or empty.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="progressActionSet" /> is null.</exception>
        public static List <string> RemoveStructuresWithIncorrectNumberOfChains(CancellationToken cancellationToken, string[] pdbFolders, List <string> pdbIdList = null, Dictionary <string, List <string> > pdbIdChainIdList = null, int numberChainsRequired = 2, ProgressActionSet progressActionSet = null, int totalThreads = -1)
        {
            if (pdbFolders == null || pdbFolders.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFolders));
            }

            if (pdbIdList == null || pdbIdList.Count == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pdbIdList));
            }

            if (progressActionSet == null)
            {
                throw new ArgumentNullException(nameof(progressActionSet));
            }

            var pdbFilesArray = ProteinDataBankFileOperations.GetPdbFilesArray(pdbFolders);

            pdbFilesArray = ProteinDataBankFileOperations.RemoveNonWhiteListedPdbIdFromPdbFilesArray(pdbIdList, pdbFilesArray);

            ProteinDataBankFileOperations.ShowMissingPdbFiles(pdbFilesArray, pdbIdList, progressActionSet);

            // Constant-time white-list membership test for the per-file check below.
            var whiteListedPdbIds = new HashSet <string>(pdbIdList);

            WorkDivision <List <string> > workDivision = new WorkDivision <List <string> >(pdbFilesArray.Length, totalThreads);

            ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet);

            for (int threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++)
            {
                int localThreadIndex = threadIndex;

                Task <List <string> > task = Task.Run(() =>
                {
                    var taskResult = new List <string>();

                    for (int pdbFileNumber = workDivision.ThreadFirstIndex[localThreadIndex]; pdbFileNumber <= workDivision.ThreadLastIndex[localThreadIndex]; pdbFileNumber++)
                    {
                        if (cancellationToken.IsCancellationRequested)
                        {
                            break;
                        }

                        try
                        {
                            string pdbFilename = pdbFilesArray[pdbFileNumber];
                            string proteinId   = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

                            // Check if the file found is included in the white list.
                            if (!whiteListedPdbIds.Contains(proteinId))
                            {
                                continue;
                            }

                            // Chain IDs registered for this protein, or null when there are none.
                            string[]      sequenceChainIdList = null;
                            List <string> chainIds;

                            if (pdbIdChainIdList != null && pdbIdChainIdList.TryGetValue(proteinId, out chainIds))
                            {
                                sequenceChainIdList = chainIds.ToArray();
                            }

                            int chainCount = ProteinDataBankFileOperations.PdbAtomicChainsCount(pdbFilename, sequenceChainIdList, numberChainsRequired);

                            if (chainCount != numberChainsRequired)
                            {
                                // Duplicates are removed once, by the Distinct() call on the merged result.
                                taskResult.Add(proteinId);
                            }
                        }
                        finally
                        {
                            // Progress is reported for every file visited, including skipped and failed ones.
                            workDivision.IncrementItemsCompleted(1);

                            ProgressActionSet.ProgressAction(1, progressActionSet);
                            ProgressActionSet.EstimatedTimeRemainingAction(workDivision.StartTicks, workDivision.ItemsCompleted, workDivision.ItemsToProcess, progressActionSet);
                        }
                    }

                    return(taskResult);
                }, cancellationToken);

                workDivision.TaskList.Add(task);
            }

            workDivision.WaitAllTasks();

            ProgressActionSet.FinishAction(true, progressActionSet);

            var result = new List <string>();

            // The status checks must come before Result: reading Result on a canceled or faulted task throws.
            foreach (var task in workDivision.TaskList.Where(t => t != null && t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null))
            {
                result.AddRange(task.Result);
            }

            return(result.Distinct().ToList());
        }
Ejemplo n.º 3
0
        /// <summary>
        ///     Runs one task per PDB file that finds the file's interactions (<c>SearchInteractions.FindInteractions</c>),
        ///     sorts them by distance, and formats them with <c>FormatInteractionOutput</c>. The number of unfinished
        ///     tasks is capped at ten per processor.
        /// </summary>
        /// <param name="cancellationToken">Passed to the tasks; a task that starts after cancellation yields no output.</param>
        /// <param name="maxAtomInterationDistance">Forwarded to <c>SearchInteractions.FindInteractions</c>.</param>
        /// <param name="pdbFilesList">The PDB files to process. Must not be null.</param>
        /// <param name="pdbIdChainIdList">Forwarded to <c>SearchInteractions.FindInteractions</c>.</param>
        /// <param name="progressActionSet">Receives progress updates and, when <paramref name="outputToGui" /> is set, each formatted result.</param>
        /// <param name="outputToGui">When true, every formatted result is also reported through <paramref name="progressActionSet" />.</param>
        /// <returns>
        ///     The formatted interaction text of every task that ran to completion with a non-null result, in the order
        ///     of <paramref name="pdbFilesList" />.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="pdbFilesList" /> is null.</exception>
        public static string[] MakeInteractionsOutput(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFilesList, Dictionary <string, List <string> > pdbIdChainIdList, ProgressActionSet progressActionSet, bool outputToGui)
        {
            if (pdbFilesList == null)
            {
                throw new ArgumentNullException(nameof(pdbFilesList));
            }

            int maxPendingTasks  = Environment.ProcessorCount * 10;
            var interactionTasks = new List <Task <string> >();

            ProgressActionSet.StartAction(pdbFilesList.Length, progressActionSet);

            foreach (string pdbFilename in pdbFilesList)
            {
                string _pdbFilename = pdbFilename;

                // Throttle: block until fewer than maxPendingTasks tasks are unfinished. Only unfinished tasks are
                // waited on; including finished ones would make WaitAny return at once and turn this into a busy loop.
                Task[] pendingTasks = interactionTasks.Where(t => !t.IsCompleted).ToArray <Task>();

                while (pendingTasks.Length >= maxPendingTasks)
                {
                    Task.WaitAny(pendingTasks);
                    pendingTasks = interactionTasks.Where(t => !t.IsCompleted).ToArray <Task>();
                }

                var interactionTask = Task.Run(() =>
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        return(null);
                    }

                    List <AtomPair> interactionsList = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, _pdbFilename, pdbIdChainIdList);

                    string pdbId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(_pdbFilename);

                    if (string.IsNullOrEmpty(pdbId))
                    {
                        throw new ArgumentNullException(nameof(pdbFilesList), "The file " + _pdbFilename + " has an invalid name.");
                    }

                    // A null search result is treated the same as "no interactions found".
                    if (interactionsList == null)
                    {
                        interactionsList = new List <AtomPair>();
                    }

                    interactionsList = interactionsList.OrderBy(o => o.Distance).ToList();

                    var interactionsString = FormatInteractionOutput(pdbId, interactionsList);

                    if (outputToGui)
                    {
                        ProgressActionSet.Report(interactionsString, progressActionSet);
                    }

                    ProgressActionSet.ProgressAction(1, progressActionSet);

                    return(interactionsString);
                }, cancellationToken);

                interactionTasks.Add(interactionTask);
            }

            Task.WaitAll(interactionTasks.Where(t => !t.IsCompleted).ToArray <Task>());

            // The list is already in input order; Task.Id is assigned on demand and is not a reliable creation-order key.
            // Result is read last so it is only touched on tasks that ran to completion.
            var interactionsStringsList = interactionTasks.Where(t => t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null).Select(t => t.Result).ToArray();

            ProgressActionSet.FinishAction(true, progressActionSet);

            return(interactionsStringsList);
        }
        /// <summary>
        ///     Iterates through the provided FASTA files, builds the statistics spreadsheets for each of them with
        ///     <c>MakeHomodimerStatisticsSpreadsheetsList</c>, and saves every spreadsheet to its own file.
        /// </summary>
        /// <param name="maxAtomInterationDistance">Forwarded to <c>MakeHomodimerStatisticsSpreadsheetsList</c>.</param>
        /// <param name="fastaFiles">The FASTA files to process. Blank entries are skipped.</param>
        /// <param name="pdbFilesFolders">The locations where PDB files may be found.</param>
        /// <param name="spreadsheetSaveFilenameTemplate">
        ///     A template filename to save the outputs. The placeholders <c>%spreadsheet_name%</c>,
        ///     <c>%fasta_filename%</c>, <c>%date%</c> and <c>%time%</c> are filled in; <c>%batch_number%</c> and
        ///     <c>%batch_letter%</c> are removed.
        /// </param>
        /// <param name="saveTsv">Forwarded to <c>SpreadsheetFileHandler.SaveSpreadsheet</c>.</param>
        /// <param name="saveXl">Forwarded to <c>SpreadsheetFileHandler.SaveSpreadsheet</c>.</param>
        /// <param name="cancellationToken">Checked after the spreadsheets are calculated and before each one is saved.</param>
        /// <param name="progressActionSet">Receives status messages and progress updates.</param>
        /// <param name="fileExistsOptions">Forwarded to <c>SpreadsheetFileHandler.SaveSpreadsheet</c>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="fastaFiles" /> or <paramref name="pdbFilesFolders" /> is null or empty, or both
        ///     <paramref name="saveTsv" /> and <paramref name="saveXl" /> are false.
        /// </exception>
        public static void MakeHomodimerStatisticsSpreadsheetsAndOutputFiles(decimal maxAtomInterationDistance, string[] fastaFiles, string[] pdbFilesFolders, string spreadsheetSaveFilenameTemplate, bool saveTsv, bool saveXl, CancellationToken cancellationToken, ProgressActionSet progressActionSet = null, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            if (fastaFiles == null || fastaFiles.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(fastaFiles));
            }

            if (pdbFilesFolders == null || pdbFilesFolders.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilesFolders));
            }

            if (!saveTsv && !saveXl)
            {
                throw new ArgumentOutOfRangeException(nameof(saveTsv));
            }

            for (int fastaFileNumber = 0; fastaFileNumber < fastaFiles.Length; fastaFileNumber++)
            {
                string fastaFilename = fastaFiles[fastaFileNumber];

                if (string.IsNullOrWhiteSpace(fastaFilename))
                {
                    continue;
                }

                ProgressActionSet.Report("Attempting to open file: " + fastaFilename, progressActionSet);

                List <ISequence> sequences = SequenceFileHandler.LoadSequenceFile(fastaFilename, StaticValues.MolNameProteinAcceptedValues);

                // Validate the load result before anything else consumes it.
                if ((sequences == null) || (sequences.Count == 0))
                {
                    ProgressActionSet.Report("Error could not load file: " + fastaFilename, progressActionSet);
                    continue;
                }

                ProgressActionSet.Report("Loaded " + sequences.Count + " sequences from file: " + fastaFilename, progressActionSet);

                var           pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences);
                List <string> pdbIdList        = FilterProteins.SequenceListToPdbIdList(sequences);
                string        appendFilename   = FileAndPathMethods.FullPathToFilename(fastaFilename);

                ProgressActionSet.Report("Creating spreadsheets...", progressActionSet);
                Stopwatch stopwatch       = Stopwatch.StartNew();
                var       spreadsheetList = MakeHomodimerStatisticsSpreadsheetsList(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);
                stopwatch.Stop();
                ProgressActionSet.Report("Finished calculating spreadsheet data [Elapsed: " + stopwatch.Elapsed.ToString(@"dd\:hh\:mm\:ss") + "]", progressActionSet);

                if (cancellationToken.IsCancellationRequested)
                {
                    // Drive the progress display to its end state and flag the action as not finished successfully.
                    ProgressActionSet.StartAction(100, progressActionSet);
                    ProgressActionSet.ProgressAction(100, progressActionSet);
                    ProgressActionSet.FinishAction(false, progressActionSet);
                    ProgressActionSet.Report("Cancelled.", progressActionSet);
                    break;
                }

                for (int spreadsheetIndex = 0; spreadsheetIndex < spreadsheetList.Count; spreadsheetIndex++)
                {
                    var spreadsheet = spreadsheetList[spreadsheetIndex];

                    if (cancellationToken.IsCancellationRequested)
                    {
                        break;
                    }

                    // The first row carries the worksheet title template and the second row the spreadsheet name;
                    // both are read before the first row is removed.
                    var sheetName       = spreadsheet[0][0].CellData;
                    var spreadsheetName = spreadsheet[1][0].CellData;
                    spreadsheet.RemoveAt(0);

                    // Sample the clock once so the file name and the sheet name always carry the same date and time.
                    DateTime now      = DateTime.Now;
                    string   dateText = now.ToString("yyyy-MM-dd");
                    string   timeText = now.ToString("HH.mm.ss");

                    // Example template: "c:/dResults/Results - %date% %time% - %fasta_filename% - %spreadsheet_name%.tsv"
                    // The batch placeholders are not currently used and are stripped.
                    Func <string, string> expandTemplate = template => template
                                                           .Replace("%spreadsheet_name%", spreadsheetName)
                                                           .Replace("%fasta_filename%", appendFilename)
                                                           .Replace("%date%", dateText)
                                                           .Replace("%time%", timeText)
                                                           .Replace("%batch_number%", "")
                                                           .Replace("%batch_letter%", "");

                    string saveFilename = expandTemplate(spreadsheetSaveFilenameTemplate);

                    sheetName = expandTemplate(sheetName);

                    // SaveSpreadsheet always receives an .xlsx path, together with the saveTsv and saveXl flags.
                    var xlFilename = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".xlsx");

                    var savedFiles = SpreadsheetFileHandler.SaveSpreadsheet(xlFilename.FullName, new[] { sheetName }, spreadsheet, null, saveTsv, saveXl, fileExistsOptions);

                    ProgressActionSet.ReportFilesSaved(savedFiles, progressActionSet);
                }
            }

            ProgressActionSet.Report("Finished processing files.", progressActionSet);
        }
Ejemplo n.º 5
0
        /// <summary>
        ///     Clusters every white-listed PDB file, groups the PDB IDs by their chain count, per-chain interface
        ///     counts and (optionally) interface lengths, and saves one FASTA file per group.
        /// </summary>
        /// <param name="cancellationToken">Forwarded to the clustering and sequence-removal calls.</param>
        /// <param name="maxAtomInterationDistance">Forwarded to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="minimumProteinInterfaceDensity">Forwarded to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="sequenceListFileArray">Sequence files to load. Must not be null; every non-blank entry must exist.</param>
        /// <param name="pdbFileDirectoryLocationArray">Folders holding the PDB files. Must not be null; every non-blank entry must exist.</param>
        /// <param name="filterProteinInterfacesLengthOutputFilename">
        ///     Output filename template. Must contain <c>%proteinInterfaces%</c>, which is replaced by the group key.
        /// </param>
        /// <param name="filterProteinInterfaceCountsWithoutLengths">Group by chain count and interface counts only.</param>
        /// <param name="filterProteinInterfaceCountsWithLengths">Group by chain count, interface counts and interface lengths.</param>
        /// <param name="fileExistsOptions">Forwarded to <c>SequenceFileHandler.SaveSequencesAsFasta</c>.</param>
        /// <param name="progressActionSet">Receives status messages and progress updates.</param>
        /// <exception cref="ArgumentNullException">One of the array or filename arguments is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The output filename is blank or lacks the <c>%proteinInterfaces%</c> placeholder.</exception>
        public static void FilterProteinInterfaceLengths(
            CancellationToken cancellationToken,
            decimal maxAtomInterationDistance,
            decimal minimumProteinInterfaceDensity,
            string[] sequenceListFileArray,
            string[] pdbFileDirectoryLocationArray,
            string filterProteinInterfacesLengthOutputFilename,
            bool filterProteinInterfaceCountsWithoutLengths,
            bool filterProteinInterfaceCountsWithLengths,
            FileExistsHandler.FileExistsOptions fileExistsOptions,
            ProgressActionSet progressActionSet)
        {
            if (sequenceListFileArray == null)
            {
                throw new ArgumentNullException(nameof(sequenceListFileArray));
            }
            if (pdbFileDirectoryLocationArray == null)
            {
                throw new ArgumentNullException(nameof(pdbFileDirectoryLocationArray));
            }
            if (filterProteinInterfacesLengthOutputFilename == null)
            {
                throw new ArgumentNullException(nameof(filterProteinInterfacesLengthOutputFilename));
            }
            if (!filterProteinInterfaceCountsWithoutLengths && !filterProteinInterfaceCountsWithLengths)
            {
                ProgressActionSet.Report("Cancelled: No filter options selected.", progressActionSet);
                return;
            }

            // Check all sequence files are found
            var missingSequenceFiles = sequenceListFileArray.Where(sequenceFile => !string.IsNullOrWhiteSpace(sequenceFile) && !File.Exists(sequenceFile)).ToList();

            if (missingSequenceFiles.Count > 0)
            {
                foreach (string missingSequenceFile in missingSequenceFiles)
                {
                    ProgressActionSet.Report("Warning: Sequence file missing: " + missingSequenceFile, progressActionSet);
                }

                ProgressActionSet.Report("Cancelled: missing sequence files.", progressActionSet);
                return;
            }

            // Check all pdb folders are found
            var missingDirectoryList = pdbFileDirectoryLocationArray.Where(pdbDirectory => !string.IsNullOrWhiteSpace(pdbDirectory) && !Directory.Exists(pdbDirectory)).ToList();

            if (missingDirectoryList.Count > 0)
            {
                foreach (string pdbDirectory in missingDirectoryList)
                {
                    ProgressActionSet.Report("Warning: Structure file directory missing: " + pdbDirectory, progressActionSet);
                }

                ProgressActionSet.Report("Cancelled: missing structure file directory.", progressActionSet);
                return;
            }

            const string proteinInterfacesTemplateText = "%proteinInterfaces%";

            if (string.IsNullOrWhiteSpace(filterProteinInterfacesLengthOutputFilename) || !filterProteinInterfacesLengthOutputFilename.Contains(proteinInterfacesTemplateText))
            {
                throw new ArgumentOutOfRangeException(nameof(filterProteinInterfacesLengthOutputFilename));
            }

            // Load fasta sequence files
            List <ISequence> sequenceList = SequenceFileHandler.LoadSequenceFileList(sequenceListFileArray, StaticValues.MolNameProteinAcceptedValues);

            // Get a list of the PDB Unique IDs with unique chain IDs which are wanted, ignoring others which may be present e.g. dna
            var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequenceList);

            // Get list of PDB Unique IDs
            List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequenceList);

            // Check PDB Unique IDs were successfully loaded
            if (pdbIdList == null || pdbIdList.Count == 0)
            {
                ProgressActionSet.Report("Error: Sequence list could not be loaded", progressActionSet);
                return;
            }

            // Constant-time membership test for the per-file white-list check.
            var whiteListedPdbIds = new HashSet <string>(pdbIdList);

            // Get a list of PDB files found in user specified directory
            string[] pdbFilesArray = ProteinDataBankFileOperations.RemoveNonWhiteListedPdbIdFromPdbFilesArray(pdbIdList, ProteinDataBankFileOperations.GetPdbFilesArray(pdbFileDirectoryLocationArray));

            // Check all PDB files are found
            List <string> missingPdbFilesList = ProteinDataBankFileOperations.CheckForMissingPdbFiles(pdbFilesArray, pdbIdList);

            if (missingPdbFilesList != null && missingPdbFilesList.Count > 0)
            {
                ProgressActionSet.Report("Missing PDB Files: " + string.Join(", ", missingPdbFilesList), progressActionSet);
            }

            ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet);

            // Group key -> PDB IDs falling into that group. Both key flavours share this dictionary.
            var proteinInterfacesCountResultWithLengths = new Dictionary <string, List <string> >();

            var startTicks = DateTime.Now.Ticks;

            // Loop through each pdb file. The loop runs exactly once per file, so an empty array is handled
            // without indexing past its end.
            for (int pdbFileNumber = 0; pdbFileNumber < pdbFilesArray.Length; pdbFileNumber++)
            {
                ProgressActionSet.EstimatedTimeRemainingAction(startTicks, pdbFileNumber, pdbFilesArray.Length, progressActionSet);

                try
                {
                    // get unique id of pdb file
                    string pdbFilename = pdbFilesArray[pdbFileNumber];
                    string proteinId   = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

                    // check pdb unique id was in the loaded sequence list
                    if (!whiteListedPdbIds.Contains(proteinId))
                    {
                        continue;
                    }

                    // perform clustering to detect interaction proteinInterfaces
                    ClusterProteinDataBankFileResult clusterPdbFileResult = Clustering.ClusterProteinDataBankFile(cancellationToken, maxAtomInterationDistance, minimumProteinInterfaceDensity, pdbFilename, pdbIdChainIdList, ClusteringMethodOptions.ClusterWithResidueSequenceIndex, -1, -1, null);

                    if (clusterPdbFileResult == null)
                    {
                        continue;
                    }

                    List <ProteinInterfaceSequenceAndPositionData> proteinInterfaces = clusterPdbFileResult.ProteinInterfaceAnalysisResultData.ProteinInterfacesSequenceAndPositionDataList;

                    int chainCount = clusterPdbFileResult.ClusteringFullResultListContainer.ChainList.Count;

                    // Number of interfaces found on each chain.
                    int[] proteinInterfacesCount = new int[chainCount];

                    for (int chainIndex = 0; chainIndex < chainCount; chainIndex++)
                    {
                        int currentChainIndex = chainIndex;

                        proteinInterfacesCount[chainIndex] = proteinInterfaces.Count(proteinInterface => proteinInterface.FullProteinInterfaceId.ChainId == currentChainIndex);
                    }

                    var proteinInterfacesCountStr = string.Join(" ", proteinInterfacesCount.OrderBy(x => x));

                    // Length of each interface.
                    int[] proteinInterfacesLength = new int[proteinInterfaces.Count];

                    for (int index = 0; index < proteinInterfaces.Count; index++)
                    {
                        proteinInterfacesLength[index] = proteinInterfaces[index].ProteinInterfaceLength;
                    }

                    // Distinct lengths in ascending order, or "0" when there are no interfaces at all.
                    var proteinInterfacesLengthStr = proteinInterfacesLength.Length == 0 ? "0" : string.Join(" ", proteinInterfacesLength.Distinct().OrderBy(x => x));

                    var combinedKeys = new List <string>(2);

                    if (filterProteinInterfaceCountsWithoutLengths)
                    {
                        combinedKeys.Add("chains [" + chainCount + "] proteinInterfaces [" + proteinInterfacesCountStr + "]");
                    }

                    if (filterProteinInterfaceCountsWithLengths)
                    {
                        combinedKeys.Add("chains [" + chainCount + "] proteinInterfaces [" + proteinInterfacesCountStr + "] lengths [" + proteinInterfacesLengthStr + "]");
                    }

                    foreach (string combinedKey in combinedKeys)
                    {
                        List <string> groupedPdbIds;

                        if (!proteinInterfacesCountResultWithLengths.TryGetValue(combinedKey, out groupedPdbIds))
                        {
                            groupedPdbIds = new List <string>();
                            proteinInterfacesCountResultWithLengths.Add(combinedKey, groupedPdbIds);
                        }

                        groupedPdbIds.Add(proteinId);
                    }
                }
                finally
                {
                    // One progress step per file visited, including files that were skipped.
                    ProgressActionSet.ProgressAction(1, progressActionSet);
                }
            }

            var confirmSaveList = new List <string>();

            foreach (var kvp in proteinInterfacesCountResultWithLengths)
            {
                // Work on a copy so the loaded sequence list stays intact for the next group.
                var seq2 = new List <ISequence>(sequenceList);
                seq2 = FilterProteins.RemoveSequences(cancellationToken, seq2, kvp.Value, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                var saveFilename = filterProteinInterfacesLengthOutputFilename.Replace(proteinInterfacesTemplateText, kvp.Key);

                var actualSavedFilename = SequenceFileHandler.SaveSequencesAsFasta(seq2, saveFilename, true, fileExistsOptions, progressActionSet);

                if (!string.IsNullOrWhiteSpace(actualSavedFilename))
                {
                    confirmSaveList.Add(actualSavedFilename);
                }
            }

            // Confirm the total number of sequences saved is equal to original number loaded
            ConfirmSequencesSaved(pdbIdList, confirmSaveList, progressActionSet);

            ProgressActionSet.FinishAction(true, progressActionSet);
        }
        /// <summary>
        ///     Makes spreadsheets with scientific data outputs about given proteins.
        /// </summary>
        /// <param name="cancellationToken"></param>
        /// <param name="pdbFolders">The location of the PDB files</param>
        /// <param name="pdbIdList">The PDB files which should be used.</param>
        /// <param name="consoleTextBox"></param>
        /// <param name="progressBar">User proteinInterface progress bar for user feedback.</param>
        /// <param name="estimatedTimeRemainingLabel">User proteinInterface estimated time remaining label for user feedback.</param>
        /// <param name="requestedTotalThreads"></param>
        /// <returns>Returns the generated spreadsheets with scientific data.</returns>
        public static List <List <SpreadsheetCell[]> > MakeHomodimerStatisticsSpreadsheetsList(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFolders, List <string> pdbIdList = null, Dictionary <string, List <string> > pdbIdChainIdList = null, ProgressActionSet progressActionSet = null, int requestedTotalThreads = -1)
        {
            if (pdbFolders == null || pdbFolders.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFolders));
            }

            if (pdbIdList == null || pdbIdList.Count == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pdbIdList));
            }

            if (progressActionSet == null)
            {
                throw new ArgumentNullException(nameof(progressActionSet));
            }

            // this method creates
            // 1. a list of interactions
            // 2. a list of symmetry percentage
            // 3. an "expected" heatmap by combining every possible a/b amino acid combination
            // 4. an actual heatmap for the proteinInterfaces
            // 5. normalised versions of both of the heatmaps

            string[] pdbFilesArray = ProteinDataBankFileOperations.RemoveNonWhiteListedPdbIdFromPdbFilesArray(pdbIdList, ProteinDataBankFileOperations.GetPdbFilesArray(pdbFolders));

            //var interactionRecordList = new List<ProteinInteractionRecord>();
            //var interactionMatchPercentageList = new List<InteractionMatchPercentage>();
            //var wholeProteinChainsAminoAcidCounter = new List<AminoAcidChainComposition>();
            //var interactionChainsAminoAcidCounter = new List<AminoAcidChainComposition>();
            //var interactionsAminoAcidToAminoAcidCounter = new AminoAcidPairCompositionMatrix();

            ////var wholeProteinAminoAcidToAminoAcidCounter2x2 = new AminoAcidPairCompositionMatrix(); // composition of every amino acid paired in every possible combination

            var workDivision = new WorkDivision <HomodimersStatisticsMinerTaskResult>(pdbFilesArray.Length, requestedTotalThreads);


            ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet);



            int checkAllFilesProcessed     = 0;
            var lockCheckAllFilesProcessed = new object();

            var pdbFilesProcessed = new bool[pdbFilesArray.Length];

            Array.Clear(pdbFilesProcessed, 0, pdbFilesProcessed.Length);

            for (int threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++)
            {
                int localThreadIndex = threadIndex;

                Task <HomodimersStatisticsMinerTaskResult> task = Task.Run(() =>
                {
                    var result = new HomodimersStatisticsMinerTaskResult();

                    for (int pdbFileNumber = workDivision.ThreadFirstIndex[localThreadIndex]; pdbFileNumber <= workDivision.ThreadLastIndex[localThreadIndex]; pdbFileNumber++)
                    {
                        if (cancellationToken.IsCancellationRequested)
                        {
                            break;
                        }

                        lock (lockCheckAllFilesProcessed)
                        {
                            checkAllFilesProcessed++;
                            pdbFilesProcessed[pdbFileNumber] = true;
                        }

                        try
                        {
                            string pdbFilename = pdbFilesArray[pdbFileNumber];
                            string proteinId   = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

                            // Check if the file found is included in the white list.
                            if (/*pdbIdList != null && */ !pdbIdList.Contains(proteinId))
                            {
                                ProgressActionSet.Report("Error: " + proteinId + " was not in the PDB ID white list.", progressActionSet);
                                continue;
                            }

                            List <AtomPair> interactions = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, pdbFilename, pdbIdChainIdList);

                            // Create the per-protein accumulators: the interaction match percentage tracker and the per-chain amino acid counters.
                            var interactionMatchPercentage = new InteractionMatchPercentage(proteinId);

                            var chainAminoAcidCounterA1X1 = new AminoAcidChainComposition(proteinId, "A");
                            var chainAminoAcidCounterB1X1 = new AminoAcidChainComposition(proteinId, "B");

                            var chainInteractionAminoAcidCounterA = new AminoAcidChainComposition(proteinId, "A");
                            var chainInteractionAminoAcidCounterB = new AminoAcidChainComposition(proteinId, "B");

                            if (interactions != null && interactions.Count > 0)
                            {
                                interactionMatchPercentage.IncrementTotalInteractions(interactions.Count);

                                for (int interactionsIndex = 0; interactionsIndex < interactions.Count; interactionsIndex++)
                                {
                                    chainInteractionAminoAcidCounterA.IncrementAminoAcidCount(interactions[interactionsIndex].Atom1.resName.FieldValue);

                                    chainInteractionAminoAcidCounterB.IncrementAminoAcidCount(interactions[interactionsIndex].Atom2.resName.FieldValue);

                                    result.InteractionRecordList.Add(new ProteinInteractionRecord(proteinId, interactionsIndex + 1, interactions[interactionsIndex]));
                                    interactionMatchPercentage.AddResidueSequenceIndex(StaticValues.ChainA, interactions[interactionsIndex].Atom1.resSeq.FieldValue);
                                    interactionMatchPercentage.AddResidueSequenceIndex(StaticValues.ChainB, interactions[interactionsIndex].Atom2.resSeq.FieldValue);
                                    result.InteractionsAminoAcidToAminoAcidCounter.IncrementAminoAcidCount(interactions[interactionsIndex].Atom1.resName.FieldValue, interactions[interactionsIndex].Atom2.resName.FieldValue);
                                }
                            }

                            var chainIdList = pdbIdChainIdList != null ? (pdbIdChainIdList.ContainsKey(proteinId) ? pdbIdChainIdList[proteinId].ToArray() : null) : null;

                            ProteinChainListContainer proteinFileChains = ProteinDataBankFileOperations.PdbAtomicChains(pdbFilename, chainIdList, 2, 2, true);

                            if (proteinFileChains == null || proteinFileChains.ChainList == null || proteinFileChains.ChainList.Count != 2 ||
                                proteinFileChains.ChainList[StaticValues.ChainA] == null || proteinFileChains.ChainList[StaticValues.ChainA].AtomList == null || proteinFileChains.ChainList[StaticValues.ChainA].AtomList.Count == 0 ||
                                proteinFileChains.ChainList[StaticValues.ChainB] == null || proteinFileChains.ChainList[StaticValues.ChainB].AtomList == null || proteinFileChains.ChainList[StaticValues.ChainB].AtomList.Count == 0)
                            {
                                if (!File.Exists(pdbFilename))
                                {
                                    ProgressActionSet.Report("Error: " + pdbFilename + " (" + proteinId + ") file not found", progressActionSet);
                                }
                                else
                                {
                                    int proteinFileChainCount = ProteinDataBankFileOperations.PdbAtomicChainsCount(pdbFilename);
                                    ProgressActionSet.Report("Error: " + proteinId + " did not have exactly 2 chains (" + proteinFileChainCount + " chains found) - skipping.", progressActionSet);
                                }

                                continue;
                            }

                            // count total of how many of each type of amino acids are in Chain A.
                            for (int atomIndexA = 0; atomIndexA < proteinFileChains.ChainList[StaticValues.ChainA].AtomList.Count; atomIndexA++)
                            {
                                chainAminoAcidCounterA1X1.IncrementAminoAcidCount(proteinFileChains.ChainList[StaticValues.ChainA].AtomList[atomIndexA].resName.FieldValue);
                            }

                            // count total of how many of each type of amino acids are in Chain B.
                            for (int atomIndexB = 0; atomIndexB < proteinFileChains.ChainList[StaticValues.ChainB].AtomList.Count; atomIndexB++)
                            {
                                chainAminoAcidCounterB1X1.IncrementAminoAcidCount(proteinFileChains.ChainList[StaticValues.ChainB].AtomList[atomIndexB].resName.FieldValue);
                            }

                            interactionMatchPercentage.CalculatePercentage();
                            result.InteractionMatchPercentageList.Add(interactionMatchPercentage);
                            result.WholeProteinChainsAminoAcidCounter.Add(chainAminoAcidCounterA1X1);
                            result.WholeProteinChainsAminoAcidCounter.Add(chainAminoAcidCounterB1X1);
                            result.InteractionChainsAminoAcidCounter.Add(chainInteractionAminoAcidCounterA);
                            result.InteractionChainsAminoAcidCounter.Add(chainInteractionAminoAcidCounterB);
                        }
                        finally
                        {
                            workDivision.IncrementItemsCompleted(1);

                            ProgressActionSet.ProgressAction(1, progressActionSet);
                            ProgressActionSet.EstimatedTimeRemainingAction(workDivision.StartTicks, workDivision.ItemsCompleted, workDivision.ItemsToProcess, progressActionSet);
                        }
                    }

                    return(result);
                }, cancellationToken);
                workDivision.TaskList.Add(task);
            }


            workDivision.WaitAllTasks();

            ProgressActionSet.FinishAction(true, progressActionSet);

            // merge all instances of the results
            var spreadsheetTaskResult = new HomodimersStatisticsMinerTaskResult();

            foreach (var task in workDivision.TaskList.Where(t => t != null && t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null))
            {
                if (task.Result.InteractionChainsAminoAcidCounter != null && task.Result.InteractionChainsAminoAcidCounter.Count > 0)
                {
                    spreadsheetTaskResult.InteractionChainsAminoAcidCounter.AddRange(task.Result.InteractionChainsAminoAcidCounter);
                }

                if (task.Result.InteractionMatchPercentageList != null && task.Result.InteractionMatchPercentageList.Count > 0)
                {
                    spreadsheetTaskResult.InteractionMatchPercentageList.AddRange(task.Result.InteractionMatchPercentageList);
                }

                if (task.Result.InteractionRecordList != null && task.Result.InteractionRecordList.Count > 0)
                {
                    spreadsheetTaskResult.InteractionRecordList.AddRange(task.Result.InteractionRecordList);
                }

                if (task.Result.InteractionsAminoAcidToAminoAcidCounter != null)
                {
                    if (task.Result.InteractionsAminoAcidToAminoAcidCounter.AminoAcidToAminoAcid != null)
                    {
                        foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                        {
                            var totalGroups = AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(enumAminoAcidGroups);

                            for (int x = 0; x < totalGroups; x++)
                            {
                                for (int y = 0; y < totalGroups; y++)
                                {
                                    spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter.AminoAcidToAminoAcid[(int)enumAminoAcidGroups][x, y] +=
                                        task.Result.InteractionsAminoAcidToAminoAcidCounter.AminoAcidToAminoAcid[(int)enumAminoAcidGroups][x, y];
                                }
                            }
                        }
                    }
                }

                if (task.Result.WholeProteinChainsAminoAcidCounter != null && task.Result.WholeProteinChainsAminoAcidCounter.Count > 0)
                {
                    spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.AddRange(task.Result.WholeProteinChainsAminoAcidCounter);
                }
            }


            if (pdbFilesProcessed.Count(file => file == false) > 0)
            {
                ProgressActionSet.Report("ERROR: " + pdbFilesProcessed.Count(file => file == false) + " PDB FILES WERE SKIPPED! 0x01", progressActionSet);
            }
            else
            {
                ProgressActionSet.Report("CHECK: NO PDB FILES WERE SKIPPED! 0x01", progressActionSet);
            }

            if (checkAllFilesProcessed != pdbFilesArray.Length)
            {
                ProgressActionSet.Report("ERROR: " + (pdbFilesArray.Length - checkAllFilesProcessed) + " PDB FILES WERE SKIPPED! 0x02", progressActionSet);
            }
            else
            {
                ProgressActionSet.Report("CHECK: NO PDB FILES WERE SKIPPED! 0x02", progressActionSet);
            }


            spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter = spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.OrderBy(a => a.ProteinId).ThenBy(b => b.ChainId).ToList();
            spreadsheetTaskResult.InteractionChainsAminoAcidCounter  = spreadsheetTaskResult.InteractionChainsAminoAcidCounter.OrderBy(a => a.ProteinId).ThenBy(b => b.ChainId).ToList();

            AminoAcidChainComposition      wholeProteinChainsTotals = AminoAcidChainComposition.TotalFromAminoAcidChainCompositionList(spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter);
            AminoAcidChainComposition      interactionChainsTotals  = AminoAcidChainComposition.TotalFromAminoAcidChainCompositionList(spreadsheetTaskResult.InteractionChainsAminoAcidCounter);
            AminoAcidPairCompositionMatrix wholeProteinAminoAcidToAminoAcidCounter1X1 = AminoAcidChainComposition.ConvertToMatrix(wholeProteinChainsTotals);

            var results = new List <List <SpreadsheetCell[]> >();

            {
                /* start test */
                var spreadsheet1 = new List <SpreadsheetCell[]>();
                spreadsheet1.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% TEST SHEET 0"), });
                spreadsheet1.Add(new[] { new SpreadsheetCell("TEST SHEET 0"), });
                foreach (AminoAcidChainComposition item in spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter)
                {
                    //spreadsheet1.Add(item.ProteinId);
                    //spreadsheet1.Add(item.ChainId);
                    spreadsheet1.Add(item.SpreadsheetDataRow());
                }
                results.Add(spreadsheet1);
                spreadsheet1 = null;
                /* end test */
            }
            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

            {
                var spreadsheet2 = new List <SpreadsheetCell[]>();
                spreadsheet2.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Symmetry"), });
                spreadsheet2.Add(new[] { new SpreadsheetCell("Homodimers - List - Interaction Count And Interaction Match Percentage (Symmetry Measurement)") });
                spreadsheet2.Add(InteractionMatchPercentage.SpreadsheetColumnHeadersRow());
                var range2 = spreadsheetTaskResult.InteractionMatchPercentageList.Select(record => record.SpreadsheetDataRow()).ToList();
                //range2.Sort();
                range2 = range2
                         .OrderBy(a => a[0].CellData)
                         .ThenBy(a => a[1].CellData)
                         .ThenBy(a => a[2].CellData)
                         .ThenBy(a => a[3].CellData)
                         .ThenBy(a => a[4].CellData)
                         .ThenBy(a => a[5].CellData)
                         .ThenBy(a => a[6].CellData)
                         .ThenBy(a => a[7].CellData)
                         .ThenBy(a => a[8].CellData)
                         .ToList();
                spreadsheet2.AddRange(range2);
                range2 = null;
                results.Add(spreadsheet2);

                var spreadsheetHistogram2 = new List <SpreadsheetCell[]>();
                spreadsheetHistogram2.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HG Interaction Symmetry"), });
                spreadsheetHistogram2.Add(new[] { new SpreadsheetCell("Homodimers - List - Interaction Count And Interaction Match Percentage (Symmetry Measurement) Histogram") });
                spreadsheetHistogram2.AddRange(Histogram.MatrixToHistogram(spreadsheet2.ToArray(), Histogram.MakeBinDecimals(0, 100, 9, 10), new[] { 6, 7, 8 }, 2, -1, true));
                results.Add(spreadsheetHistogram2);

                spreadsheet2          = null;
                spreadsheetHistogram2 = null;
            }

            //
            {
                var spreadsheet3 = new List <SpreadsheetCell[]>();
                spreadsheet3.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Records"), });
                spreadsheet3.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Interaction Record"), });
                spreadsheet3.Add(ProteinInteractionRecord.TsvColumnHeadersRow());
                var range3 = spreadsheetTaskResult.InteractionRecordList.Select(record => record.SpreadsheetDataRow()).ToList();
                //range3.Sort();
                range3 = range3
                         .OrderBy(a => a[0].CellData)
                         .ThenBy(a => a[1].CellData)
                         .ThenBy(a => a[3].CellData)
                         .ThenBy(a => a[5].CellData)
                         .ThenBy(a => a[13].CellData)
                         .ThenBy(a => a[15].CellData)
                         .ToList();
                spreadsheet3.AddRange(range3);
                range3 = null;

                results.Add(spreadsheet3);

                var spreadsheetHistogram3 = new List <SpreadsheetCell[]>();
                spreadsheetHistogram3.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Records Histogram"), });
                spreadsheetHistogram3.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Interaction Record - Histogram"), });
                spreadsheetHistogram3.AddRange(Histogram.MatrixToHistogram(spreadsheet3.ToArray(), Histogram.MakeBinDecimals(0m, 5m, 0m, 0.05m), new[] { 1 }, 2, -1, true));
                results.Add(spreadsheetHistogram3);

                //spreadsheet3 = Histogram.InsertMatrixOverwrite(spreadsheet3.ToArray(), histogram3, 2, Histogram.MaxColumns(spreadsheet3.ToArray()) + 1).ToList();
                spreadsheet3          = null;
                spreadsheetHistogram3 = null;
            }
            //

            {
                var spreadsheet4 = new List <SpreadsheetCell[]>();
                spreadsheet4.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Count - A-Z"), });
                spreadsheet4.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - Interactions - A to Z"), });
                spreadsheet4.Add(AminoAcidChainComposition.SpreadsheetTitleRow());
                var range4 = spreadsheetTaskResult.InteractionChainsAminoAcidCounter.Select(record => record.SpreadsheetDataRow()).ToList();
                //range4.Sort();
                range4 = range4
                         .OrderBy(a => a[0].CellData)
                         .ThenBy(a => a[1].CellData)
                         .ToList();
                spreadsheet4.AddRange(range4);
                range4 = null;
                spreadsheet4.Add(interactionChainsTotals.SpreadsheetDataRow());
                results.Add(spreadsheet4);
                spreadsheet4 = null;
            }
            //

            {
                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    var spreadsheet5 = new List <SpreadsheetCell[]>();
                    spreadsheet5.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Count - Groups " + enumAminoAcidGroups), });
                    spreadsheet5.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - Interactions - Acid Groups " + enumAminoAcidGroups), });
                    spreadsheet5.Add(AminoAcidChainComposition.SpreadsheetGroupsTitleRow(enumAminoAcidGroups));
                    var range5 = spreadsheetTaskResult.InteractionChainsAminoAcidCounter.Select(record => record.SpreadsheetGroupsDataRow(enumAminoAcidGroups)).ToList();
                    //range5.Sort();
                    range5 = range5
                             .OrderBy(a => a[0].CellData)
                             .ThenBy(a => a[1].CellData)
                             .ToList();
                    spreadsheet5.AddRange(range5);
                    range5 = null;
                    spreadsheet5.Add(interactionChainsTotals.SpreadsheetGroupsDataRow(enumAminoAcidGroups));

                    results.Add(spreadsheet5);
                    spreadsheet5 = null;
                }
            }
            //

            {
                var spreadsheet6 = new List <SpreadsheetCell[]>();
                spreadsheet6.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Entire Count - A-Z"), });
                spreadsheet6.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - All Atoms - A to Z"), });
                spreadsheet6.Add(AminoAcidChainComposition.SpreadsheetTitleRow());
                var range6 = spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.Select(record => record.SpreadsheetDataRow()).ToList();
                //range6.Sort();
                range6 = range6
                         .OrderBy(a => a[0].CellData)
                         .ThenBy(a => a[1].CellData)
                         .ToList();
                spreadsheet6.AddRange(range6);
                range6 = null;
                spreadsheet6.Add(wholeProteinChainsTotals.SpreadsheetDataRow());
                results.Add(spreadsheet6);

                var spreadsheetHistogram6 = new List <SpreadsheetCell[]>();
                spreadsheetHistogram6.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Entire Count - A-Z - Historgram"), });
                spreadsheetHistogram6.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - All Atoms - A to Z - Histogram"), });
                spreadsheetHistogram6.AddRange(Histogram.MatrixToHistogram(spreadsheet6.ToArray(), Histogram.MakeBinDecimals(0, 10500, 0, 500), new[] { 28 }, 2, -1, true));
                spreadsheetHistogram6.Add(new [] { new SpreadsheetCell(""), });
                spreadsheetHistogram6.AddRange(Histogram.MatrixToHistogram(spreadsheet6.ToArray(), Histogram.MakeBinDecimals(0, 1000, 0, 100), new[] { 28 }, 2, -1, true));
                results.Add(spreadsheetHistogram6);

                spreadsheet6          = null;
                spreadsheetHistogram6 = null;
            }
            //

            {
                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    var spreadsheet7 = new List <SpreadsheetCell[]>();
                    spreadsheet7.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Entire Count - Groups " + enumAminoAcidGroups), });

                    spreadsheet7.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - All Atoms - Acid Groups " + enumAminoAcidGroups), });
                    spreadsheet7.Add(AminoAcidChainComposition.SpreadsheetGroupsTitleRow(enumAminoAcidGroups));
                    var range7 = spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.Select(record => record.SpreadsheetGroupsDataRow(enumAminoAcidGroups)).ToList();
                    //range7.Sort();
                    range7 = range7
                             .OrderBy(a => a[0].CellData)
                             .ThenBy(a => a[1].CellData)
                             .ToList();
                    spreadsheet7.AddRange(range7);
                    range7 = null;
                    spreadsheet7.Add(wholeProteinChainsTotals.SpreadsheetGroupsDataRow(enumAminoAcidGroups));

                    results.Add(spreadsheet7);
                    spreadsheet7 = null;
                }
            }


            // Convert each chain's amino acid counts to percentages and accumulate their mean over all chains (the mean protein composition).
            var meanProteinComposition = new AminoAcidChainComposition("Mean Composition", "-");

            foreach (AminoAcidChainComposition aminoAcidChainComposition in spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter)
            {
                // get percentage for row
                AminoAcidChainComposition percentage = AminoAcidChainComposition.ConvertToPercentage(aminoAcidChainComposition);

                // add percentage to overall tally

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    for (int x = 0; x < AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(enumAminoAcidGroups); x++)
                    {
                        meanProteinComposition.AminoAcidGroupsCount[(int)enumAminoAcidGroups][x] += (percentage.AminoAcidGroupsCount[(int)enumAminoAcidGroups][x] / spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.Count);
                    }
                }
            }

            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

            {
                /* start test */
                var spreadsheet8 = new List <SpreadsheetCell[]>();
                spreadsheet8.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% TEST SHEET 1"), }); // Worksheet name.
                spreadsheet8.Add(new[] { new SpreadsheetCell("TEST SHEET 1"), });                              // Spreadsheet title

                spreadsheet8.Add(new[] { new SpreadsheetCell(string.Empty), });
                spreadsheet8.Add(meanProteinComposition.SpreadsheetDataRow());
                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    spreadsheet8.Add(meanProteinComposition.SpreadsheetGroupsDataRow(enumAminoAcidGroups));
                }
                results.Add(spreadsheet8);
                spreadsheet8 = null;
                /* end test */
            }

            AminoAcidPairCompositionMatrix meanProteinMatrix = AminoAcidChainComposition.ConvertToMatrix(meanProteinComposition);

            {
                var spreadsheet9 = new List <SpreadsheetCell[]>();
                spreadsheet9.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HM All Atoms 3x3"), });                            // Worksheet name.

                spreadsheet9.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Average Chain Composition"), }); // Spreadsheet title.

                //spreadsheet9.Add(new[] { new SpreadsheetCell(string.Empty), });
                //spreadsheet9.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Average Chain Composition - Percentage Composition - A to Z"), }); // Section title.
                //spreadsheet9.AddRange(meanProteinMatrix.SpreadsheetAminoAcidColorGroupsHeatMap());

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    spreadsheet9.Add(new[] { new SpreadsheetCell(string.Empty), });
                    spreadsheet9.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Average Chain Composition - Percentage Composition - Acid Groups " + enumAminoAcidGroups), }); // Section title.
                    spreadsheet9.AddRange(meanProteinMatrix.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups));
                }
                results.Add(spreadsheet9);
                spreadsheet9 = null;
            }

            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

            //if (outputAllAtoms1x1)
            //{
            AminoAcidPairCompositionMatrix wholeProteinAminoAcidToAminoAcidCounterPercentage1X1 = AminoAcidPairCompositionMatrix.CalculatePercentageMatrix(wholeProteinAminoAcidToAminoAcidCounter1X1);

            {
                var spreadsheet10 = new List <SpreadsheetCell[]>();
                spreadsheet10.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HM All Atoms 1x1") });                      // Worksheet name.

                spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall Composition") }); // Spreadsheet title.

                //spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty)});
                //spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall Percentage Composition - A to Z")}); // Section title.
                //spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1.SpreadsheetAminoAcidColorGroupsHeatMap());

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty) });
                    spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall Percentage Composition - Acid Groups " + enumAminoAcidGroups) }); // Section title.
                    spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups));
                }

                AminoAcidPairCompositionMatrix wholeProteinAminoAcidToAminoAcidCounterNormalised1X1 = AminoAcidPairCompositionMatrix.NormalizeWithCompositionMatrix(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1, UniProtProteinDatabaseComposition.AminoAcidCompositionAsMatrix());

                //spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty)});
                //spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall UniProt Normalised - A to Z ")}); // Section title.
                //spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterNormalised1X1.SpreadsheetAminoAcidColorGroupsHeatMap());

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty) });
                    spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall UniProt Normalised - Acid Groups " + enumAminoAcidGroups) }); // Section title.
                    spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterNormalised1X1.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups));
                }

                AminoAcidPairCompositionMatrix wholeProteinAminoAcidToAminoAcidCounterDifference1X1 = AminoAcidPairCompositionMatrix.DifferenceWithCompositionMatrix(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1, UniProtProteinDatabaseComposition.AminoAcidCompositionAsMatrix());

                //spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty)});
                //spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall A to Z - UniProt Difference")}); // Section title.
                //spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterDifference1X1.SpreadsheetAminoAcidColorGroupsHeatMap());

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty) });
                    spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall Acid Groups " + enumAminoAcidGroups + " - UniProt Difference") }); // Section title.
                    spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterDifference1X1.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups));
                }

                results.Add(spreadsheet10);
                spreadsheet10 = null;
            }
            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

            {
                AminoAcidPairCompositionMatrix interactionsAminoAcidToAminoAcidCounterPercentage = AminoAcidPairCompositionMatrix.CalculatePercentageMatrix(spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter);

                var spreadsheet11 = new List <SpreadsheetCell[]>();
                spreadsheet11.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HM Interactions Only") });    // Worksheet name.

                spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only") }); // Spreadsheet title.

                //spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty)});
                //spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - A to Z")}); // Section title.
                //spreadsheet11.AddRange(spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter.SpreadsheetAminoAcidColorGroupsHeatMap());

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty) });
                    spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - Acid Groups " + enumAminoAcidGroups) }); // Section title.
                    spreadsheet11.AddRange(spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups));
                }

                AminoAcidPairCompositionMatrix interactionsAminoAcidToAminoAcidCounterNormalised = AminoAcidPairCompositionMatrix.NormalizeWithCompositionMatrix(interactionsAminoAcidToAminoAcidCounterPercentage, UniProtProteinDatabaseComposition.AminoAcidCompositionAsMatrix());

                //spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty)});
                //spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - A to Z - UniProt Normalised")}); // Section title.
                //spreadsheet11.AddRange(interactionsAminoAcidToAminoAcidCounterNormalised.SpreadsheetAminoAcidColorGroupsHeatMap());

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty) });
                    spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - Acid Groups " + enumAminoAcidGroups + " - UniProt Normalised") }); // Section title.
                    spreadsheet11.AddRange(interactionsAminoAcidToAminoAcidCounterNormalised.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups));
                }

                AminoAcidPairCompositionMatrix interactionsAminoAcidToAminoAcidCounterDifference = AminoAcidPairCompositionMatrix.DifferenceWithCompositionMatrix(interactionsAminoAcidToAminoAcidCounterPercentage, UniProtProteinDatabaseComposition.AminoAcidCompositionAsMatrix());

                //spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty)});
                //spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - A to Z - UniProt Difference")}); // Section title.
                //spreadsheet11.AddRange(interactionsAminoAcidToAminoAcidCounterDifference.SpreadsheetAminoAcidColorGroupsHeatMap());

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty) });
                    spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - Acid Groups " + enumAminoAcidGroups + " - UniProt Difference") }); // Section title.
                    spreadsheet11.AddRange(interactionsAminoAcidToAminoAcidCounterDifference.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups));
                }

                results.Add(spreadsheet11);

                spreadsheet11 = null;
            }
            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

            {
                var spreadsheet12 = new List <SpreadsheetCell[]>();
                spreadsheet12.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HM Interactions v Homodimers") });                                              // Worksheet name.

                spreadsheet12.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Difference between homodimer composition and homodimer interactions") }); // Spreadsheet title
                spreadsheet12.Add(new[] { new SpreadsheetCell(string.Empty) });

                //spreadsheet12.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Difference between homodimer composition and homodimer interactions - A to Z")}); // Section title
                //spreadsheet12.AddRange(AminoAcidPairCompositionMatrix.DifferenceWithCompositionMatrix(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1, spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter).SpreadsheetAminoAcidColorGroupsHeatMap());
                //spreadsheet12.Add(new[] { new SpreadsheetCell(string.Empty)});

                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
                {
                    spreadsheet12.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Difference between homodimer composition and homodimer interactions - Acid Groups " + enumAminoAcidGroups) }); // Section title.
                    spreadsheet12.AddRange(AminoAcidPairCompositionMatrix.DifferenceWithCompositionMatrix(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1, spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter).SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups));
                    spreadsheet12.Add(new[] { new SpreadsheetCell(string.Empty) });
                }

                results.Add(spreadsheet12);
            }
            ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

            return(results);
        }
Ejemplo n.º 7
0
        /// <summary>
        ///     Filters the sequences loaded from the given FASTA files by running each selected filtering operation in turn,
        ///     and saves the sequences remaining after every operation to disk as FASTA.  Data needs to be cleaned for two
        ///     reasons, firstly to not pollute or distort the results, and secondly to save unnecessary processing
        ///     operations.
        /// </summary>
        /// <param name="cancellationToken">
        ///     Checked at the start of every filtering pass and forwarded to each filter.  When cancellation is requested the
        ///     loop stops, no further files are saved and "Cancelled." is reported.
        /// </param>
        /// <param name="maxAtomInterationDistance">Forwarded to the interaction filter and to the symmetry calculation.</param>
        /// <param name="pdbFilesFolders">
        ///     Folders containing the PDB structure files.  Must be non-empty when any of the structure-based operations
        ///     (multiple models, non-interacting, non-symmetrical, wrong number of chains in structure) is selected.
        /// </param>
        /// <param name="fastaFiles">FASTA files to load.  Must be non-empty.</param>
        /// <param name="proteinOperationOptionFlags">Flags selecting which filtering operations are performed.</param>
        /// <param name="saveFastaFilenameTemplate">
        ///     Output filename template.  The placeholders <c>%date%</c>, <c>%time%</c> and <c>%fasta_filename%</c> are
        ///     substituted; <c>%fasta_filename%</c> is replaced with the description of the result set being saved.
        /// </param>
        /// <param name="progressActionSet">Receives the status messages and progress updates.</param>
        /// <param name="fileExistsOptions">
        ///     What to do when an output file already exists: append a number to the filename, overwrite the file, or skip it.
        /// </param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="fastaFiles" /> is null or empty, <paramref name="saveFastaFilenameTemplate" /> is null or
        ///     white space, or <paramref name="pdbFilesFolders" /> is null or empty while a structure-based operation is
        ///     selected.
        /// </exception>
        public static void CleanProteins(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFilesFolders, string[] fastaFiles, ProteinOperation proteinOperationOptionFlags, string saveFastaFilenameTemplate, ProgressActionSet progressActionSet, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            // PDB folders are only mandatory when at least one structure-based filter was requested.
            if (pdbFilesFolders == null || pdbFilesFolders.Length == 0)
            {
                if (proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveMultipleModelsInStructure) || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonInteractingProteinsInStructure) || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonSymmetricalInStructure) || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInStructure))
                {
                    throw new ArgumentOutOfRangeException(nameof(pdbFilesFolders));
                }
            }

            if (fastaFiles == null || fastaFiles.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(fastaFiles));
            }

            if (string.IsNullOrWhiteSpace(saveFastaFilenameTemplate))
            {
                throw new ArgumentOutOfRangeException(nameof(saveFastaFilenameTemplate));
            }

            // NOTE(review): the result is not used below; the call is kept in case GetPdbFilesArray validates the
            // folders or has side effects - confirm and remove if it does neither.
            string[] pdbFilesArray = ProteinDataBankFileOperations.GetPdbFilesArray(pdbFilesFolders);
            int    beforeCount             = 0;
            int    afterCount              = 0;
            string saveFilename            = saveFastaFilenameTemplate;
            var    currentProteinOperation = ProteinOperation.LoadFile;

            int[] numberSequencesLoaded;

            // Slot 0 holds the main working set that is carried into the next operation; slots 1 and 2 hold the
            // secondary result sets that some operations split off, and are reset before every operation.
            var   sequences = new List <ISequence> [3];

            ProgressActionSet.Report("Filtering proteins.", progressActionSet);

            // Load fasta/sequence files.
            sequences[0] = SequenceFileHandler.LoadSequenceFileList(fastaFiles, StaticValues.MolNameProteinAcceptedValues, out numberSequencesLoaded, true);
            var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences[0]);

            // Report the load result per input file.  Sequence counts are halved to report proteins
            // (presumably two chains per protein at this stage - TODO confirm).
            for (int numberSequencesLoadedIndex = 0; numberSequencesLoadedIndex < numberSequencesLoaded.Length; numberSequencesLoadedIndex++)
            {
                if (numberSequencesLoaded[numberSequencesLoadedIndex] > 0)
                {
                    ProgressActionSet.Report("Loaded " + numberSequencesLoaded[numberSequencesLoadedIndex] / 2 + " proteins from file: " + fastaFiles[numberSequencesLoadedIndex], progressActionSet);
                }
                else
                {
                    ProgressActionSet.Report("Error could not load file: " + fastaFiles[numberSequencesLoadedIndex], progressActionSet);
                }
            }

            // Nothing to do when no file yielded any sequences.
            if (!numberSequencesLoaded.Any(a => a > 0))
            {
                return;
            }

            // Replace placeholder variable names.  A single timestamp is used for both placeholders so that the date
            // and the time in the filename always belong to the same instant, even when running across midnight.
            DateTime timestamp = DateTime.Now;
            saveFilename = saveFilename.Replace("%date%", timestamp.ToString("yyyy-MM-dd"));
            saveFilename = saveFilename.Replace("%time%", timestamp.ToString("HH.mm.ss"));

            // Resolve a collision on the base filename (the %fasta_filename% placeholder is not yet substituted here).
            if (File.Exists(saveFilename))
            {
                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                {
                    saveFilename = FileExistsHandler.FindNextFreeOutputFilename(saveFilename);
                }
                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.OverwriteFile)
                {
                    // Keep the filename; the existing file will be overwritten.
                }
                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                {
                    return;
                }
            }

            // Walk through the operations in ascending order of their enum value, performing those that were selected.
            while (currentProteinOperation != ProteinOperation.Finished)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    break;
                }

                // Advance to the next operation by doubling the current value
                // (presumably the enum members are consecutive powers of two - TODO confirm).
                currentProteinOperation = (ProteinOperation)((int)currentProteinOperation * 2);
                sequences[1]            = null;
                sequences[2]            = null;
                var sequencesDescriptions = new string[3];

                if (currentProteinOperation == ProteinOperation.Finished)
                {
                    break;
                }

                // Skip every known operation that was not selected by the caller.
                if (currentProteinOperation == ProteinOperation.RemoveNonProteinAlphabetInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonProteinAlphabetInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveWrongNumberOfChainsInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveExactDuplicatesInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveExactDuplicatesInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonHomodimersInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonHomodimersInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveWrongNumberOfChainsInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInStructure))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveMultipleModelsInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveMultipleModelsInStructure))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonInteractingProteinsInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonInteractingProteinsInStructure))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonSymmetricalInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonSymmetricalInStructure))
                {
                    continue;
                }

                // Count sequences before operation.
                beforeCount = sequences[0].Count / 2;

                // Update user about what is happening.
                ProgressActionSet.Report("", progressActionSet);
                ProgressActionSet.Report("Removing " + ProteinOperationString(currentProteinOperation) + " entries [from " + beforeCount + " proteins]", progressActionSet);

                // Start stopwatch to count duration of operation.
                Stopwatch stopwatch = Stopwatch.StartNew();

                // Perform specified operation.
                switch (currentProteinOperation)
                {
                case ProteinOperation.RemoveNonProteinAlphabetInSequence:
                {
                    sequencesDescriptions[0] = "01 - Removed non-protein sequences (sequence filter)";
                    sequences[0]             = FilterProteins.RemoveNonProteinAlphabetSequences(cancellationToken, sequences[0], progressActionSet);
                    break;
                }

                case ProteinOperation.RemoveWrongNumberOfChainsInSequence:
                {
                    sequencesDescriptions[0] = "02 - Removed non-dimers (sequence filter)";
                    sequences[0]             = FilterProteins.RemoveSequencesWithIncorrectNumberOfChains(cancellationToken, sequences[0], 2, progressActionSet);
                    break;
                }

                case ProteinOperation.RemoveExactDuplicatesInSequence:
                {
                    sequencesDescriptions[0] = "03 - Removed exact duplicates (sequence filter)";
                    sequences[0]             = FilterProteins.RemoveDuplicates(cancellationToken, sequences[0], progressActionSet);
                    break;
                }

                case ProteinOperation.RemoveNonHomodimersInSequence:
                {
                    // homodimers - all types - unfiltered for interactions or symmetry

                    var result = FilterProteins.SplitDimerTypes(cancellationToken, sequences[0], 30, 90, progressActionSet);

                    sequencesDescriptions[0] = "04 - Homodimers only (sequence filter)";
                    sequences[0]             = result.HomoDimerPdbIdList;

                    sequencesDescriptions[1] = "04 - Heterodimers only (sequence filter)";
                    sequences[1]             = result.HeteroDimerPdbIdList;

                    sequencesDescriptions[2] = "04 - Homology dimers only (sequence filter)";
                    sequences[2]             = result.HomologyDimerPdbIdList;
                    break;
                }

                case ProteinOperation.RemoveMultipleModelsInStructure:
                {
                    sequencesDescriptions[0] = "05 - Removed multiple models (structure filter)";
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);
                    pdbIdList    = FilterProteins.RemoveMultipleStructureModels(cancellationToken, pdbFilesFolders, pdbIdList, progressActionSet);
                    sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                    break;
                }

                case ProteinOperation.RemoveWrongNumberOfChainsInStructure:
                {
                    sequencesDescriptions[0] = "06 - Removed non-dimers (structure filter)";
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);

                    pdbIdList    = FilterProteins.RemoveStructuresWithIncorrectNumberOfChains(cancellationToken, pdbFilesFolders, pdbIdList, pdbIdChainIdList, 2, progressActionSet);
                    sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                    break;
                }

                case ProteinOperation.RemoveNonInteractingProteinsInStructure:
                {
                    // Make copy of sequences as we will split the list into two parts - with and without interactions.
                    sequences[1] = new List <ISequence>(sequences[0]);

                    // Get pdb id list from sequences, to check for pdb file, load, perform processing.
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);

                    // Makes a list of sequences with interactions.
                    pdbIdList = FilterProteins.RemoveSequencesWithoutInteractions(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);

                    // Remove any protein not in the list, keep the ones in the list.
                    sequencesDescriptions[0] = "08 - dimers - with interactions - unfiltered for symmetry";
                    sequences[0]             = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    // The copy keeps the complement: every protein that is not in the list.
                    sequencesDescriptions[1] = "07 - dimers - no observed interactions";
                    sequences[1]             = FilterProteins.RemoveSequences(cancellationToken, sequences[1], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                    break;
                }

                case ProteinOperation.RemoveNonSymmetricalInStructure:
                {
                    // Make copies of sequences as we will split the list into three parts - full, partial and no symmetry.
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);
                    sequences[1] = new List <ISequence>(sequences[0]);
                    sequences[2] = new List <ISequence>(sequences[0]);
                    Dictionary <string, decimal> symmetryPercentage = FilterProteins.CalculateStructureSymmetry(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);

                    var pdbSymmetrical     = new List <string>();
                    var pdbPartSymmetrical = new List <string>();
                    var pdbNonSymmetrical  = new List <string>();

                    // Bucket every structure by its symmetry percentage; values outside 0..100 are only reported.
                    foreach (var symmetryPercentageKeyValuePair in symmetryPercentage)
                    {
                        if (symmetryPercentageKeyValuePair.Value == 0.0m)
                        {
                            pdbNonSymmetrical.Add(symmetryPercentageKeyValuePair.Key);
                        }
                        else if (symmetryPercentageKeyValuePair.Value == 100.0m)
                        {
                            pdbSymmetrical.Add(symmetryPercentageKeyValuePair.Key);
                        }
                        else if (symmetryPercentageKeyValuePair.Value > 0.0m && symmetryPercentageKeyValuePair.Value < 100.0m)
                        {
                            pdbPartSymmetrical.Add(symmetryPercentageKeyValuePair.Key);
                        }
                        else
                        {
                            ProgressActionSet.Report("Error: Out of bounds symmetry value of " + symmetryPercentageKeyValuePair.Value + " was found in " + symmetryPercentageKeyValuePair.Key + ".", progressActionSet);
                        }
                    }

                    sequencesDescriptions[0] = "11 - dimers - with interactions - 100% symmetrical";
                    sequences[0]             = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    sequencesDescriptions[1] = "10 - dimers - with interactions - 1% to 99% symmetrical";
                    sequences[1]             = FilterProteins.RemoveSequences(cancellationToken, sequences[1], pdbPartSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    sequencesDescriptions[2] = "09 - dimers - with interactions - 0% symmetrical";
                    sequences[2]             = FilterProteins.RemoveSequences(cancellationToken, sequences[2], pdbNonSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    break;
                }
                }

                // Stop stopwatch immediately after operation.
                stopwatch.Stop();

                // Count sequences after operation.
                afterCount = sequences[0].Count / 2;

                // Results of a cancelled operation are not saved or reported.
                if (!cancellationToken.IsCancellationRequested)
                {
                    for (int sequencesIndex = sequences.GetLowerBound(0); sequencesIndex <= sequences.GetUpperBound(0); sequencesIndex++)
                    {
                        if (sequences[sequencesIndex] != null)
                        {
                            // Build the output filename for this result set from its description.
                            string localSaveFilename = saveFilename;
                            localSaveFilename = localSaveFilename.Replace("%fasta_filename%", sequencesDescriptions[sequencesIndex]);

                            bool skipFile = false;

                            if (File.Exists(localSaveFilename))
                            {
                                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                                {
                                    localSaveFilename = FileExistsHandler.FindNextFreeOutputFilename(localSaveFilename);
                                }
                                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.OverwriteFile)
                                {
                                    // Keep the filename; the existing file will be overwritten.
                                }
                                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                                {
                                    skipFile = true;
                                }
                            }

                            if (!skipFile)
                            {
                                // Save the sequence results to the filename chosen above.
                                string savedFile = SequenceFileHandler.SaveSequencesAsFasta(sequences[sequencesIndex], localSaveFilename);

                                // Inform user that file has been saved.
                                ProgressActionSet.Report("Saved file: " + savedFile, progressActionSet);
                            }
                        }
                    }

                    // Update the user about the results.
                    ProgressActionSet.Report("Removed " + (beforeCount - afterCount) + " proteins. [" + afterCount + " proteins remaining]. Elapsed: " + stopwatch.Elapsed.ToString(@"dd\:hh\:mm\:ss"), progressActionSet);
                }
            }

            if (!cancellationToken.IsCancellationRequested)
            {
                ProgressActionSet.Report("Finished all selected filtering operations.", progressActionSet);
            }
            else
            {
                ProgressActionSet.Report("Cancelled.", progressActionSet);

                // Run a start/progress/finish cycle on the progress display after the cancellation.
                ProgressActionSet.StartAction(100, progressActionSet);
                ProgressActionSet.ProgressAction(100, progressActionSet);
                ProgressActionSet.FinishAction(false, progressActionSet);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        ///     Clusters the rows of a distance matrix by repeatedly joining the pair of entries reported by
        ///     <c>UpgmaLowestDistanceIndexes</c> and recalculating the matrix with
        ///     <c>UpgmaDistanceMatrixNextIteration</c>, until a single entry remains.  The resulting node network is
        ///     converted to Newick tree format.
        /// </summary>
        /// <remarks>
        ///     Naming used by the node type: the two nodes being joined are the "parent" nodes and the newly created
        ///     joined node is their "child" node.
        /// </remarks>
        /// <param name="distanceMatrix">Distance matrix to cluster.  Must be non-null, non-empty and contain no empty rows.</param>
        /// <param name="vectorNames">Names passed to the Newick formatter for labelling the tree.</param>
        /// <param name="minimumOutputTreeLeafs">Passed to the Newick formatter unchanged.</param>
        /// <param name="nodeListList">Receives a copy of the node network for every tree that was generated.</param>
        /// <param name="treeListList">Receives every Newick tree that was generated.</param>
        /// <param name="finalTreeLeafOrderList">Receives the leaf order of the last tree that was generated.</param>
        /// <param name="newickTreeEveryIteration">
        ///     When true, a tree and a node network copy are produced after every join; when false, only once after the
        ///     last join.
        /// </param>
        /// <param name="progressActionSet">Receives the progress updates.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="distanceMatrix" /> is null, empty or contains an empty row.</exception>
        /// <exception cref="InvalidOperationException">The two indexes selected for joining are identical.</exception>
        public static void Upgma(List <List <decimal> > distanceMatrix, List <string> vectorNames, int minimumOutputTreeLeafs, out List <List <UpgmaNode> > nodeListList, out List <List <string> > treeListList, out List <string> finalTreeLeafOrderList, bool newickTreeEveryIteration = false, ProgressActionSet progressActionSet = null)
        {
            if (distanceMatrix == null || distanceMatrix.Count == 0 || distanceMatrix.Any(a => a.Count == 0))
            {
                throw new ArgumentOutOfRangeException(nameof(distanceMatrix), "distance matrix is null or empty");
            }

            nodeListList = new List <List <UpgmaNode> >();
            var nodeList = new List <UpgmaNode>();

            treeListList = new List <List <string> >();

            finalTreeLeafOrderList = new List <string>();

            // store a list of every index merged to each index position in the distance matrix
            var distanceMatrixMap = new List <List <int> >();

            // Start with one node per matrix row, each mapped to its own original index.
            for (var matrixIndex = 0; matrixIndex < distanceMatrix.Count; matrixIndex++)
            {
                distanceMatrixMap.Add(new List <int>());
                distanceMatrixMap[matrixIndex].Add(matrixIndex);

                var node = new UpgmaNode();
                node.VectorIndexes.Add(matrixIndex);
                node.CopyPropertiesToGenericNodeProperties();
                nodeList.Add(node);
            }

            int itemsCompleted = 0;
            int itemsTotal     = distanceMatrix.Count;

            var startTicks = DateTime.Now.Ticks;

            ProgressActionSet.StartAction(itemsTotal, progressActionSet);

            while (distanceMatrix.Count > 1 && distanceMatrix.Max(a => a.Count) > 1)
            {
                // find which indexes to join
                var lowestIndexes = UpgmaLowestDistanceIndexes(distanceMatrix);
                var lowestValue   = distanceMatrix[lowestIndexes.X][lowestIndexes.Y];
                var lowerIndex    = lowestIndexes.X < lowestIndexes.Y ? lowestIndexes.X : lowestIndexes.Y;
                var higherIndex   = lowestIndexes.X > lowestIndexes.Y ? lowestIndexes.X : lowestIndexes.Y;

                // The joined node is placed at half the distance between the two entries being joined.
                var nodeDistance  = lowestValue / 2;

                if (lowerIndex == higherIndex)
                {
                    throw new InvalidOperationException("lower index and higher index have the same value");
                }

                // Take parent node identities (the sorted original indexes merged into each position so far)
                var parentNodeIdA = distanceMatrixMap[lowerIndex].OrderBy(o => o).ToList();
                var parentNodeIdB = distanceMatrixMap[higherIndex].OrderBy(o => o).ToList();

                var childNodeId = new List <int>();
                childNodeId.AddRange(parentNodeIdA);
                childNodeId.AddRange(parentNodeIdB);
                childNodeId = childNodeId.Distinct().ToList();

                // Look up the existing nodes that carry exactly those identities
                var parentNodeCandidatesA = nodeList.Where(a => a.VectorIndexes.OrderBy(o => o).SequenceEqual(parentNodeIdA)).ToList();
                var parentNodeCandidatesB = nodeList.Where(a => a.VectorIndexes.OrderBy(o => o).SequenceEqual(parentNodeIdB)).ToList();

                UpgmaNode parentNodeA = parentNodeCandidatesA[0];
                UpgmaNode parentNodeB = parentNodeCandidatesB[0];

                var childNode = new UpgmaNode();
                nodeList.Add(childNode);

                // Link the nodes before measuring: DistanceNodeToFinalParent is called on nodes that are already linked.
                parentNodeA.ChildNode = childNode;
                parentNodeB.ChildNode = childNode;
                childNode.ParentNodeA = parentNodeA;
                childNode.ParentNodeB = parentNodeB;

                var parentTotalDistanceA = DistanceNodeToFinalParent(parentNodeA);
                var parentTotalDistanceB = DistanceNodeToFinalParent(parentNodeB);

                // Branch length is the node distance less the distance reported for the parent node.
                parentNodeA.DistanceChildNode = nodeDistance - parentTotalDistanceA;
                parentNodeB.DistanceChildNode = nodeDistance - parentTotalDistanceB;
                childNode.DistanceParentNodeA = nodeDistance - parentTotalDistanceA;
                childNode.DistanceParentNodeB = nodeDistance - parentTotalDistanceB;

                childNode.DistanceMatrixIterationNumber = itemsCompleted;
                childNode.VectorIndexes = childNodeId;

                parentNodeA.CopyPropertiesToGenericNodeProperties();
                parentNodeB.CopyPropertiesToGenericNodeProperties();
                childNode.CopyPropertiesToGenericNodeProperties();

                // rearrange the matrix map with the new indexes joined: the higher position is folded into the lower one
                distanceMatrixMap[lowerIndex].AddRange(distanceMatrixMap[higherIndex]);
                distanceMatrixMap.RemoveAt(higherIndex);

                // recalculate distance matrix with indexes joined with mean average
                distanceMatrix = UpgmaDistanceMatrixNextIteration(distanceMatrix, lowestIndexes);

                if (newickTreeEveryIteration)
                {
                    List <string> treeLeafOrderList;
                    var           iterationTree = Newick.NewickTreeFormat(nodeList.ToList <GenericNode>(), vectorNames, out treeLeafOrderList, minimumOutputTreeLeafs);
                    treeListList.Add(iterationTree);
                    finalTreeLeafOrderList = treeLeafOrderList;
                    nodeListList.Add(UpgmaNode.CopyNetwork(nodeList));
                }

                itemsCompleted++;

                ProgressActionSet.ProgressAction(1, progressActionSet);
                ProgressActionSet.EstimatedTimeRemainingAction(startTicks, itemsCompleted, itemsTotal, progressActionSet);
            }

            // Without per-iteration trees, a single tree is produced from the final node network.
            if (!newickTreeEveryIteration)
            {
                List <string> treeLeafOrderList;
                var           iterationTree = Newick.NewickTreeFormat(nodeList.ToList <GenericNode>(), vectorNames, out treeLeafOrderList, minimumOutputTreeLeafs);
                treeListList.Add(iterationTree);
                finalTreeLeafOrderList = treeLeafOrderList;
                nodeListList.Add(UpgmaNode.CopyNetwork(nodeList));
            }

            ProgressActionSet.FinishAction(true, progressActionSet);
        }
        /*
         * public static void ClusterVectorDistanceMatrixUpgma(List<VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList, decimal[,] vectorDistanceMatrix, int minimumOutputTreeLeafs, out List<string> vectorNames, out List<List<UpgmaNode>> nodeList, out List<List<string>> treeList, ProgressActionSet progressActionSet)
         * {
         *  if (vectorProteinInterfaceWholeList == null) throw new ArgumentNullException(nameof(vectorProteinInterfaceWholeList));
         *  if (vectorDistanceMatrix == null) throw new ArgumentNullException(nameof(vectorDistanceMatrix));
         *
         *  vectorNames = vectorProteinInterfaceWholeList.Select(VectorProteinInterfaceWholeTreeHeader).ToList();
         *
         *  List<string> finalTreeLeafOrderList;
         *  UpgmaClustering.Upgma(vectorDistanceMatrix, vectorNames, minimumOutputTreeLeafs, out nodeList, out treeList, out finalTreeLeafOrderList, false, progressActionSet);
         * }
         */

        /// <summary>
        ///     Computes a pairwise distance matrix over <paramref name="vectorProteinInterfaceWholeList" />.  For every pair
        ///     of vectors with different <c>FullProteinInterfaceId</c> values the distance returned by
        ///     <c>BestDistanceWithPartsAlignment</c> is increased by the absolute difference of the two
        ///     <c>ProteinInterfaceLength</c> values multiplied by <c>DifferentLengthProteinInterface</c>, and the result is
        ///     written to both [x, y] and [y, x].  The diagonal and pairs sharing an id are left at zero.
        /// </summary>
        /// <param name="cancellationToken">Checked before each row is processed; remaining rows of a worker are skipped once cancellation is requested.</param>
        /// <param name="vectorProteinInterfaceWholeList">The vectors to compare; the matrix indexes follow the order of this list.</param>
        /// <param name="vectorDistanceMeasurementValues">Distance settings passed to the pairwise comparison and used for the length penalty.</param>
        /// <param name="optimisticDistanceMatrix">Receives the square distance matrix (one row and column per vector).</param>
        /// <param name="progressActionSet">Receives start, per-row progress, estimated time remaining and finish notifications.</param>
        /// <exception cref="ArgumentNullException">The vector list or the distance measurement values are null.</exception>
        public static void BestDistanceMatrixWithPartsAlignment(CancellationToken cancellationToken, List <VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList, VectorDistanceMeasurementValues vectorDistanceMeasurementValues, out double[,] optimisticDistanceMatrix, /* out double[,] pessimisticDistanceMatrix,*/ ProgressActionSet progressActionSet)
        {
            if (vectorProteinInterfaceWholeList == null)
            {
                throw new ArgumentNullException(nameof(vectorProteinInterfaceWholeList));
            }

            if (vectorDistanceMeasurementValues == null)
            {
                throw new ArgumentNullException(nameof(vectorDistanceMeasurementValues));
            }

            var vectorCount = vectorProteinInterfaceWholeList.Count;

            // An out parameter cannot be captured by the worker lambdas, so the matrix is built in a local
            // and handed to the out parameter once all workers have finished.
            var distances = new double[vectorCount, vectorCount];

            var workDivision = new WorkDivision(vectorCount, -1);

            ProgressActionSet.StartAction(vectorCount, progressActionSet);

            for (var threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++)
            {
                // Copy the loop variable so each lambda sees its own worker index.
                var workerIndex = threadIndex;

                var worker = Task.Run(() =>
                {
                    var firstRow = workDivision.ThreadFirstIndex[workerIndex];
                    var lastRow  = workDivision.ThreadLastIndex[workerIndex];

                    for (var row = firstRow; row <= lastRow; row++)
                    {
                        if (cancellationToken.IsCancellationRequested)
                        {
                            break;
                        }

                        var rowVector = vectorProteinInterfaceWholeList[row];

                        // Only the upper triangle is visited; each result is mirrored into the lower triangle.
                        for (var column = row + 1; column < vectorCount; column++)
                        {
                            var columnVector = vectorProteinInterfaceWholeList[column];

                            if (rowVector.FullProteinInterfaceId == columnVector.FullProteinInterfaceId)
                            {
                                continue;
                            }

                            double pairDistance;
                            BestDistanceWithPartsAlignment(rowVector, columnVector, vectorDistanceMeasurementValues, out pairDistance /*, out pessimisticDistance*/);

                            // Penalise interfaces of different length in proportion to the length gap.
                            var lengthGap     = Math.Abs(rowVector.ProteinInterfaceLength - columnVector.ProteinInterfaceLength);
                            var lengthPenalty = lengthGap * vectorDistanceMeasurementValues.DifferentLengthProteinInterface;

                            pairDistance += lengthPenalty;

                            distances[row, column] = pairDistance;
                            distances[column, row] = pairDistance;
                        }

                        workDivision.IncrementItemsCompleted(1);
                        ProgressActionSet.ProgressAction(1, progressActionSet);
                        ProgressActionSet.EstimatedTimeRemainingAction(workDivision.StartTicks, workDivision.ItemsCompleted, workDivision.ItemsToProcess, progressActionSet);
                    }
                }, cancellationToken);

                workDivision.TaskList.Add(worker);
            }

            workDivision.WaitAllTasks();

            ProgressActionSet.FinishAction(true, progressActionSet);

            optimisticDistanceMatrix = distances;
        }
Ejemplo n.º 10
0
        /// <summary>
        ///     Loads the sequences from <paramref name="sequenceListFileArray" />, clusters every PDB file found in
        ///     <paramref name="pdbFileDirectoryLocationArray" /> whose PDB id occurs in those sequences, and returns one flat
        ///     list of interface vectors.  Two vectors are added per detected protein interface: one created with the
        ///     third argument of <c>MakeVectorProteinInterfaceWhole</c> set to false ("Fwd") and one with it set to true
        ///     ("Rev") - presumably the forward and reversed orientation; confirm against that method.
        /// </summary>
        /// <param name="cancellationToken">Passed through to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="maxAtomInterationDistance">Passed through to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="minimumProteinInterfaceDensity">Passed through to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="sequenceListFileArray">Sequence files to load; determines which PDB ids are processed.</param>
        /// <param name="pdbFileDirectoryLocationArray">Directories searched for PDB files.</param>
        /// <param name="progressActionSet">Receives start, per-file progress, estimated time remaining and finish notifications.</param>
        /// <returns>
        ///     All generated vectors ordered by protein id, then chain id, then protein interface id.  Each vector's
        ///     <c>FullSequenceLength</c> is the length of the matching loaded sequence, or -1 when no sequence matches.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either array argument is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">No PDB ids could be obtained from the loaded sequences.</exception>
        public static List <VectorProteinInterfaceWhole> LoadProteinInterfaceVectorFromFiles(
            CancellationToken cancellationToken,
            decimal maxAtomInterationDistance,
            decimal minimumProteinInterfaceDensity,
            string[] sequenceListFileArray,
            string[] pdbFileDirectoryLocationArray,
            ProgressActionSet progressActionSet)
        {
            if (sequenceListFileArray == null)
            {
                throw new ArgumentNullException(nameof(sequenceListFileArray));
            }
            if (pdbFileDirectoryLocationArray == null)
            {
                throw new ArgumentNullException(nameof(pdbFileDirectoryLocationArray));
            }

            var vectorProteinInterfaceWholeList = new List <VectorProteinInterfaceWhole>();

            // 1: Open list of sequences already cleaned to have only symmetrical homodimers (fasta file only contains 100% symmetrical homodimers with all other junk removed - but could have any number of proteinInterfaces per chain)
            List <ISequence> sequenceList = SequenceFileHandler.LoadSequenceFileList(sequenceListFileArray, StaticValues.MolNameProteinAcceptedValues);

            var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequenceList);

            // 2: Get a list of the unique ids for the sequences
            List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequenceList);

            if (pdbIdList == null || pdbIdList.Count == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(sequenceListFileArray), "Error loading PDB ID list");
            }

            // Hash set gives constant-time membership tests inside the per-file loop; it uses the same
            // default string equality as List<string>.Contains, so the set of accepted ids is unchanged.
            var pdbIdSet = new HashSet <string>(pdbIdList);

            // 3: Get a list of PDB files found in user specified directory
            string[] pdbFilesArray = ProteinDataBankFileOperations.GetPdbFilesArray(pdbFileDirectoryLocationArray);

            ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet);

            var startTicks = DateTime.Now.Ticks;

            // 4: Loop through each pdb file
            for (int pdbFileNumber = 0; pdbFileNumber < pdbFilesArray.Length; pdbFileNumber++)
            {
                // Progress is reported per file visited, including files that are skipped below.
                ProgressActionSet.ProgressAction(1, progressActionSet);

                ProgressActionSet.EstimatedTimeRemainingAction(startTicks, pdbFileNumber + 1, pdbFilesArray.Length, progressActionSet);

                // get unique id of pdb file
                string pdbFilename = pdbFilesArray[pdbFileNumber];
                string proteinId   = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

                // check pdb unique id was in the loaded sequence list
                if (!pdbIdSet.Contains(proteinId))
                {
                    continue;
                }

                ClusterProteinDataBankFileResult clusterPdbFileResult = Clustering.ClusterProteinDataBankFile(cancellationToken, maxAtomInterationDistance, minimumProteinInterfaceDensity, pdbFilename, pdbIdChainIdList, ClusteringMethodOptions.ClusterWithResidueSequenceIndex, -1, -1, progressActionSet);

                // No clustering result for this file: nothing to add.
                if (clusterPdbFileResult == null)
                {
                    continue;
                }

                List <ProteinInterfaceSequenceAndPositionData> proteinInterfaceSequenceAndPositionDataList = clusterPdbFileResult.ProteinInterfaceAnalysisResultData.ProteinInterfacesSequenceAndPositionDataList;
                proteinInterfaceSequenceAndPositionDataList = proteinInterfaceSequenceAndPositionDataList.OrderBy(a => a.FullProteinInterfaceId.ProteinId).ThenBy(a => a.FullProteinInterfaceId.ChainId).ThenBy(a => a.FullProteinInterfaceId.ProteinInterfaceId).ToList();

                for (int proteinInterfaceSequenceAndPositionDataListIndex = 0; proteinInterfaceSequenceAndPositionDataListIndex < proteinInterfaceSequenceAndPositionDataList.Count; proteinInterfaceSequenceAndPositionDataListIndex++)
                {
                    ProteinInterfaceSequenceAndPositionData proteinInterfaceSequenceAndPositionData = proteinInterfaceSequenceAndPositionDataList[proteinInterfaceSequenceAndPositionDataListIndex];

                    // Find the loaded sequence with the same PDB id and chain id (case-insensitive) as this interface.
                    var seq = sequenceList.FirstOrDefault(a =>
                    {
                        var p = SequenceIdSplit.SequenceIdToPdbIdAndChainId(a.ID);
                        return(p.PdbId.ToUpperInvariant() == proteinInterfaceSequenceAndPositionData.FullProteinInterfaceId.ProteinId.ToUpperInvariant() && p.ChainId.ToUpperInvariant() == proteinInterfaceSequenceAndPositionData.ChainIdLetter.ToUpperInvariant());
                    });

                    // -1 marks "no matching sequence found".
                    var seqLen = seq != null ? seq.Count : -1;

                    var vectorProteinInterfaceWholeFwd = MakeVectorProteinInterfaceWhole(pdbFilename, proteinInterfaceSequenceAndPositionData, false, false);
                    vectorProteinInterfaceWholeFwd.FullSequenceLength = seqLen;

                    vectorProteinInterfaceWholeList.Add(vectorProteinInterfaceWholeFwd);

                    var vectorProteinInterfaceWholeRev = MakeVectorProteinInterfaceWhole(pdbFilename, proteinInterfaceSequenceAndPositionData, true, false);
                    vectorProteinInterfaceWholeRev.FullSequenceLength = seqLen;

                    vectorProteinInterfaceWholeList.Add(vectorProteinInterfaceWholeRev);
                }
            }

            ProgressActionSet.FinishAction(true, progressActionSet);

            // Final ordering across all files; OrderBy is stable, so each "Fwd" vector stays ahead of its "Rev" twin.
            vectorProteinInterfaceWholeList = vectorProteinInterfaceWholeList.OrderBy(a => a.FullProteinInterfaceId.ProteinId).ThenBy(a => a.FullProteinInterfaceId.ChainId).ThenBy(a => a.FullProteinInterfaceId.ProteinInterfaceId).ToList();

            return(vectorProteinInterfaceWholeList);
        }