Пример #1
0
        /// <summary>
        ///     Reloads the files that were just saved and reports whether the number of PDB IDs read back
        ///     equals the number of PDB IDs in <paramref name="pdbIdList" />.
        /// </summary>
        /// <param name="pdbIdList">The PDB IDs that are expected to have been saved.</param>
        /// <param name="saveFilenameList">The files that were written; a null or empty list only produces a warning.</param>
        /// <param name="progressActionSet">Passed to <c>ProgressActionSet.Report</c> for every message.</param>
        /// <exception cref="ArgumentNullException"><paramref name="pdbIdList" /> is null.</exception>
        public static void ConfirmSequencesSaved(List <string> pdbIdList, List <string> saveFilenameList, ProgressActionSet progressActionSet)
        {
            if (pdbIdList == null)
            {
                throw new ArgumentNullException(nameof(pdbIdList));
            }

            bool nothingWasSaved = saveFilenameList == null || saveFilenameList.Count == 0;

            if (nothingWasSaved)
            {
                ProgressActionSet.Report("Warning: no files were saved", progressActionSet);
                return;
            }

            // Read the saved files back and reduce them to their PDB IDs.
            List <ISequence> reloadedSequences = SequenceFileHandler.LoadSequenceFileList(saveFilenameList, StaticValues.MolNameProteinAcceptedValues);
            List <string>    reloadedPdbIds    = FilterProteins.SequenceListToPdbIdList(reloadedSequences);

            // Only the totals are compared; the IDs themselves are inspected solely to build the report below.
            if (reloadedPdbIds.Count == pdbIdList.Count)
            {
                ProgressActionSet.Report("All sequences sorted into categories", progressActionSet);
                return;
            }

            // Start from a copy of the expected IDs and strike off each ID that was read back.
            var unaccountedPdbIds = new List <string>(pdbIdList);
            reloadedPdbIds.ForEach(reloadedPdbId => unaccountedPdbIds.Remove(reloadedPdbId));

            ProgressActionSet.Report("Some sequences are unaccounted for: " + string.Join(", ", unaccountedPdbIds), progressActionSet);
        }
        /// <summary>
        ///     Inserts a " [N proteins - M sequences]" tag between the file name and its extension, where N is the
        ///     number of PDB IDs returned by <c>FilterProteins.SequenceListToPdbIdList</c> and M is the number of sequences.
        ///     The filename is returned unchanged when it already contains that exact tag.
        /// </summary>
        /// <param name="sequenceList">The sequences that the file will contain.</param>
        /// <param name="saveFilename">The filename (optionally with a directory) to decorate.</param>
        /// <returns>The decorated filename, or <paramref name="saveFilename" /> itself when the tag is already present.</returns>
        public static string AddSequenceAndProteinCountToFilename(List <ISequence> sequenceList, string saveFilename)
        {
            string directory = Path.GetDirectoryName(saveFilename);

            // Make sure a non-empty directory part ends with a separator before it is glued back on.
            if (!string.IsNullOrEmpty(directory))
            {
                char lastCharacter = directory[directory.Length - 1];

                if (lastCharacter != '\\' && lastCharacter != '/')
                {
                    directory += "/";
                }
            }

            string baseName  = Path.GetFileNameWithoutExtension(saveFilename);
            string extension = Path.GetExtension(saveFilename);

            if (!string.IsNullOrEmpty(extension) && extension[0] != '.')
            {
                extension = "." + extension;
            }

            var proteinCount  = FilterProteins.SequenceListToPdbIdList(sequenceList).Count;
            var sequenceCount = sequenceList.Count;

            string countTag = $" [{proteinCount} proteins - {sequenceCount} sequences]";

            return saveFilename.Contains(countTag)
                       ? saveFilename
                       : directory + baseName + countTag + extension;
        }
Пример #3
0
        /// <summary>
        ///     This method removes sequences not having the required number of chains.
        ///     A PDB ID is treated as having the wrong number of chains when the number of times it occurs in the
        ///     non-distinct PDB ID list differs from <paramref name="numberOfChainsRequired" />.
        /// </summary>
        /// <param name="cancellationToken">Passed to the worker tasks and to <c>RemoveSequences</c>.</param>
        /// <param name="sequenceList">The sequences to filter; must not be null or empty.</param>
        /// <param name="numberOfChainsRequired">The occurrence count a PDB ID must have for its sequences to be kept.</param>
        /// <param name="progressActionSet">Progress callbacks; must not be null even though the parameter has a null default.</param>
        /// <returns>The list returned by <c>RemoveSequences</c> for the offending PDB IDs.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="sequenceList" /> is null or empty.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="progressActionSet" /> is null.</exception>
        public static List <ISequence> RemoveSequencesWithIncorrectNumberOfChains(CancellationToken cancellationToken, List <ISequence> sequenceList, int numberOfChainsRequired = 2, ProgressActionSet progressActionSet = null)
        {
            if (sequenceList == null || sequenceList.Count == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(sequenceList));
            }

            if (progressActionSet == null)
            {
                throw new ArgumentNullException(nameof(progressActionSet));
            }

            var pdbIdListNotDistinct = FilterProteins.SequenceListToPdbIdList(sequenceList, false);

            // Group the IDs once so each occurrence count is a lookup rather than a rescan of the whole list
            // for every element. The lookup is only read after this point, including from the worker tasks.
            var pdbIdOccurrences = pdbIdListNotDistinct.ToLookup(pdbId => pdbId);

            ProgressActionSet.StartAction(pdbIdListNotDistinct.Count, progressActionSet);

            var workDivision = new WorkDivision <List <string> >(pdbIdListNotDistinct.Count);

            for (var threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++)
            {
                // Copy the loop variable so the lambda captures this iteration's value.
                var localThreadIndex       = threadIndex;
                Task <List <string> > task = Task.Run(() =>
                {
                    var taskResult = pdbIdListNotDistinct.Where((a, pdbIdIndex) =>
                    {
                        // Each task only handles the index range assigned to it.
                        if (pdbIdIndex < workDivision.ThreadFirstIndex[localThreadIndex] || pdbIdIndex > workDivision.ThreadLastIndex[localThreadIndex])
                        {
                            return(false);
                        }

                        workDivision.IncrementItemsCompleted(1);
                        ProgressActionSet.ProgressAction(1, progressActionSet);
                        ProgressActionSet.EstimatedTimeRemainingAction(workDivision.StartTicks, workDivision.ItemsCompleted, workDivision.ItemsToProcess, progressActionSet);

                        return(pdbIdOccurrences[a].Count() != numberOfChainsRequired);
                    }).ToList();

                    return(taskResult);
                }, cancellationToken);

                workDivision.TaskList.Add(task);
            }

            workDivision.WaitAllTasks();

            var sequencesWithIncorrectNumberOfChains = new List <string>();

            // The status checks must come before Result: reading Result on a cancelled or faulted task throws,
            // which would make the IsCanceled / IsFaulted tests pointless.
            foreach (var task in workDivision.TaskList.Where(t => t != null && t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null))
            {
                sequencesWithIncorrectNumberOfChains.AddRange(task.Result);
            }

            var result = RemoveSequences(cancellationToken, sequenceList, sequencesWithIncorrectNumberOfChains);

            ProgressActionSet.FinishAction(true, progressActionSet);

            return(result);
        }
Пример #4
0
        /// <summary>
        ///     Clusters each PDB file belonging to the loaded sequences, groups the proteins by chain count,
        ///     per-chain protein interface counts and (optionally) distinct protein interface lengths, and saves one
        ///     FASTA file per group. Finishes by calling <c>ConfirmSequencesSaved</c> on the files written.
        /// </summary>
        /// <param name="cancellationToken">Passed to the clustering and sequence-removal calls.</param>
        /// <param name="maxAtomInterationDistance">Passed to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="minimumProteinInterfaceDensity">Passed to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="sequenceListFileArray">Sequence files to load; blank entries are ignored by the existence check.</param>
        /// <param name="pdbFileDirectoryLocationArray">Directories searched for PDB files; blank entries are ignored by the existence check.</param>
        /// <param name="filterProteinInterfacesLengthOutputFilename">Output filename template; must contain "%proteinInterfaces%", which is replaced by the group key.</param>
        /// <param name="filterProteinInterfaceCountsWithoutLengths">When true, groups are created without the lengths part of the key.</param>
        /// <param name="filterProteinInterfaceCountsWithLengths">When true, groups are created with the lengths part of the key.</param>
        /// <param name="fileExistsOptions">Passed to <c>SequenceFileHandler.SaveSequencesAsFasta</c>.</param>
        /// <param name="progressActionSet">Receives report messages and progress updates.</param>
        /// <exception cref="ArgumentNullException">One of the array arguments or the output filename is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The output filename is blank or lacks the "%proteinInterfaces%" placeholder.</exception>
        public static void FilterProteinInterfaceLengths(
            CancellationToken cancellationToken,
            decimal maxAtomInterationDistance,
            decimal minimumProteinInterfaceDensity,
            string[] sequenceListFileArray,
            string[] pdbFileDirectoryLocationArray,
            string filterProteinInterfacesLengthOutputFilename,
            bool filterProteinInterfaceCountsWithoutLengths,
            bool filterProteinInterfaceCountsWithLengths,
            FileExistsHandler.FileExistsOptions fileExistsOptions,
            ProgressActionSet progressActionSet)
        {
            if (sequenceListFileArray == null)
            {
                throw new ArgumentNullException(nameof(sequenceListFileArray));
            }
            if (pdbFileDirectoryLocationArray == null)
            {
                throw new ArgumentNullException(nameof(pdbFileDirectoryLocationArray));
            }
            if (filterProteinInterfacesLengthOutputFilename == null)
            {
                throw new ArgumentNullException(nameof(filterProteinInterfacesLengthOutputFilename));
            }
            if (!filterProteinInterfaceCountsWithoutLengths && !filterProteinInterfaceCountsWithLengths)
            {
                ProgressActionSet.Report("Cancelled: No filter options selected.", progressActionSet);
                return;
            }

            // Check all sequence files are found
            var missingSequenceFiles = sequenceListFileArray.Where(sequenceFile => !string.IsNullOrWhiteSpace(sequenceFile) && !File.Exists(sequenceFile)).ToList();

            if (missingSequenceFiles.Count > 0)
            {
                foreach (string missingSequenceFile in missingSequenceFiles)
                {
                    ProgressActionSet.Report("Warning: Sequence file missing: " + missingSequenceFile, progressActionSet);
                }

                ProgressActionSet.Report("Cancelled: missing sequence files.", progressActionSet);
                return;
            }

            // Check all pdb folders are found
            var missingDirectoryList = pdbFileDirectoryLocationArray.Where(pdbDirectory => !string.IsNullOrWhiteSpace(pdbDirectory) && !Directory.Exists(pdbDirectory)).ToList();

            if (missingDirectoryList.Count > 0)
            {
                foreach (string pdbDirectory in missingDirectoryList)
                {
                    ProgressActionSet.Report("Warning: Structure file directory missing: " + pdbDirectory, progressActionSet);
                }

                ProgressActionSet.Report("Cancelled: missing structure file directory.", progressActionSet);
                return;
            }

            const string proteinInterfacesTemplateText = "%proteinInterfaces%";

            if (string.IsNullOrWhiteSpace(filterProteinInterfacesLengthOutputFilename) || !filterProteinInterfacesLengthOutputFilename.Contains(proteinInterfacesTemplateText))
            {
                throw new ArgumentOutOfRangeException(nameof(filterProteinInterfacesLengthOutputFilename));
            }

            // Load fasta sequence files
            List <ISequence> sequenceList = SequenceFileHandler.LoadSequenceFileList(sequenceListFileArray, StaticValues.MolNameProteinAcceptedValues);

            // Get a list of the PDB Unique IDs with unique chain IDs which are wanted, ignoring others which may be present e.g. dna
            var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequenceList);

            // Get list of PDB Unique IDs
            List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequenceList);

            // Check PDB Unique IDs were successfully loaded
            if (pdbIdList == null || pdbIdList.Count == 0)
            {
                ProgressActionSet.Report("Error: Sequence list could not be loaded", progressActionSet);
                return;
            }

            // Get a list of PDB files found in user specified directories, restricted to the loaded PDB IDs
            string[] pdbFilesArray = ProteinDataBankFileOperations.RemoveNonWhiteListedPdbIdFromPdbFilesArray(pdbIdList, ProteinDataBankFileOperations.GetPdbFilesArray(pdbFileDirectoryLocationArray));

            // Check all PDB files are found
            List <string> missingPdbFilesList = ProteinDataBankFileOperations.CheckForMissingPdbFiles(pdbFilesArray, pdbIdList);

            if (missingPdbFilesList != null && missingPdbFilesList.Count > 0)
            {
                ProgressActionSet.Report("Missing PDB Files: " + string.Join(", ", missingPdbFilesList), progressActionSet);
            }

            ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet);

            // Set lookup instead of scanning the ID list once per PDB file.
            var wantedPdbIds = new HashSet <string>(pdbIdList);

            var proteinInterfacesCountResultWithLengths = new Dictionary <string, List <string> >();

            var startTicks = DateTime.Now.Ticks;

            // Loop through each pdb file. The loop runs one extra time so that the progress update for the
            // last file is still sent, even when that file's iteration ended early with 'continue'.
            for (int pdbFileNumber = 0; pdbFileNumber <= pdbFilesArray.Length; pdbFileNumber++)
            {
                // Report the file handled by the previous iteration.
                if (pdbFileNumber > 0)
                {
                    ProgressActionSet.ProgressAction(1, progressActionSet);
                }

                // Checked on every pass (not only after a progress update) so an empty file array
                // never reaches the indexer below.
                if (pdbFileNumber >= pdbFilesArray.Length)
                {
                    break;
                }

                ProgressActionSet.EstimatedTimeRemainingAction(startTicks, pdbFileNumber, pdbFilesArray.Length, progressActionSet);

                // get unique id of pdb file
                string pdbFilename = pdbFilesArray[pdbFileNumber];
                string proteinId   = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

                // check pdb unique id was in the loaded sequence list
                if (!wantedPdbIds.Contains(proteinId))
                {
                    continue;
                }

                // perform clustering to detect interaction proteinInterfaces
                ClusterProteinDataBankFileResult clusterPdbFileResult = Clustering.ClusterProteinDataBankFile(cancellationToken, maxAtomInterationDistance, minimumProteinInterfaceDensity, pdbFilename, pdbIdChainIdList, ClusteringMethodOptions.ClusterWithResidueSequenceIndex, -1, -1, null);

                if (clusterPdbFileResult == null)
                {
                    continue;
                }

                var chainCount = clusterPdbFileResult.ClusteringFullResultListContainer.ChainList.Count;

                List <ProteinInterfaceSequenceAndPositionData> proteinInterfaces = clusterPdbFileResult.ProteinInterfaceAnalysisResultData.ProteinInterfacesSequenceAndPositionDataList;

                // Number of protein interfaces found on each chain.
                int[] proteinInterfacesCount = new int[chainCount];

                for (int chainIndex = 0; chainIndex < chainCount; chainIndex++)
                {
                    proteinInterfacesCount[chainIndex] = proteinInterfaces.Count(proteinInterface => proteinInterface.FullProteinInterfaceId.ChainId == chainIndex);
                }

                // Sorted so the key does not depend on chain order.
                var proteinInterfacesCountStr = string.Join(" ", proteinInterfacesCount.OrderBy(x => x));

                int[] proteinInterfacesLength = new int[proteinInterfaces.Count];

                for (int index = 0; index < proteinInterfaces.Count; index++)
                {
                    proteinInterfacesLength[index] = proteinInterfaces[index].ProteinInterfaceLength;
                }

                var proteinInterfacesLengthStr = string.Join(" ", proteinInterfacesLength.Distinct().OrderBy(x => x));

                if (proteinInterfacesLength.Length == 0)
                {
                    proteinInterfacesLengthStr = 0.ToString();
                }

                if (filterProteinInterfaceCountsWithoutLengths)
                {
                    var combinedKeyAll = "chains [" + chainCount + "] proteinInterfaces [" + proteinInterfacesCountStr + "]";

                    List <string> proteinIdsForKey;

                    if (!proteinInterfacesCountResultWithLengths.TryGetValue(combinedKeyAll, out proteinIdsForKey))
                    {
                        proteinIdsForKey = new List <string>();
                        proteinInterfacesCountResultWithLengths.Add(combinedKeyAll, proteinIdsForKey);
                    }

                    proteinIdsForKey.Add(proteinId);
                }

                if (filterProteinInterfaceCountsWithLengths)
                {
                    var combinedKeyWithLengths = "chains [" + chainCount + "] proteinInterfaces [" + proteinInterfacesCountStr + "] lengths [" + proteinInterfacesLengthStr + "]";

                    List <string> proteinIdsForKey;

                    if (!proteinInterfacesCountResultWithLengths.TryGetValue(combinedKeyWithLengths, out proteinIdsForKey))
                    {
                        proteinIdsForKey = new List <string>();
                        proteinInterfacesCountResultWithLengths.Add(combinedKeyWithLengths, proteinIdsForKey);
                    }

                    proteinIdsForKey.Add(proteinId);
                }
            }

            var confirmSaveList = new List <string>();

            // Save one file per group key, containing only the sequences of the proteins in that group.
            foreach (var kvp in proteinInterfacesCountResultWithLengths)
            {
                var seq2 = new List <ISequence>(sequenceList);
                seq2 = FilterProteins.RemoveSequences(cancellationToken, seq2, kvp.Value, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                var saveFilename = filterProteinInterfacesLengthOutputFilename.Replace(proteinInterfacesTemplateText, kvp.Key);

                var actualSavedFilename = SequenceFileHandler.SaveSequencesAsFasta(seq2, saveFilename, true, fileExistsOptions, progressActionSet);

                if (!string.IsNullOrWhiteSpace(actualSavedFilename))
                {
                    confirmSaveList.Add(actualSavedFilename);
                }
            }

            // Confirm the total number of sequences saved is equal to original number loaded
            ConfirmSequencesSaved(pdbIdList, confirmSaveList, progressActionSet);

            ProgressActionSet.FinishAction(true, progressActionSet);
        }
        /// <summary>
        ///     This method iterates through the provided FASTA files creating separate calculated outputs for each of them.
        /// </summary>
        /// <param name="maxAtomInterationDistance">Passed to <c>MakeHomodimerStatisticsSpreadsheetsList</c>.</param>
        /// <param name="fastaFiles">The FASTA files to process. Blank entries are skipped.</param>
        /// <param name="pdbFilesFolders">The locations where PDB files may be found.</param>
        /// <param name="spreadsheetSaveFilenameTemplate">
        ///     A template filename to save the outputs. The placeholders %spreadsheet_name%, %fasta_filename%, %date% and
        ///     %time% are substituted; %batch_number% and %batch_letter% are removed.
        /// </param>
        /// <param name="saveTsv">Passed to <c>SpreadsheetFileHandler.SaveSpreadsheet</c>; at least one of saveTsv / saveXl must be true.</param>
        /// <param name="saveXl">Passed to <c>SpreadsheetFileHandler.SaveSpreadsheet</c>; at least one of saveTsv / saveXl must be true.</param>
        /// <param name="cancellationToken">Checked after the spreadsheet data is calculated and before each spreadsheet is saved.</param>
        /// <param name="progressActionSet">Receives report messages and progress updates.</param>
        /// <param name="fileExistsOptions">Passed to <c>SpreadsheetFileHandler.SaveSpreadsheet</c>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="fastaFiles" /> or <paramref name="pdbFilesFolders" /> is null or empty, or both
        ///     <paramref name="saveTsv" /> and <paramref name="saveXl" /> are false.
        /// </exception>
        public static void MakeHomodimerStatisticsSpreadsheetsAndOutputFiles(decimal maxAtomInterationDistance, string[] fastaFiles, string[] pdbFilesFolders, string spreadsheetSaveFilenameTemplate, bool saveTsv, bool saveXl, CancellationToken cancellationToken, ProgressActionSet progressActionSet = null, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            if (fastaFiles == null || fastaFiles.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(fastaFiles));
            }

            if (pdbFilesFolders == null || pdbFilesFolders.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilesFolders));
            }

            if (!saveTsv && !saveXl)
            {
                throw new ArgumentOutOfRangeException(nameof(saveTsv));
            }

            for (int fastaFileNumber = 0; fastaFileNumber < fastaFiles.Length; fastaFileNumber++)
            {
                string fastaFilename = fastaFiles[fastaFileNumber];

                if (string.IsNullOrWhiteSpace(fastaFilename))
                {
                    continue;
                }

                ProgressActionSet.Report("Attempting to open file: " + fastaFilename, progressActionSet);

                List <ISequence> sequences = SequenceFileHandler.LoadSequenceFile(fastaFilename, StaticValues.MolNameProteinAcceptedValues);

                // Validate the load result before anything else consumes it.
                if ((sequences == null) || (sequences.Count == 0))
                {
                    ProgressActionSet.Report("Error could not load file: " + fastaFilename, progressActionSet);
                    continue;
                }
                ProgressActionSet.Report("Loaded " + sequences.Count + " sequences from file: " + fastaFilename, progressActionSet);

                var           pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences);
                List <string> pdbIdList        = FilterProteins.SequenceListToPdbIdList(sequences);
                string        appendFilename   = FileAndPathMethods.FullPathToFilename(fastaFilename);

                ProgressActionSet.Report("Creating spreadsheets...", progressActionSet);
                Stopwatch stopwatch       = Stopwatch.StartNew();
                var       spreadsheetList = MakeHomodimerStatisticsSpreadsheetsList(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);
                stopwatch.Stop();
                ProgressActionSet.Report("Finished calculating spreadsheet data [Elapsed: " + stopwatch.Elapsed.ToString(@"dd\:hh\:mm\:ss") + "]", progressActionSet);

                if (cancellationToken.IsCancellationRequested)
                {
                    // Drive the progress display to its end state before reporting the cancellation.
                    ProgressActionSet.StartAction(100, progressActionSet);
                    ProgressActionSet.ProgressAction(100, progressActionSet);
                    ProgressActionSet.FinishAction(false, progressActionSet);
                    ProgressActionSet.Report("Cancelled.", progressActionSet);
                    break;
                }

                for (int spreadsheetIndex = 0; spreadsheetIndex < spreadsheetList.Count; spreadsheetIndex++)
                {
                    var spreadsheet = spreadsheetList[spreadsheetIndex];

                    if (cancellationToken.IsCancellationRequested)
                    {
                        break;
                    }

                    // Remove the first row (which has the name for use in a worksheet title)
                    var sheetName       = spreadsheet[0][0].CellData;
                    var spreadsheetName = spreadsheet[1][0].CellData;
                    spreadsheet.RemoveAt(0);

                    // Sample the clock once so the file name and the sheet name always carry the same stamp.
                    DateTime now      = DateTime.Now;
                    string   dateText = now.ToString("yyyy-MM-dd");
                    string   timeText = now.ToString("HH.mm.ss");

                    // Example template: "c:/dResults/Results - %date% %time% - %fasta_filename% - %spreadsheet_name%.tsv"
                    string saveFilename = spreadsheetSaveFilenameTemplate;

                    saveFilename = saveFilename.Replace("%spreadsheet_name%", spreadsheetName);
                    saveFilename = saveFilename.Replace("%fasta_filename%", appendFilename);
                    saveFilename = saveFilename.Replace("%date%", dateText);
                    saveFilename = saveFilename.Replace("%time%", timeText);
                    saveFilename = saveFilename.Replace("%batch_number%", "");
                    saveFilename = saveFilename.Replace("%batch_letter%", "");

                    sheetName = sheetName.Replace("%spreadsheet_name%", spreadsheetName);
                    sheetName = sheetName.Replace("%fasta_filename%", appendFilename);
                    sheetName = sheetName.Replace("%date%", dateText);
                    sheetName = sheetName.Replace("%time%", timeText);
                    sheetName = sheetName.Replace("%batch_number%", "");
                    sheetName = sheetName.Replace("%batch_letter%", "");

                    var xlFilename = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".xlsx");

                    var savedFiles = SpreadsheetFileHandler.SaveSpreadsheet(xlFilename.FullName, new[] { sheetName }, spreadsheet, null, saveTsv, saveXl, fileExistsOptions);

                    ProgressActionSet.ReportFilesSaved(savedFiles, progressActionSet);
                }
            }

            ProgressActionSet.Report("Finished processing files.", progressActionSet);
        }
Пример #6
0
        /// <summary>
        ///     Filters the given FASTA files and PDB files with the given options and saves the results to disk.  Data needs to be
        ///     cleaned for two reasons, firstly to not pollute or distort the results, and secondly to save unnecessary processing
        ///     operations.
        /// </summary>
        /// <param name="cancellationToken"></param>
        /// <param name="pdbFilesFolders"></param>
        /// <param name="fastaFiles"></param>
        /// <param name="proteinOperationOptionFlags"></param>
        /// <param name="saveFastaFilenameTemplate"></param>
        /// <param name="consoleTextBox"></param>
        /// <param name="progressBar"></param>
        /// <param name="estimatedTimeRemaining"></param>
        public static void CleanProteins(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFilesFolders, string[] fastaFiles, ProteinOperation proteinOperationOptionFlags, string saveFastaFilenameTemplate, ProgressActionSet progressActionSet, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            if (pdbFilesFolders == null || pdbFilesFolders.Length == 0)
            {
                if (proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveMultipleModelsInStructure) || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonInteractingProteinsInStructure) || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonSymmetricalInStructure) || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInStructure))
                {
                    throw new ArgumentOutOfRangeException(nameof(pdbFilesFolders));
                }
            }

            if (fastaFiles == null || fastaFiles.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(fastaFiles));
            }

            if (string.IsNullOrWhiteSpace(saveFastaFilenameTemplate))
            {
                throw new ArgumentOutOfRangeException(nameof(saveFastaFilenameTemplate));
            }

            string[] pdbFilesArray = ProteinDataBankFileOperations.GetPdbFilesArray(pdbFilesFolders);
            //List<string> pdbIdList = null;
            int    beforeCount             = 0;
            int    afterCount              = 0;
            string saveFilename            = saveFastaFilenameTemplate;
            var    currentProteinOperation = ProteinOperation.LoadFile;

            int[] numberSequencesLoaded;
            var   sequences = new List <ISequence> [3];

            //UserProteinInterfaceOperations.TextBoxClear(consoleTextBox);
            ProgressActionSet.Report("Filtering proteins.", progressActionSet);

            // Load fasta/sequence files.
            sequences[0] = SequenceFileHandler.LoadSequenceFileList(fastaFiles, StaticValues.MolNameProteinAcceptedValues, out numberSequencesLoaded, true);
            var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences[0]);

            for (int numberSequencesLoadedIndex = 0; numberSequencesLoadedIndex < numberSequencesLoaded.Length; numberSequencesLoadedIndex++)
            {
                if (numberSequencesLoaded[numberSequencesLoadedIndex] > 0)
                {
                    ProgressActionSet.Report("Loaded " + numberSequencesLoaded[numberSequencesLoadedIndex] / 2 + " proteins from file: " + fastaFiles[numberSequencesLoadedIndex], progressActionSet);
                }
                else
                {
                    ProgressActionSet.Report("Error could not load file: " + fastaFiles[numberSequencesLoadedIndex], progressActionSet);
                }
            }

            if (numberSequencesLoaded.Count(a => a > 0) == 0)
            {
                return;
            }

            // Replace placeholder variable names.
            saveFilename = saveFilename.Replace("%date%", DateTime.Now.ToString("yyyy-MM-dd"));
            saveFilename = saveFilename.Replace("%time%", DateTime.Now.ToString("HH.mm.ss"));

            // Save initial loaded sequences.

            if (File.Exists(saveFilename))
            {
                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                {
                    saveFilename = FileExistsHandler.FindNextFreeOutputFilename(saveFilename);
                }
                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.OverwriteFile)
                {
                }
                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                {
                    return;
                }
            }

            // Removes any entries not having a protein alphabet.
            while (currentProteinOperation != ProteinOperation.Finished)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    break;
                }

                currentProteinOperation = (ProteinOperation)((int)currentProteinOperation * 2);
                sequences[1]            = null;
                sequences[2]            = null;
                var sequencesDescriptions = new string[3];

                if (currentProteinOperation == ProteinOperation.Finished)
                {
                    break;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonProteinAlphabetInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonProteinAlphabetInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveWrongNumberOfChainsInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveExactDuplicatesInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveExactDuplicatesInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonHomodimersInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonHomodimersInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveWrongNumberOfChainsInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInStructure))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveMultipleModelsInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveMultipleModelsInStructure))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonInteractingProteinsInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonInteractingProteinsInStructure))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonSymmetricalInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonSymmetricalInStructure))
                {
                    continue;
                }

                // Count sequences before operation.
                beforeCount = sequences[0].Count / 2;

                // Update user about what is happening.
                ProgressActionSet.Report("", progressActionSet);
                ProgressActionSet.Report("Removing " + ProteinOperationString(currentProteinOperation) + " entries [from " + beforeCount + " proteins]", progressActionSet);

                // Start stopwatch to count duration of operation.
                Stopwatch stopwatch = Stopwatch.StartNew();

                // Perform specified operation.
                switch (currentProteinOperation)
                {
                case ProteinOperation.RemoveNonProteinAlphabetInSequence:
                {
                    sequencesDescriptions[0] = "01 - Removed non-protein sequences (sequence filter)";
                    sequences[0]             = FilterProteins.RemoveNonProteinAlphabetSequences(cancellationToken, sequences[0], progressActionSet);
                    break;
                }

                case ProteinOperation.RemoveWrongNumberOfChainsInSequence:
                {
                    sequencesDescriptions[0] = "02 - Removed non-dimers (sequence filter)";
                    sequences[0]             = FilterProteins.RemoveSequencesWithIncorrectNumberOfChains(cancellationToken, sequences[0], 2, progressActionSet);
                    break;
                }

                case ProteinOperation.RemoveExactDuplicatesInSequence:
                {
                    sequencesDescriptions[0] = "03 - Removed exact duplicates (sequence filter)";
                    sequences[0]             = FilterProteins.RemoveDuplicates(cancellationToken, sequences[0], progressActionSet);
                    break;
                }

                case ProteinOperation.RemoveNonHomodimersInSequence:
                {
                    // homodimers - all types - unfiltered for interactions or symmetry

                    var result = FilterProteins.SplitDimerTypes(cancellationToken, sequences[0], 30, 90, progressActionSet);

                    sequencesDescriptions[0] = "04 - Homodimers only (sequence filter)";
                    sequences[0]             = result.HomoDimerPdbIdList;

                    sequencesDescriptions[1] = "04 - Heterodimers only (sequence filter)";
                    sequences[1]             = result.HeteroDimerPdbIdList;

                    sequencesDescriptions[2] = "04 - Homology dimers only (sequence filter)";
                    sequences[2]             = result.HomologyDimerPdbIdList;
                    break;
                }

                case ProteinOperation.RemoveMultipleModelsInStructure:
                {
                    sequencesDescriptions[0] = "05 - Removed multiple models (structure filter)";
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);
                    pdbIdList    = FilterProteins.RemoveMultipleStructureModels(cancellationToken, pdbFilesFolders, pdbIdList, progressActionSet);
                    sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                    break;
                }

                case ProteinOperation.RemoveWrongNumberOfChainsInStructure:
                {
                    sequencesDescriptions[0] = "06 - Removed non-dimers (structure filter)";
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);

                    //var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences[0]);

                    pdbIdList    = FilterProteins.RemoveStructuresWithIncorrectNumberOfChains(cancellationToken, pdbFilesFolders, pdbIdList, pdbIdChainIdList, 2, progressActionSet);
                    sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                    break;
                }

                case ProteinOperation.RemoveNonInteractingProteinsInStructure:
                {
                    // Make copy of sequences as we will split the list into two parts - with and without interactions.
                    sequences[1] = new List <ISequence>(sequences[0]);

                    // Get pdb id list from sequences, to check for pdb file, load, perform processing.
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);

                    // Makes a list of sequences with interactions.
                    pdbIdList = FilterProteins.RemoveSequencesWithoutInteractions(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);

                    // Remove any protein not in the list, keep the ones in the list.
                    sequencesDescriptions[0] = "08 - dimers - with interactions - unfiltered for symmetry";
                    sequences[0]             = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    sequencesDescriptions[1] = "07 - dimers - no observed interactions";
                    sequences[1]             = FilterProteins.RemoveSequences(cancellationToken, sequences[1], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                    break;
                }

                case ProteinOperation.RemoveNonSymmetricalInStructure:
                {
                    // Make copy of sequences as we will split the list into two parts - with and without symmetry.
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);
                    sequences[1] = new List <ISequence>(sequences[0]);
                    sequences[2] = new List <ISequence>(sequences[0]);
                    Dictionary <string, decimal> symmetryPercentage = FilterProteins.CalculateStructureSymmetry(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);

                    var pdbSymmetrical     = new List <string>();
                    var pdbPartSymmetrical = new List <string>();
                    var pdbNonSymmetrical  = new List <string>();

                    foreach (var symmetryPercentageKeyValuePair in symmetryPercentage)
                    {
                        if (symmetryPercentageKeyValuePair.Value == 0.0m)
                        {
                            pdbNonSymmetrical.Add(symmetryPercentageKeyValuePair.Key);
                        }
                        else if (symmetryPercentageKeyValuePair.Value == 100.0m)
                        {
                            pdbSymmetrical.Add(symmetryPercentageKeyValuePair.Key);
                        }
                        else if (symmetryPercentageKeyValuePair.Value > 0.0m && symmetryPercentageKeyValuePair.Value < 100.0m)
                        {
                            pdbPartSymmetrical.Add(symmetryPercentageKeyValuePair.Key);
                        }
                        else
                        {
                            ProgressActionSet.Report("Error: Out of bounds symmetry value of " + symmetryPercentageKeyValuePair.Value + " was found in " + symmetryPercentageKeyValuePair.Key + ".", progressActionSet);
                        }
                    }

                    sequencesDescriptions[0] = "11 - dimers - with interactions - 100% symmetrical";
                    sequences[0]             = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    sequencesDescriptions[1] = "10 - dimers - with interactions - 1% to 99% symmetrical";
                    sequences[1]             = FilterProteins.RemoveSequences(cancellationToken, sequences[1], pdbPartSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    sequencesDescriptions[2] = "09 - dimers - with interactions - 0% symmetrical";
                    sequences[2]             = FilterProteins.RemoveSequences(cancellationToken, sequences[2], pdbNonSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    break;
                }
                }

                // Stop stopwatch immediately after operation.
                stopwatch.Stop();

                // Count sequences after operation.
                afterCount = sequences[0].Count / 2;

                if (!cancellationToken.IsCancellationRequested)
                {
                    for (int sequencesIndex = sequences.GetLowerBound(0); sequencesIndex <= sequences.GetUpperBound(0); sequencesIndex++)
                    {
                        if (sequences[sequencesIndex] != null)
                        {
                            // Find free filename to save the latest sequence results of operations.
                            string localSaveFilename = saveFilename;
                            localSaveFilename = localSaveFilename.Replace("%fasta_filename%", sequencesDescriptions[sequencesIndex]);


                            bool skipFile = false;

                            if (File.Exists(localSaveFilename))
                            {
                                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                                {
                                    localSaveFilename = FileExistsHandler.FindNextFreeOutputFilename(localSaveFilename);
                                }
                                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.OverwriteFile)
                                {
                                }
                                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                                {
                                    skipFile = true;
                                }
                            }


                            if (!skipFile)
                            {
                                // Save the sequence results to previous set filename.
                                string savedFile /*s*/ = SequenceFileHandler.SaveSequencesAsFasta(sequences[sequencesIndex], localSaveFilename);

                                // Inform user that file has been saved.
                                //foreach (char savedFile in savedFiles)
                                //{
                                ProgressActionSet.Report("Saved file: " + savedFile, progressActionSet);
                                //}
                            }
                        }
                    }

                    // Update the user about the results.
                    ProgressActionSet.Report("Removed " + (beforeCount - afterCount) + " proteins. [" + afterCount + " proteins remaining]. Elapsed: " + stopwatch.Elapsed.ToString(@"dd\:hh\:mm\:ss"), progressActionSet);
                }
            }

            if (!cancellationToken.IsCancellationRequested)
            {
                ProgressActionSet.Report("Finished all selected filtering operations.", progressActionSet);
            }
            else
            {
                ProgressActionSet.Report("Cancelled.", progressActionSet);
                //UserProteinInterfaceOperations.ProgressBarReset(progressBar, 0, 100, 0);
                ////UserProteinInterfaceOperations.LabelEstimatedTimeRemainingUpdate(estimatedTimeRemaining, 0, 1, 1);

                ProgressActionSet.StartAction(100, progressActionSet);
                ProgressActionSet.ProgressAction(100, progressActionSet);
                ProgressActionSet.FinishAction(false, progressActionSet);
            }
        }