Ejemplo n.º 1
0
        /// <summary>
        ///     Writes the given interaction output lines to a file, creating the containing directory first and
        ///     applying the requested policy when a file with the same name already exists.
        /// </summary>
        /// <param name="saveFilename">Path of the file to write.</param>
        /// <param name="interactionOutputStrings">The lines to write to the file.</param>
        /// <param name="fileExistsOptions">What to do when the target file already exists.</param>
        /// <returns>The full path of the file that was written, or null when the existing file was skipped.</returns>
        public static string SaveInteractionsOutput(string saveFilename, string[] interactionOutputStrings, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            var target = new FileInfo(saveFilename);

            // The containing directory is created up front, even when the write ends up being skipped.
            target.Directory?.Create();

            if (File.Exists(target.FullName))
            {
                switch (fileExistsOptions)
                {
                    case FileExistsHandler.FileExistsOptions.AppendNumberToFilename:
                        // Pick an unused filename instead of touching the existing file.
                        target = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(target.FullName));
                        break;

                    case FileExistsHandler.FileExistsOptions.SkipFile:
                        return null;

                    case FileExistsHandler.FileExistsOptions.OverwriteFile:
                        // Nothing to prepare: the write below replaces the existing file.
                        break;
                }
            }

            File.WriteAllLines(target.FullName, interactionOutputStrings);

            return target.FullName;
        }
Ejemplo n.º 2
0
        /// <summary>
        ///     Clusters every wanted PDB file, groups the proteins by their number of chains and their per-chain protein
        ///     interface counts (and, optionally, by the distinct protein interface lengths too), and then saves one
        ///     FASTA file per group.
        /// </summary>
        /// <param name="cancellationToken">Token passed on to the clustering and sequence-removal steps.</param>
        /// <param name="maxAtomInterationDistance">Maximum atom interaction distance, passed on to the clustering step.</param>
        /// <param name="minimumProteinInterfaceDensity">Minimum protein interface density, passed on to the clustering step.</param>
        /// <param name="sequenceListFileArray">Sequence files to load. Blank entries are ignored by the existence check.</param>
        /// <param name="pdbFileDirectoryLocationArray">Directories to search for PDB files. Blank entries are ignored by the existence check.</param>
        /// <param name="filterProteinInterfacesLengthOutputFilename">Output filename template. It must contain the text "%proteinInterfaces%", which is replaced by the group key.</param>
        /// <param name="filterProteinInterfaceCountsWithoutLengths">Whether to create groups keyed by chain count and protein interface counts only.</param>
        /// <param name="filterProteinInterfaceCountsWithLengths">Whether to create groups keyed by chain count, protein interface counts and protein interface lengths.</param>
        /// <param name="fileExistsOptions">What to do when an output file already exists; passed on to the FASTA save step.</param>
        /// <param name="progressActionSet">Receives the progress updates and messages.</param>
        /// <exception cref="ArgumentNullException">A file array, directory array or the filename template is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The filename template is blank or does not contain "%proteinInterfaces%".</exception>
        public static void FilterProteinInterfaceLengths(
            CancellationToken cancellationToken,
            decimal maxAtomInterationDistance,
            decimal minimumProteinInterfaceDensity,
            string[] sequenceListFileArray,
            string[] pdbFileDirectoryLocationArray,
            string filterProteinInterfacesLengthOutputFilename,
            bool filterProteinInterfaceCountsWithoutLengths,
            bool filterProteinInterfaceCountsWithLengths,
            FileExistsHandler.FileExistsOptions fileExistsOptions,
            ProgressActionSet progressActionSet)
        {
            if (sequenceListFileArray == null)
            {
                throw new ArgumentNullException(nameof(sequenceListFileArray));
            }
            if (pdbFileDirectoryLocationArray == null)
            {
                throw new ArgumentNullException(nameof(pdbFileDirectoryLocationArray));
            }
            if (filterProteinInterfacesLengthOutputFilename == null)
            {
                throw new ArgumentNullException(nameof(filterProteinInterfacesLengthOutputFilename));
            }
            if (!filterProteinInterfaceCountsWithoutLengths && !filterProteinInterfaceCountsWithLengths)
            {
                ProgressActionSet.Report("Cancelled: No filter options selected.", progressActionSet);
                return;
            }

            // Check all sequence files are found. Problems are reported rather than thrown.
            var missingSequenceFiles = sequenceListFileArray.Where(sequenceFile => !string.IsNullOrWhiteSpace(sequenceFile) && !File.Exists(sequenceFile)).ToList();

            if (missingSequenceFiles.Count > 0)
            {
                foreach (string missingSequenceFile in missingSequenceFiles)
                {
                    ProgressActionSet.Report("Warning: Sequence file missing: " + missingSequenceFile, progressActionSet);
                }

                ProgressActionSet.Report("Cancelled: missing sequence files.", progressActionSet);
                return;
            }

            // Check all pdb folders are found. Problems are reported rather than thrown.
            var missingDirectoryList = pdbFileDirectoryLocationArray.Where(pdbDirectory => !string.IsNullOrWhiteSpace(pdbDirectory) && !Directory.Exists(pdbDirectory)).ToList();

            if (missingDirectoryList.Count > 0)
            {
                foreach (string pdbDirectory in missingDirectoryList)
                {
                    ProgressActionSet.Report("Warning: Structure file directory missing: " + pdbDirectory, progressActionSet);
                }

                ProgressActionSet.Report("Cancelled: missing structure file directory.", progressActionSet);
                return;
            }

            const string proteinInterfacesTemplateText = "%proteinInterfaces%";

            // Without the placeholder every group would be saved under the same filename.
            if (string.IsNullOrWhiteSpace(filterProteinInterfacesLengthOutputFilename) || !filterProteinInterfacesLengthOutputFilename.Contains(proteinInterfacesTemplateText))
            {
                throw new ArgumentOutOfRangeException(nameof(filterProteinInterfacesLengthOutputFilename));
            }

            // Load fasta sequence files
            List<ISequence> sequenceList = SequenceFileHandler.LoadSequenceFileList(sequenceListFileArray, StaticValues.MolNameProteinAcceptedValues);

            // Get a list of the PDB Unique IDs with unique chain IDs which are wanted, ignoring others which may be present e.g. dna
            var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequenceList);

            // Get list of PDB Unique IDs
            List<string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequenceList);

            // Check PDB Unique IDs were successfully loaded
            if (pdbIdList == null || pdbIdList.Count == 0)
            {
                ProgressActionSet.Report("Error: Sequence list could not be loaded", progressActionSet);
                return;
            }

            // Get a list of PDB files found in the user specified directories, limited to the wanted PDB IDs
            string[] pdbFilesArray = ProteinDataBankFileOperations.RemoveNonWhiteListedPdbIdFromPdbFilesArray(pdbIdList, ProteinDataBankFileOperations.GetPdbFilesArray(pdbFileDirectoryLocationArray));

            // Check all PDB files are found. Missing files are reported, but processing continues.
            List<string> missingPdbFilesList = ProteinDataBankFileOperations.CheckForMissingPdbFiles(pdbFilesArray, pdbIdList);

            if (missingPdbFilesList != null && missingPdbFilesList.Count > 0)
            {
                ProgressActionSet.Report("Missing PDB Files: " + string.Join(", ", missingPdbFilesList), progressActionSet);
            }

            ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet);

            int progressIncrement = 0;

            // Group key -> IDs of the proteins belonging to that group. Holds both kinds of key.
            var proteinInterfacesCountResultWithLengths = new Dictionary<string, List<string>>();

            // Set lookup avoids a linear scan of the ID list for every PDB file.
            var wantedPdbIds = new HashSet<string>(pdbIdList);

            var startTicks = DateTime.Now.Ticks;

            // Loop through each pdb file. The loop runs one extra pass so the last file's progress is reported.
            for (int pdbFileNumber = 0; pdbFileNumber <= pdbFilesArray.Length; pdbFileNumber++)
            {
                // Report the file handled by the previous pass. Reporting here rather than at the end of the
                // body means files that were skipped with 'continue' are still counted.
                if (progressIncrement > 0)
                {
                    ProgressActionSet.ProgressAction(progressIncrement, progressActionSet);
                    progressIncrement = 0;
                }

                // This check must not depend on pending progress: with an empty file array nothing is pending
                // on the first pass, and indexing the array below would throw.
                if (pdbFileNumber >= pdbFilesArray.Length)
                {
                    break;
                }

                ProgressActionSet.EstimatedTimeRemainingAction(startTicks, pdbFileNumber, pdbFilesArray.Length, progressActionSet);

                progressIncrement++;

                // get unique id of pdb file
                string pdbFilename = pdbFilesArray[pdbFileNumber];
                string proteinId   = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

                // check pdb unique id was in the loaded sequence list
                if (!wantedPdbIds.Contains(proteinId))
                {
                    continue;
                }

                // perform clustering to detect interaction proteinInterfaces
                ClusterProteinDataBankFileResult clusterPdbFileResult = Clustering.ClusterProteinDataBankFile(cancellationToken, maxAtomInterationDistance, minimumProteinInterfaceDensity, pdbFilename, pdbIdChainIdList, ClusteringMethodOptions.ClusterWithResidueSequenceIndex, -1, -1, null);

                if (clusterPdbFileResult == null)
                {
                    continue;
                }

                int chainCount = clusterPdbFileResult.ClusteringFullResultListContainer.ChainList.Count;
                List<ProteinInterfaceSequenceAndPositionData> proteinInterfaces = clusterPdbFileResult.ProteinInterfaceAnalysisResultData.ProteinInterfacesSequenceAndPositionDataList;

                // Number of protein interfaces found on each chain.
                int[] proteinInterfacesCount = new int[chainCount];

                for (int chainIndex = 0; chainIndex < chainCount; chainIndex++)
                {
                    proteinInterfacesCount[chainIndex] = proteinInterfaces.Count(proteinInterface => proteinInterface.FullProteinInterfaceId.ChainId == chainIndex);
                }

                // Counts are sorted so that the key does not depend on the chain order.
                var proteinInterfacesCountStr = string.Join(" ", proteinInterfacesCount.OrderBy(x => x));

                // Distinct, sorted protein interface lengths; "0" when no protein interface was found.
                var proteinInterfacesLengthStr = proteinInterfaces.Count == 0
                    ? 0.ToString()
                    : string.Join(" ", proteinInterfaces.Select(proteinInterface => proteinInterface.ProteinInterfaceLength).Distinct().OrderBy(x => x));

                var countsKey = "chains [" + chainCount + "] proteinInterfaces [" + proteinInterfacesCountStr + "]";

                var groupKeys = new List<string>(2);

                if (filterProteinInterfaceCountsWithoutLengths)
                {
                    groupKeys.Add(countsKey);
                }

                if (filterProteinInterfaceCountsWithLengths)
                {
                    groupKeys.Add(countsKey + " lengths [" + proteinInterfacesLengthStr + "]");
                }

                foreach (var groupKey in groupKeys)
                {
                    List<string> groupProteinIds;

                    if (!proteinInterfacesCountResultWithLengths.TryGetValue(groupKey, out groupProteinIds))
                    {
                        groupProteinIds = new List<string>();
                        proteinInterfacesCountResultWithLengths.Add(groupKey, groupProteinIds);
                    }

                    groupProteinIds.Add(proteinId);
                }
            }

            var confirmSaveList = new List<string>();

            // Save one FASTA file per group, holding only the sequences of the proteins in that group.
            foreach (var kvp in proteinInterfacesCountResultWithLengths)
            {
                var groupSequences = FilterProteins.RemoveSequences(cancellationToken, new List<ISequence>(sequenceList), kvp.Value, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                var saveFilename = filterProteinInterfacesLengthOutputFilename.Replace(proteinInterfacesTemplateText, kvp.Key);

                var actualSavedFilename = SequenceFileHandler.SaveSequencesAsFasta(groupSequences, saveFilename, true, fileExistsOptions, progressActionSet);

                if (!string.IsNullOrWhiteSpace(actualSavedFilename))
                {
                    confirmSaveList.Add(actualSavedFilename);
                }
            }

            // Confirm the total number of sequences saved is equal to original number loaded
            ConfirmSequencesSaved(pdbIdList, confirmSaveList, progressActionSet);

            ProgressActionSet.FinishAction(true, progressActionSet);
        }
Ejemplo n.º 3
0
        /// <summary>
        ///     Saves a tree, already generated by the NewickTreeFormat method, to a file with the ".tree" extension.
        /// </summary>
        /// <param name="outputTreeFilename">Output path. Its extension is replaced by ".tree".</param>
        /// <param name="treeList">The lines of the tree to write.</param>
        /// <param name="fileExistsOptions">What to do when the target file already exists.</param>
        /// <returns>The full path of the file that was written, or null when the existing file was skipped.</returns>
        public static string SaveNewickTree(string outputTreeFilename, List<string> treeList, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            string treePath = FileAndPathMethods.RemoveFileExtension(outputTreeFilename) + ".tree";
            var    treeFile = new FileInfo(treePath);

            if (treeFile.Exists)
            {
                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                {
                    return null;
                }

                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                {
                    // Pick an unused filename instead of touching the existing file.
                    string freePath = FileExistsHandler.FindNextFreeOutputFilename(treeFile.FullName);
                    treeFile = new FileInfo(freePath);
                }

                // Any other option (OverwriteFile) keeps the name; the write below replaces the file.
            }

            // Make sure the destination folder exists before writing.
            treeFile.Directory?.Create();

            File.WriteAllLines(treeFile.FullName, treeList);

            return treeFile.FullName;
        }
        /// <summary>
        ///     This method iterates through the provided FASTA files creating separate calculated outputs for each of them.
        /// </summary>
        /// <param name="maxAtomInterationDistance">Maximum atom interaction distance, passed on to the spreadsheet calculation.</param>
        /// <param name="fastaFiles">The FASTA files to process. Blank entries are skipped.</param>
        /// <param name="pdbFilesFolders">The locations where PDB files may be found.</param>
        /// <param name="spreadsheetSaveFilenameTemplate">
        ///     A template filename to save the outputs. The placeholders %spreadsheet_name%, %fasta_filename%, %date% and
        ///     %time% are filled in; %batch_number% and %batch_letter% are removed.
        /// </param>
        /// <param name="saveTsv">Whether to save the spreadsheets in TSV format.</param>
        /// <param name="saveXl">Whether to save the spreadsheets in XLSX format.</param>
        /// <param name="cancellationToken">Checked after the calculation of each FASTA file and before each spreadsheet is saved.</param>
        /// <param name="progressActionSet">Receives the progress updates and messages.</param>
        /// <param name="fileExistsOptions">What to do when an output file already exists; passed on to the save step.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="fastaFiles" /> or <paramref name="pdbFilesFolders" /> is null or empty, or neither output
        ///     format was selected.
        /// </exception>
        /// <exception cref="ArgumentNullException"><paramref name="spreadsheetSaveFilenameTemplate" /> is null.</exception>
        public static void MakeHomodimerStatisticsSpreadsheetsAndOutputFiles(decimal maxAtomInterationDistance, string[] fastaFiles, string[] pdbFilesFolders, string spreadsheetSaveFilenameTemplate, bool saveTsv, bool saveXl, CancellationToken cancellationToken, ProgressActionSet progressActionSet = null, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            if (fastaFiles == null || fastaFiles.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(fastaFiles));
            }

            if (pdbFilesFolders == null || pdbFilesFolders.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilesFolders));
            }

            if (!saveTsv && !saveXl)
            {
                throw new ArgumentOutOfRangeException(nameof(saveTsv));
            }

            // Fail before the expensive calculation rather than with a NullReferenceException while saving.
            if (spreadsheetSaveFilenameTemplate == null)
            {
                throw new ArgumentNullException(nameof(spreadsheetSaveFilenameTemplate));
            }

            for (int fastaFileNumber = 0; fastaFileNumber < fastaFiles.Length; fastaFileNumber++)
            {
                string fastaFilename = fastaFiles[fastaFileNumber];

                if (string.IsNullOrWhiteSpace(fastaFilename))
                {
                    continue;
                }

                ProgressActionSet.Report("Attempting to open file: " + fastaFilename, progressActionSet);

                List<ISequence> sequences = SequenceFileHandler.LoadSequenceFile(fastaFilename, StaticValues.MolNameProteinAcceptedValues);

                if ((sequences == null) || (sequences.Count == 0))
                {
                    ProgressActionSet.Report("Error could not load file: " + fastaFilename, progressActionSet);
                    continue;
                }
                ProgressActionSet.Report("Loaded " + sequences.Count + " sequences from file: " + fastaFilename, progressActionSet);

                // Derived only after the sequences are known to be loaded, so a failed load is never passed on.
                var           pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences);
                List<string>  pdbIdList        = FilterProteins.SequenceListToPdbIdList(sequences);
                string        appendFilename   = FileAndPathMethods.FullPathToFilename(fastaFilename);

                ProgressActionSet.Report("Creating spreadsheets...", progressActionSet);
                Stopwatch stopwatch       = Stopwatch.StartNew();
                var       spreadsheetList = MakeHomodimerStatisticsSpreadsheetsList(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);
                stopwatch.Stop();
                ProgressActionSet.Report("Finished calculating spreadsheet data [Elapsed: " + stopwatch.Elapsed.ToString(@"dd\:hh\:mm\:ss") + "]", progressActionSet);

                if (cancellationToken.IsCancellationRequested)
                {
                    // Start and fill an action before finishing it as unsuccessful, then stop processing files.
                    ProgressActionSet.StartAction(100, progressActionSet);
                    ProgressActionSet.ProgressAction(100, progressActionSet);
                    ProgressActionSet.FinishAction(false, progressActionSet);
                    ProgressActionSet.Report("Cancelled.", progressActionSet);
                    break;
                }

                for (int spreadsheetIndex = 0; spreadsheetIndex < spreadsheetList.Count; spreadsheetIndex++)
                {
                    var spreadsheet = spreadsheetList[spreadsheetIndex];

                    if (cancellationToken.IsCancellationRequested)
                    {
                        break;
                    }

                    // Remove the first row (which has the name for use in a worksheet title)
                    var sheetName       = spreadsheet[0][0].CellData;
                    var spreadsheetName = spreadsheet[1][0].CellData;
                    spreadsheet.RemoveAt(0);

                    // The clock is read once so the file name and the sheet name always carry the same timestamp.
                    DateTime now      = DateTime.Now;
                    string   dateText = now.ToString("yyyy-MM-dd");
                    string   timeText = now.ToString("HH.mm.ss");

                    // Template example: "c:/dResults/Results - %date% %time% - %fasta_filename% - %spreadsheet_name%.tsv"
                    // The batch placeholders are not currently used, so they are removed.
                    Func<string, string> fillPlaceholders = text => text
                        .Replace("%spreadsheet_name%", spreadsheetName)
                        .Replace("%fasta_filename%", appendFilename)
                        .Replace("%date%", dateText)
                        .Replace("%time%", timeText)
                        .Replace("%batch_number%", "")
                        .Replace("%batch_letter%", "");

                    string saveFilename = fillPlaceholders(spreadsheetSaveFilenameTemplate);
                    sheetName = fillPlaceholders(sheetName);

                    // The save step receives the .xlsx path together with the flags selecting the formats to write.
                    var xlFilename = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".xlsx");

                    var savedFiles = SpreadsheetFileHandler.SaveSpreadsheet(xlFilename.FullName, new[] { sheetName }, spreadsheet, null, saveTsv, saveXl, fileExistsOptions);

                    ProgressActionSet.ReportFilesSaved(savedFiles, progressActionSet);
                }
            }

            ProgressActionSet.Report("Finished processing files.", progressActionSet);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Outputs distance matrix in cluto dense/sparse matrix format (*.mat), row headings in cluto row label file format (*.mat.rlabel) and column headings in cluto column label file format (*.mat.clabel)
        /// </summary>
        /// <param name="distanceMatrix">The matrix to save, indexed as [column, row].</param>
        /// <param name="matrixOutputFilename">Path of the matrix file to write.</param>
        /// <param name="rowLabels">Optional row labels; one per matrix row.</param>
        /// <param name="rowLabelsOutputFilename">Path of the row label file. Required when <paramref name="rowLabels" /> is given.</param>
        /// <param name="columnLabels">Optional column labels; one per matrix column.</param>
        /// <param name="columnLabelsOutputFilename">Path of the column label file. Required when <paramref name="columnLabels" /> is given.</param>
        /// <param name="clutoMatrixFormatTypes">Selects dense or sparse output, and whether the top half, the bottom half or the complete matrix is written.</param>
        /// <param name="fileExistsOptions">What to do when an output file already exists. Applied to each output file separately.</param>
        /// <returns>
        ///     The filenames of the files which were saved. The array is empty when the matrix file already existed and was
        ///     skipped, in which case the label files are not written either.
        /// </returns>
        /// <exception cref="ArgumentNullException">The matrix is null or empty, or a required filename is null or blank.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The number of labels does not match the matrix dimensions.</exception>
        public static string[] ConvertToMatrixFile(decimal[,] distanceMatrix, string matrixOutputFilename, string[] rowLabels = null, string rowLabelsOutputFilename = null, string[] columnLabels = null, string columnLabelsOutputFilename = null, ClutoMatrixFormatTypes clutoMatrixFormatTypes = ClutoMatrixFormatTypes.SparseMatrixTopHalf, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            if (distanceMatrix == null || distanceMatrix.Length == 0)
            {
                throw new ArgumentNullException(nameof(distanceMatrix), "null or empty");
            }

            if (string.IsNullOrWhiteSpace(matrixOutputFilename))
            {
                throw new ArgumentNullException(nameof(matrixOutputFilename), "null or empty");
            }

            if (rowLabels != null && rowLabels.Length != distanceMatrix.GetLength(1))
            {
                throw new ArgumentOutOfRangeException(nameof(rowLabels), "length not equal to matrix rows");
            }

            if (columnLabels != null && columnLabels.Length != distanceMatrix.GetLength(0))
            {
                throw new ArgumentOutOfRangeException(nameof(columnLabels), "length not equal to matrix columns");
            }

            if (columnLabels != null && string.IsNullOrWhiteSpace(columnLabelsOutputFilename))
            {
                throw new ArgumentNullException(nameof(columnLabelsOutputFilename), "null or empty");
            }

            if (rowLabels != null && string.IsNullOrWhiteSpace(rowLabelsOutputFilename))
            {
                throw new ArgumentNullException(nameof(rowLabelsOutputFilename), "null or empty");
            }

            // The "half" formats keep only one side of the diagonal; the "complete" formats use the matrix as given.
            if (clutoMatrixFormatTypes == ClutoMatrixFormatTypes.DenseMatrixTopHalf || clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixTopHalf)
            {
                distanceMatrix = DiagonalZeroHalfMatrix(distanceMatrix, true);
            }
            else if (clutoMatrixFormatTypes == ClutoMatrixFormatTypes.DenseMatrixBottomHalf || clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixBottomHalf)
            {
                distanceMatrix = DiagonalZeroHalfMatrix(distanceMatrix, false);
            }

            bool sparse = clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixBottomHalf || clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixTopHalf || clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixComplete;

            // The file is read by other programs, so numbers must not depend on the culture of the machine
            // (e.g. a decimal comma would split one value into two unreadable tokens).
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var rowTotal    = distanceMatrix.GetLength(1);
            var columnTotal = distanceMatrix.GetLength(0);

            var lines = new List<string>(rowTotal + 1);

            // Header line: "rows columns" for dense output, "rows columns nonZeroEntries" for sparse output.
            var header = new List<string> { rowTotal.ToString(invariant), columnTotal.ToString(invariant) };

            if (sparse)
            {
                var nonZeroTotal = distanceMatrix.Cast<decimal>().Count(a => a != 0);
                header.Add(nonZeroTotal.ToString(invariant));
            }

            lines.Add(string.Join(" ", header));

            for (var y = 0; y < rowTotal; y++)
            {
                var rowCells = new List<string>();

                for (var x = 0; x < columnTotal; x++)
                {
                    var value = distanceMatrix[x, y];

                    if (sparse)
                    {
                        // Sparse rows list only the non-zero entries, as "column value" pairs.
                        if (value == 0)
                        {
                            continue;
                        }

                        // The cluto sparse format numbers columns starting from 1, not from 0.
                        rowCells.Add((x + 1).ToString(invariant));
                    }

                    rowCells.Add(value.ToString(invariant));
                }

                lines.Add(string.Join(" ", rowCells));
            }

            // Applies the file-exists policy to one output path: returns the path to write to,
            // or null when the file exists and has to be skipped.
            Func<string, string> resolveOutputPath = path =>
            {
                if (!File.Exists(path))
                {
                    return path;
                }

                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                {
                    return FileExistsHandler.FindNextFreeOutputFilename(path);
                }

                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                {
                    return null;
                }

                // OverwriteFile: keep the name, the write replaces the existing file.
                return path;
            };

            var savedFiles = new List<string>();

            matrixOutputFilename = resolveOutputPath(matrixOutputFilename);

            if (matrixOutputFilename == null)
            {
                // Skipping the matrix file also skips its label files.
                return savedFiles.ToArray();
            }

            FileAndPathMethods.CreateDirectory(matrixOutputFilename);
            File.WriteAllLines(matrixOutputFilename, lines);
            savedFiles.Add(matrixOutputFilename);

            // The label filenames were validated above, so they are known to be non-blank whenever labels are given.
            if (rowLabels != null)
            {
                rowLabelsOutputFilename = resolveOutputPath(rowLabelsOutputFilename);

                if (rowLabelsOutputFilename != null)
                {
                    FileAndPathMethods.CreateDirectory(rowLabelsOutputFilename);
                    File.WriteAllLines(rowLabelsOutputFilename, rowLabels);
                    savedFiles.Add(rowLabelsOutputFilename);
                }
            }

            if (columnLabels != null)
            {
                columnLabelsOutputFilename = resolveOutputPath(columnLabelsOutputFilename);

                if (columnLabelsOutputFilename != null)
                {
                    FileAndPathMethods.CreateDirectory(columnLabelsOutputFilename);
                    File.WriteAllLines(columnLabelsOutputFilename, columnLabels);
                    savedFiles.Add(columnLabelsOutputFilename);
                }
            }

            return savedFiles.ToArray();
        }
Ejemplo n.º 6
0
        /// <summary>
        ///     Saves a spreadsheet in TSV format, XLSX format, or both, by delegating to the format-specific save methods.
        /// </summary>
        /// <param name="saveFilename">Output filename, passed unchanged to each format-specific save method.</param>
        /// <param name="sheetNames">The names of the sheets.</param>
        /// <param name="spreadsheet">The spreadsheet data to save.</param>
        /// <param name="progressActionSet">Passed on to the format-specific save methods.</param>
        /// <param name="tsvFormat">Whether to save in TSV format.</param>
        /// <param name="xlsxFormat">Whether to save in XLSX format.</param>
        /// <param name="fileExistsOptions">What to do when an output file already exists; passed on to the format-specific save methods.</param>
        /// <returns>The filenames of the newly saved files, TSV files first</returns>
        /// <exception cref="ArgumentException">Neither output format was selected.</exception>
        public static string[] SaveSpreadsheet(string saveFilename, string[] sheetNames, List<List<SpreadsheetCell[]>> spreadsheet, ProgressActionSet progressActionSet = null, bool tsvFormat = false, bool xlsxFormat = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            bool anyFormatSelected = tsvFormat || xlsxFormat;

            if (!anyFormatSelected)
            {
                throw new ArgumentException("Spreadsheet must be either TSV and/or XLSX format");
            }

            var savedFilenames = new List<string>();

            if (tsvFormat)
            {
                savedFilenames.AddRange(SaveSpreadsheetTsv(saveFilename, sheetNames, spreadsheet, progressActionSet, fileExistsOptions));
            }

            if (xlsxFormat)
            {
                savedFilenames.AddRange(SaveSpreadsheetXl(saveFilename, sheetNames, spreadsheet, progressActionSet, fileExistsOptions));
            }

            return savedFilenames.ToArray();
        }
Ejemplo n.º 7
0
        /// <summary>
        ///     Filters the given FASTA files and PDB files with the given options and saves the results to disk.  Data needs to be
        ///     cleaned for two reasons, firstly to not pollute or distort the results, and secondly to save unnecessary processing
        ///     operations.
        /// </summary>
        /// <param name="cancellationToken"></param>
        /// <param name="pdbFilesFolders"></param>
        /// <param name="fastaFiles"></param>
        /// <param name="proteinOperationOptionFlags"></param>
        /// <param name="saveFastaFilenameTemplate"></param>
        /// <param name="consoleTextBox"></param>
        /// <param name="progressBar"></param>
        /// <param name="estimatedTimeRemaining"></param>
        public static void CleanProteins(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFilesFolders, string[] fastaFiles, ProteinOperation proteinOperationOptionFlags, string saveFastaFilenameTemplate, ProgressActionSet progressActionSet, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            // Overview: loads the FASTA files, then steps through the ProteinOperation values one at a time,
            // applying each filter selected in proteinOperationOptionFlags to the working sequence list and
            // saving the result(s) of every applied filter to a FASTA file built from the filename template.

            // PDB folders are only mandatory when at least one structure-based filter has been requested.
            if (pdbFilesFolders == null || pdbFilesFolders.Length == 0)
            {
                if (proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveMultipleModelsInStructure) || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonInteractingProteinsInStructure) || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonSymmetricalInStructure) || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInStructure))
                {
                    throw new ArgumentOutOfRangeException(nameof(pdbFilesFolders));
                }
            }

            // At least one FASTA file and a non-blank output template are always required.
            if (fastaFiles == null || fastaFiles.Length == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(fastaFiles));
            }

            if (string.IsNullOrWhiteSpace(saveFastaFilenameTemplate))
            {
                throw new ArgumentOutOfRangeException(nameof(saveFastaFilenameTemplate));
            }

            // NOTE(review): pdbFilesArray is not read again anywhere in this method.
            string[] pdbFilesArray = ProteinDataBankFileOperations.GetPdbFilesArray(pdbFilesFolders);
            //List<string> pdbIdList = null;
            int    beforeCount             = 0;
            int    afterCount              = 0;
            string saveFilename            = saveFastaFilenameTemplate;
            var    currentProteinOperation = ProteinOperation.LoadFile;

            // sequences[0] is the working list that every filter narrows down.
            // sequences[1] and sequences[2] hold the extra output lists of filters that split the
            // working list into several groups; they are reset to null at the start of every pass.
            int[] numberSequencesLoaded;
            var   sequences = new List <ISequence> [3];

            //UserProteinInterfaceOperations.TextBoxClear(consoleTextBox);
            ProgressActionSet.Report("Filtering proteins.", progressActionSet);

            // Load fasta/sequence files.
            sequences[0] = SequenceFileHandler.LoadSequenceFileList(fastaFiles, StaticValues.MolNameProteinAcceptedValues, out numberSequencesLoaded, true);

            // Built once from the freshly loaded sequences and reused by the structure-based filters below.
            var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences[0]);

            // Report the load result per input file; numberSequencesLoaded is indexed in parallel with fastaFiles.
            for (int numberSequencesLoadedIndex = 0; numberSequencesLoadedIndex < numberSequencesLoaded.Length; numberSequencesLoadedIndex++)
            {
                if (numberSequencesLoaded[numberSequencesLoadedIndex] > 0)
                {
                    // The protein count shown is half the sequence count — presumably two chains per protein; TODO confirm.
                    ProgressActionSet.Report("Loaded " + numberSequencesLoaded[numberSequencesLoadedIndex] / 2 + " proteins from file: " + fastaFiles[numberSequencesLoadedIndex], progressActionSet);
                }
                else
                {
                    ProgressActionSet.Report("Error could not load file: " + fastaFiles[numberSequencesLoadedIndex], progressActionSet);
                }
            }

            // Nothing to filter when no input file yielded any sequences.
            if (numberSequencesLoaded.Count(a => a > 0) == 0)
            {
                return;
            }

            // Replace placeholder variable names.
            saveFilename = saveFilename.Replace("%date%", DateTime.Now.ToString("yyyy-MM-dd"));
            saveFilename = saveFilename.Replace("%time%", DateTime.Now.ToString("HH.mm.ss"));

            // Apply the file-exists policy to the template path itself.
            // NOTE(review): the %fasta_filename% placeholder is only substituted later (per saved list),
            // so this check runs against the path with that placeholder still in it — confirm this is intended.
            // With SkipFile an existing file here ends the whole method without filtering.

            if (File.Exists(saveFilename))
            {
                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                {
                    saveFilename = FileExistsHandler.FindNextFreeOutputFilename(saveFilename);
                }
                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.OverwriteFile)
                {
                }
                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                {
                    return;
                }
            }

            // Step through the operations one at a time until Finished is reached or cancellation is requested.
            while (currentProteinOperation != ProteinOperation.Finished)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    break;
                }

                // Advance to the next operation by doubling the current value.
                // Assumes ProteinOperation members are successive powers of two ending with Finished — TODO confirm against the enum.
                currentProteinOperation = (ProteinOperation)((int)currentProteinOperation * 2);
                sequences[1]            = null;
                sequences[2]            = null;
                var sequencesDescriptions = new string[3];

                if (currentProteinOperation == ProteinOperation.Finished)
                {
                    break;
                }

                // Skip the current operation when the caller did not select it.
                if (currentProteinOperation == ProteinOperation.RemoveNonProteinAlphabetInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonProteinAlphabetInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveWrongNumberOfChainsInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveExactDuplicatesInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveExactDuplicatesInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonHomodimersInSequence && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonHomodimersInSequence))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveWrongNumberOfChainsInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInStructure))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveMultipleModelsInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveMultipleModelsInStructure))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonInteractingProteinsInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonInteractingProteinsInStructure))
                {
                    continue;
                }
                if (currentProteinOperation == ProteinOperation.RemoveNonSymmetricalInStructure && !proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonSymmetricalInStructure))
                {
                    continue;
                }

                // Count sequences before operation (only the working list, sequences[0], is counted).
                beforeCount = sequences[0].Count / 2;

                // Update user about what is happening.
                ProgressActionSet.Report("", progressActionSet);
                ProgressActionSet.Report("Removing " + ProteinOperationString(currentProteinOperation) + " entries [from " + beforeCount + " proteins]", progressActionSet);

                // Start stopwatch to count duration of operation.
                Stopwatch stopwatch = Stopwatch.StartNew();

                // Perform specified operation. Each case sets the description(s) that later replace
                // %fasta_filename% in the output filename, and updates the matching sequences[] slot(s).
                switch (currentProteinOperation)
                {
                case ProteinOperation.RemoveNonProteinAlphabetInSequence:
                {
                    sequencesDescriptions[0] = "01 - Removed non-protein sequences (sequence filter)";
                    sequences[0]             = FilterProteins.RemoveNonProteinAlphabetSequences(cancellationToken, sequences[0], progressActionSet);
                    break;
                }

                case ProteinOperation.RemoveWrongNumberOfChainsInSequence:
                {
                    // NOTE(review): the literal 2 is presumably the required chain count (dimers) — confirm against the helper.
                    sequencesDescriptions[0] = "02 - Removed non-dimers (sequence filter)";
                    sequences[0]             = FilterProteins.RemoveSequencesWithIncorrectNumberOfChains(cancellationToken, sequences[0], 2, progressActionSet);
                    break;
                }

                case ProteinOperation.RemoveExactDuplicatesInSequence:
                {
                    sequencesDescriptions[0] = "03 - Removed exact duplicates (sequence filter)";
                    sequences[0]             = FilterProteins.RemoveDuplicates(cancellationToken, sequences[0], progressActionSet);
                    break;
                }

                case ProteinOperation.RemoveNonHomodimersInSequence:
                {
                    // homodimers - all types - unfiltered for interactions or symmetry
                    // NOTE(review): the meaning of the literals 30 and 90 is not visible here — confirm against SplitDimerTypes.

                    var result = FilterProteins.SplitDimerTypes(cancellationToken, sequences[0], 30, 90, progressActionSet);

                    // Only the homodimer list stays in the working slot; the other two groups are saved as side outputs.
                    sequencesDescriptions[0] = "04 - Homodimers only (sequence filter)";
                    sequences[0]             = result.HomoDimerPdbIdList;

                    sequencesDescriptions[1] = "04 - Heterodimers only (sequence filter)";
                    sequences[1]             = result.HeteroDimerPdbIdList;

                    sequencesDescriptions[2] = "04 - Homology dimers only (sequence filter)";
                    sequences[2]             = result.HomologyDimerPdbIdList;
                    break;
                }

                case ProteinOperation.RemoveMultipleModelsInStructure:
                {
                    // The id list returned by the structure check is passed on with RemoveSequencesInList,
                    // i.e. the listed ids are the ones taken out of the working list.
                    sequencesDescriptions[0] = "05 - Removed multiple models (structure filter)";
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);
                    pdbIdList    = FilterProteins.RemoveMultipleStructureModels(cancellationToken, pdbFilesFolders, pdbIdList, progressActionSet);
                    sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                    break;
                }

                case ProteinOperation.RemoveWrongNumberOfChainsInStructure:
                {
                    sequencesDescriptions[0] = "06 - Removed non-dimers (structure filter)";
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);

                    //var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences[0]);

                    // Uses the chain id list built right after loading, not one rebuilt from the current working list.
                    pdbIdList    = FilterProteins.RemoveStructuresWithIncorrectNumberOfChains(cancellationToken, pdbFilesFolders, pdbIdList, pdbIdChainIdList, 2, progressActionSet);
                    sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                    break;
                }

                case ProteinOperation.RemoveNonInteractingProteinsInStructure:
                {
                    // Make copy of sequences as we will split the list into two parts - with and without interactions.
                    sequences[1] = new List <ISequence>(sequences[0]);

                    // Get pdb id list from sequences, to check for pdb file, load, perform processing.
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);

                    // Makes a list of sequences with interactions.
                    pdbIdList = FilterProteins.RemoveSequencesWithoutInteractions(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);

                    // Remove any protein not in the list, keep the ones in the list.
                    sequencesDescriptions[0] = "08 - dimers - with interactions - unfiltered for symmetry";
                    sequences[0]             = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    // The copy gets the complementary treatment: ids in the list are removed from it.
                    sequencesDescriptions[1] = "07 - dimers - no observed interactions";
                    sequences[1]             = FilterProteins.RemoveSequences(cancellationToken, sequences[1], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                    break;
                }

                case ProteinOperation.RemoveNonSymmetricalInStructure:
                {
                    // Make copy of sequences as we will split the list into three parts - fully, partly and not symmetrical.
                    List <string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);
                    sequences[1] = new List <ISequence>(sequences[0]);
                    sequences[2] = new List <ISequence>(sequences[0]);
                    Dictionary <string, decimal> symmetryPercentage = FilterProteins.CalculateStructureSymmetry(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);

                    var pdbSymmetrical     = new List <string>();
                    var pdbPartSymmetrical = new List <string>();
                    var pdbNonSymmetrical  = new List <string>();

                    // Bucket every id by its symmetry value: exactly 0, exactly 100, or strictly in between.
                    // Any other value is reported as an error and the id is placed in no bucket.
                    foreach (var symmetryPercentageKeyValuePair in symmetryPercentage)
                    {
                        if (symmetryPercentageKeyValuePair.Value == 0.0m)
                        {
                            pdbNonSymmetrical.Add(symmetryPercentageKeyValuePair.Key);
                        }
                        else if (symmetryPercentageKeyValuePair.Value == 100.0m)
                        {
                            pdbSymmetrical.Add(symmetryPercentageKeyValuePair.Key);
                        }
                        else if (symmetryPercentageKeyValuePair.Value > 0.0m && symmetryPercentageKeyValuePair.Value < 100.0m)
                        {
                            pdbPartSymmetrical.Add(symmetryPercentageKeyValuePair.Key);
                        }
                        else
                        {
                            ProgressActionSet.Report("Error: Out of bounds symmetry value of " + symmetryPercentageKeyValuePair.Value + " was found in " + symmetryPercentageKeyValuePair.Key + ".", progressActionSet);
                        }
                    }

                    // Each of the three lists keeps only the ids of its own bucket.
                    sequencesDescriptions[0] = "11 - dimers - with interactions - 100% symmetrical";
                    sequences[0]             = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    sequencesDescriptions[1] = "10 - dimers - with interactions - 1% to 99% symmetrical";
                    sequences[1]             = FilterProteins.RemoveSequences(cancellationToken, sequences[1], pdbPartSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    sequencesDescriptions[2] = "09 - dimers - with interactions - 0% symmetrical";
                    sequences[2]             = FilterProteins.RemoveSequences(cancellationToken, sequences[2], pdbNonSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                    break;
                }
                }

                // Stop stopwatch immediately after operation.
                stopwatch.Stop();

                // Count sequences after operation (again only the working list, sequences[0]).
                afterCount = sequences[0].Count / 2;

                // Results are only saved and reported when the operation was not cancelled part-way.
                if (!cancellationToken.IsCancellationRequested)
                {
                    // Save every list this operation produced; slots left null are skipped.
                    for (int sequencesIndex = sequences.GetLowerBound(0); sequencesIndex <= sequences.GetUpperBound(0); sequencesIndex++)
                    {
                        if (sequences[sequencesIndex] != null)
                        {
                            // Build this list's filename by substituting its description for the %fasta_filename% placeholder.
                            string localSaveFilename = saveFilename;
                            localSaveFilename = localSaveFilename.Replace("%fasta_filename%", sequencesDescriptions[sequencesIndex]);


                            bool skipFile = false;

                            // Here SkipFile only skips this one output file; the filtering itself carries on.
                            if (File.Exists(localSaveFilename))
                            {
                                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                                {
                                    localSaveFilename = FileExistsHandler.FindNextFreeOutputFilename(localSaveFilename);
                                }
                                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.OverwriteFile)
                                {
                                }
                                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                                {
                                    skipFile = true;
                                }
                            }


                            if (!skipFile)
                            {
                                // Save the sequence results; the name actually used is whatever the save method returns.
                                // NOTE(review): fileExistsOptions is not forwarded, so the save method applies its own default — confirm this is intended.
                                string savedFile /*s*/ = SequenceFileHandler.SaveSequencesAsFasta(sequences[sequencesIndex], localSaveFilename);

                                // Inform user that file has been saved.
                                //foreach (char savedFile in savedFiles)
                                //{
                                ProgressActionSet.Report("Saved file: " + savedFile, progressActionSet);
                                //}
                            }
                        }
                    }

                    // Update the user about the results.
                    ProgressActionSet.Report("Removed " + (beforeCount - afterCount) + " proteins. [" + afterCount + " proteins remaining]. Elapsed: " + stopwatch.Elapsed.ToString(@"dd\:hh\:mm\:ss"), progressActionSet);
                }
            }

            if (!cancellationToken.IsCancellationRequested)
            {
                ProgressActionSet.Report("Finished all selected filtering operations.", progressActionSet);
            }
            else
            {
                ProgressActionSet.Report("Cancelled.", progressActionSet);
                //UserProteinInterfaceOperations.ProgressBarReset(progressBar, 0, 100, 0);
                ////UserProteinInterfaceOperations.LabelEstimatedTimeRemainingUpdate(estimatedTimeRemaining, 0, 1, 1);

                // After cancellation, issue a start/progress/finish sequence on the progress action set.
                // NOTE(review): presumably this resets the progress display — confirm against ProgressActionSet.
                ProgressActionSet.StartAction(100, progressActionSet);
                ProgressActionSet.ProgressAction(100, progressActionSet);
                ProgressActionSet.FinishAction(false, progressActionSet);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        ///     Saves a single sheet supplied as a rectangular cell array by converting it to a list of cell arrays
        ///     and forwarding to the list-based overload.
        /// </summary>
        /// <returns>Whatever the list-based overload returns.</returns>
        public static string[] SaveSpreadsheet(string saveFilename, string[] sheetNames, SpreadsheetCell[,] spreadsheet, ProgressActionSet progressActionSet = null, bool tsvFormat = false, bool xlsxFormat = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            // Convert the 2D array to jagged form and materialise it as a list in one step.
            var sheetAsList = ConvertTypes.SpreadsheetCell2DArrayToJaggedArray(spreadsheet).ToList();

            return SaveSpreadsheet(saveFilename, sheetNames, sheetAsList, progressActionSet, tsvFormat, xlsxFormat, fileExistsOptions);
        }
Ejemplo n.º 9
0
 /// <summary>
 ///     Saves a single sheet by wrapping it in a one-element list of sheets and forwarding to the multi-sheet overload.
 /// </summary>
 /// <returns>Whatever the multi-sheet overload returns.</returns>
 public static string[] SaveSpreadsheet(string saveFilename, string[] sheetNames, List <SpreadsheetCell[]> spreadsheet, ProgressActionSet progressActionSet = null, bool tsvFormat = false, bool xlsxFormat = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
 {
     // The given sheet becomes the only entry of the list of sheets.
     var singleSheetList = new List <List <SpreadsheetCell[]> >();
     singleSheetList.Add(spreadsheet);

     return SaveSpreadsheet(saveFilename, sheetNames, singleSheetList, progressActionSet, tsvFormat, xlsxFormat, fileExistsOptions);
 }
Ejemplo n.º 10
0
 /// <summary>
 ///     Saves a single sheet supplied as a jagged array by copying each inner array into a list
 ///     and forwarding to the list-based overload.
 /// </summary>
 /// <returns>Whatever the list-based overload returns.</returns>
 public static string[] SaveSpreadsheet(string saveFilename, string[] sheetNames, SpreadsheetCell[][] spreadsheet, ProgressActionSet progressActionSet = null, bool tsvFormat = false, bool xlsxFormat = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
 {
     // Each inner array is duplicated, so the forwarded list holds new array instances rather than the caller's.
     var copiedArrays = (from cellArray in spreadsheet
                         select cellArray.ToArray()).ToList();

     return SaveSpreadsheet(saveFilename, sheetNames, copiedArrays, progressActionSet, tsvFormat, xlsxFormat, fileExistsOptions);
 }
Ejemplo n.º 11
0
        /// <summary>
        ///     Saves several sheets supplied as rectangular cell arrays by converting each one to a list of cell arrays
        ///     and forwarding the collected sheets to the multi-sheet overload.
        /// </summary>
        /// <returns>Whatever the multi-sheet overload returns.</returns>
        public static string[] SaveSpreadsheet(string saveFilename, string[] sheetNames, List <SpreadsheetCell[, ]> spreadsheet, ProgressActionSet progressActionSet = null, bool tsvFormat = false, bool xlsxFormat = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            var sheetLists = new List <List <SpreadsheetCell[]> >();

            // Convert the sheets in their original order, one output list per input sheet.
            for (int sheetIndex = 0; sheetIndex < spreadsheet.Count; sheetIndex++)
            {
                sheetLists.Add(ConvertTypes.SpreadsheetCell2DArrayToJaggedArray(spreadsheet[sheetIndex]).ToList());
            }

            return SaveSpreadsheet(saveFilename, sheetNames, sheetLists, progressActionSet, tsvFormat, xlsxFormat, fileExistsOptions);
        }
Ejemplo n.º 12
0
        /// <summary>
        ///     Saves one or more sheets of cells into a single ".xlsx" workbook.
        /// </summary>
        /// <param name="saveFilename">Requested output path; its extension is stripped with FileAndPathMethods.RemoveFileExtension and ".xlsx" is appended.</param>
        /// <param name="sheetNames">
        ///     Optional sheet names. Missing, null or blank entries are replaced by the output file name followed by the
        ///     1-based sheet number, and every name is passed through XlSheetNameVerification. The array passed in is not modified.
        /// </param>
        /// <param name="spreadsheet">One list of row arrays per sheet. Null rows and cells whose text is null or whitespace are not written.</param>
        /// <param name="progressActionSet">Not used by this method.</param>
        /// <param name="fileExistsOptions">What to do when the output file already exists: append a number to the filename, overwrite it, or skip saving.</param>
        /// <returns>A one-element array holding the full path of the saved workbook, or an empty array when an existing file was skipped.</returns>
        public static string[] SaveSpreadsheetXl(string saveFilename, string[] sheetNames, List <List <SpreadsheetCell[]> > spreadsheet, ProgressActionSet progressActionSet = null, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            var xlFilename = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".xlsx");

            if (xlFilename.Exists)
            {
                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                {
                    xlFilename = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(xlFilename.FullName));
                }
                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                {
                    return new string[0];
                }

                // OverwriteFile: keep the existing name and let the workbook creation below replace the file.
            }

            if (xlFilename.Directory != null)
            {
                xlFilename.Directory.Create();
            }

            var totalSheets = spreadsheet.Count;

            // Fallback stem for unnamed sheets: the verified output file name without its extension.
            string defaultSheetName = XlSheetNameVerification(Path.GetFileNameWithoutExtension(xlFilename.Name));

            // Always work on a private copy so the caller's array is never modified,
            // padded to at least one name per sheet.
            var providedNameCount  = sheetNames != null ? sheetNames.Length : 0;
            var workbookSheetNames = new string[providedNameCount > totalSheets ? providedNameCount : totalSheets];

            if (providedNameCount > 0)
            {
                Array.Copy(sheetNames, workbookSheetNames, providedNameCount);
            }

            for (int nameIndex = 0; nameIndex < workbookSheetNames.Length; nameIndex++)
            {
                if (string.IsNullOrWhiteSpace(workbookSheetNames[nameIndex]))
                {
                    workbookSheetNames[nameIndex] = defaultSheetName + (nameIndex + 1);
                }

                workbookSheetNames[nameIndex] = XlSheetNameVerification(workbookSheetNames[nameIndex]);
            }

            XlSpreadsheetDocumentContainer xlSpreadsheet = ExcelCreateSpreadsheet.CreateSpreadsheetWorkbook(xlFilename.FullName, workbookSheetNames);

            // The document is closed in the finally block so it is released
            // even when building one of the sheets throws.
            try
            {
                // NOTE(review): the returned run properties are not used below; the call is kept only in case
                // GetAminoAcidRunProperties has side effects — confirm and remove if it has none.
                var runPropertiesArray = GetAminoAcidRunProperties();

                // Materialise the worksheet parts once instead of on every sheet iteration.
                var worksheetParts = xlSpreadsheet.WorkbookPartObject.WorksheetParts.ToList();

                for (int sheetIndex = 0; sheetIndex < totalSheets; sheetIndex++)
                {
                    var sheetRows = spreadsheet[sheetIndex];
                    var sheetData = new SheetData();

                    for (int rowIndex = 0; rowIndex < sheetRows.Count; rowIndex++)
                    {
                        var rowCells = sheetRows[rowIndex];

                        if (rowCells == null)
                        {
                            continue;
                        }

                        // Spreadsheet rows are 1-based; skipped rows still consume their index.
                        var xlRow = new Row {
                            RowIndex = (UInt32Value)(uint)(rowIndex + 1)
                        };

                        for (int columnIndex = 0; columnIndex < rowCells.Length; columnIndex++)
                        {
                            var    sourceCell  = rowCells[columnIndex];
                            string columnValue = sourceCell.CellData;

                            if (string.IsNullOrWhiteSpace(columnValue))
                            {
                                continue;
                            }

                            // Cell reference is the column letters followed by the 1-based row number.
                            string cellRef = AlphabetLetterRollOver(columnIndex) + (rowIndex + 1);

                            var xlCell = new Cell {
                                CellReference = cellRef, StyleIndex = 1U
                            };

                            // Any data type not listed here leaves the cell's DataType unset.
                            switch (sourceCell.SpreadsheetDataType)
                            {
                            case SpreadsheetDataTypes.String:
                                xlCell.DataType = CellValues.String;
                                break;

                            case SpreadsheetDataTypes.Integer:
                            case SpreadsheetDataTypes.Double:
                            case SpreadsheetDataTypes.Decimal:
                                xlCell.DataType = CellValues.Number;
                                break;
                            }

                            xlCell.Append(new CellValue {
                                Text = columnValue
                            });
                            xlRow.Append(xlCell);
                        }

                        sheetData.Append(xlRow);
                    }

                    var worksheet = new Worksheet();
                    worksheet.Append(sheetData);

                    worksheetParts[sheetIndex].Worksheet = worksheet;
                }
            }
            finally
            {
                xlSpreadsheet.SpreadsheetDocumentObject.Close();
            }

            return new[] { xlFilename.FullName };
        }
Ejemplo n.º 13
0
 /// <summary>
 ///     Saves a single sheet as an ".xlsx" workbook by wrapping it in a one-element list of sheets
 ///     and forwarding to the multi-sheet overload.
 /// </summary>
 /// <returns>Whatever the multi-sheet overload returns.</returns>
 public static string[] SaveSpreadsheetXl(string saveFilename, string[] sheetNames, List <SpreadsheetCell[]> spreadsheet, ProgressActionSet progressActionSet = null, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
 {
     // The given sheet becomes the only entry of the list of sheets.
     var singleSheetList = new List <List <SpreadsheetCell[]> >();
     singleSheetList.Add(spreadsheet);

     return SaveSpreadsheetXl(saveFilename, sheetNames, singleSheetList, progressActionSet, fileExistsOptions);
 }
Ejemplo n.º 14
0
        /// <summary>
        ///     Writes one sheet to a tab-separated ".tsv" text file.
        /// </summary>
        /// <param name="saveFilename">Requested output path; its extension is stripped with FileAndPathMethods.RemoveFileExtension and ".tsv" is appended.</param>
        /// <param name="spreadsheet">The rows to write; null rows are left out of the file.</param>
        /// <param name="progressActionSet">Not used by this overload.</param>
        /// <param name="fileExistsOptions">What to do when the output file already exists: append a number to the filename, overwrite it, or skip saving.</param>
        /// <returns>A one-element array holding the full path written, or an empty array when an existing file was skipped.</returns>
        public static string[] SaveSpreadsheetTsv(string saveFilename, List <SpreadsheetCell[]> spreadsheet, ProgressActionSet progressActionSet = null, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            var outputFile = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".tsv");

            if (outputFile.Exists)
            {
                switch (fileExistsOptions)
                {
                case FileExistsHandler.FileExistsOptions.AppendNumberToFilename:
                    outputFile = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(outputFile.FullName));
                    break;

                case FileExistsHandler.FileExistsOptions.SkipFile:
                    return new string[0];

                // OverwriteFile (or any other value): keep the name and let the write below replace the file.
                }
            }

            // One text line per non-null row, cell texts joined by tabs.
            var tsvText = new StringBuilder();

            foreach (var rowCells in spreadsheet)
            {
                if (rowCells != null)
                {
                    tsvText.AppendLine(string.Join("\t", rowCells.Select(cell => cell.CellData)));
                }
            }

            // Make sure the target folder exists before writing.
            var targetFolder = outputFile.Directory;
            if (targetFolder != null)
            {
                targetFolder.Create();
            }

            File.WriteAllText(outputFile.FullName, tsvText.ToString());

            return new[] { outputFile.FullName };
        }
Ejemplo n.º 15
0
        /// <summary>
        ///     Writes every sheet to its own tab-separated file by calling the single-sheet overload once per sheet.
        /// </summary>
        /// <param name="saveFilename">Base output path; its directory and extension are reused for every sheet file.</param>
        /// <param name="sheetNames">
        ///     Optional per-sheet names used as the file names. A missing or blank name is replaced by the base file name
        ///     followed by " [sheet NN]", where NN is the zero-padded 1-based sheet number.
        /// </param>
        /// <param name="spreadsheet">One list of row arrays per sheet.</param>
        /// <param name="progressActionSet">Passed through to the single-sheet overload.</param>
        /// <param name="fileExistsOptions">Passed through to the single-sheet overload.</param>
        /// <returns>The paths returned by the single-sheet overload for all sheets, in sheet order.</returns>
        public static string[] SaveSpreadsheetTsv(string saveFilename, string[] sheetNames, List <List <SpreadsheetCell[]> > spreadsheet, ProgressActionSet progressActionSet = null, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            var savedFiles = new List <string>();

            // Sheet numbers are padded to the digit count of the sheet total, but never to fewer than two digits.
            var totalDigits = spreadsheet.Count.ToString().Length;
            var numberWidth = totalDigits < 2 ? 2 : totalDigits;

            for (int sheetIndex = 0; sheetIndex < spreadsheet.Count; sheetIndex++)
            {
                string sheetName = sheetNames != null && sheetIndex < sheetNames.Length ? sheetNames[sheetIndex] : "";

                if (string.IsNullOrWhiteSpace(sheetName))
                {
                    var sheetNumber = (sheetIndex + 1).ToString().PadLeft(numberWidth, '0');
                    sheetName = Path.GetFileNameWithoutExtension(saveFilename) + " [sheet " + sheetNumber + "]";
                }

                // Same folder and extension as the base path, with the sheet name as the file name.
                var sheetFilename = FileAndPathMethods.MergePathAndFilename(Path.GetDirectoryName(saveFilename), sheetName + Path.GetExtension(saveFilename));

                savedFiles.AddRange(SaveSpreadsheetTsv(sheetFilename, spreadsheet[sheetIndex], progressActionSet, fileExistsOptions));
            }

            return savedFiles.ToArray();
        }
        /// <summary>
        ///     Save to disk a list of sequences in FASTA format.
        /// </summary>
        /// <param name="sequences">The sequences to write. Only null is rejected by this method.</param>
        /// <param name="saveFilename">The target filename. Must not be null, empty or whitespace.</param>
        /// <param name="appendSequenceCountToFilename">
        ///     When true, the filename is first rewritten by AddSequenceAndProteinCountToFilename.
        /// </param>
        /// <param name="fileExistsOptions">
        ///     What to do when the target file already exists: pick the next free filename, overwrite it,
        ///     or skip saving altogether.
        /// </param>
        /// <param name="progressActionSet">Optional; when non-null, a status message describing the action taken is reported to it.</param>
        /// <returns>
        ///     The full path of the file that was written, or null when the file already existed and
        ///     SkipFile was requested.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     Thrown when <paramref name="sequences" /> is null or <paramref name="saveFilename" /> is null,
        ///     empty or whitespace.
        /// </exception>
        public static string SaveSequencesAsFasta(List<ISequence> sequences, string saveFilename, bool appendSequenceCountToFilename = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename, ProgressActionSet progressActionSet = null)
        {
            // The exception type is kept as ArgumentOutOfRangeException so existing callers' catch blocks keep working.
            if (sequences == null)
            {
                throw new ArgumentOutOfRangeException(nameof(sequences));
            }

            if (string.IsNullOrWhiteSpace(saveFilename))
            {
                throw new ArgumentOutOfRangeException(nameof(saveFilename));
            }

            if (appendSequenceCountToFilename)
            {
                saveFilename = AddSequenceAndProteinCountToFilename(sequences, saveFilename);
            }

            var fileInfo = new FileInfo(saveFilename);

            if (fileInfo.Exists)
            {
                // Any option value not listed below falls through and writes to the existing path.
                switch (fileExistsOptions)
                {
                    case FileExistsHandler.FileExistsOptions.AppendNumberToFilename:
                        fileInfo = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(fileInfo.FullName));

                        if (progressActionSet != null)
                        {
                            ProgressActionSet.Report("Save sequence: already exists, appended number: " + fileInfo.FullName, progressActionSet);
                        }
                        break;

                    case FileExistsHandler.FileExistsOptions.OverwriteFile:
                        if (progressActionSet != null)
                        {
                            ProgressActionSet.Report("Save sequence: overwriting file: " + fileInfo.FullName, progressActionSet);
                        }
                        break;

                    case FileExistsHandler.FileExistsOptions.SkipFile:
                        if (progressActionSet != null)
                        {
                            ProgressActionSet.Report("Save sequence: skipped file, already exists: " + fileInfo.FullName, progressActionSet);
                        }

                        // Nothing was written, so there is no path to report.
                        return null;
                }
            }
            else if (progressActionSet != null)
            {
                ProgressActionSet.Report("Save sequence: new file: " + fileInfo.FullName, progressActionSet);
            }

            // Make sure the directory exists before the formatter opens the file.
            if (fileInfo.Directory != null)
            {
                fileInfo.Directory.Create();
            }

            var formatter = new FastAFormatter(fileInfo.FullName);

            try
            {
                formatter.Write(sequences);
            }
            finally
            {
                // Always close the formatter, even if writing fails, so the file handle is not leaked.
                formatter.Close();
            }

            return fileInfo.FullName;
        }