/// <summary>
/// Writes the interaction output lines to a file, resolving a clash with an existing file
/// according to <paramref name="fileExistsOptions"/>.
/// </summary>
/// <param name="saveFilename">Path of the file to write.</param>
/// <param name="interactionOutputStrings">The lines written to the file.</param>
/// <param name="fileExistsOptions">What to do when the target file already exists.</param>
/// <returns>The full path of the file written, or <c>null</c> when an existing file was skipped.</returns>
public static string SaveInteractionsOutput(string saveFilename, string[] interactionOutputStrings, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    var target = new FileInfo(saveFilename);

    // The directory is created before the clash check, so it exists even when the file ends up skipped.
    target.Directory?.Create();

    bool alreadyExists = File.Exists(target.FullName);

    if (alreadyExists && fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
    {
        return null;
    }

    if (alreadyExists && fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
    {
        string freeFilename = FileExistsHandler.FindNextFreeOutputFilename(target.FullName);
        target = new FileInfo(freeFilename);
    }

    // Any other option (OverwriteFile) falls through and writes over the existing file.
    File.WriteAllLines(target.FullName, interactionOutputStrings);

    return target.FullName;
}
/// <summary>
/// Clusters the PDB structure of every protein in the given sequence files, groups the proteins by the
/// interface profile found, and saves one FASTA file per group.
/// </summary>
/// <remarks>
/// A group key is built from the chain count and the sorted per-chain interface counts and, when
/// <paramref name="filterProteinInterfaceCountsWithLengths"/> is set, a second key additionally carries the
/// sorted distinct interface lengths. Each key replaces the <c>%proteinInterfaces%</c> placeholder in
/// <paramref name="filterProteinInterfacesLengthOutputFilename"/>. Missing sequence files, missing structure
/// directories and an empty PDB id list are reported through <paramref name="progressActionSet"/> and end
/// the run without throwing.
/// </remarks>
/// <param name="cancellationToken">Token forwarded to the clustering and sequence removal calls.</param>
/// <param name="maxAtomInterationDistance">Distance value forwarded to the clustering call.</param>
/// <param name="minimumProteinInterfaceDensity">Density value forwarded to the clustering call.</param>
/// <param name="sequenceListFileArray">Sequence files to load; blank entries are not checked for existence.</param>
/// <param name="pdbFileDirectoryLocationArray">Directories searched for PDB files; blank entries are not checked for existence.</param>
/// <param name="filterProteinInterfacesLengthOutputFilename">Output filename template; must contain <c>%proteinInterfaces%</c>.</param>
/// <param name="filterProteinInterfaceCountsWithoutLengths">Create groups keyed by chain count and interface counts.</param>
/// <param name="filterProteinInterfaceCountsWithLengths">Create groups keyed by chain count, interface counts and interface lengths.</param>
/// <param name="fileExistsOptions">Forwarded to the FASTA save call.</param>
/// <param name="progressActionSet">Receives progress and status reports.</param>
/// <exception cref="ArgumentNullException">A file array, directory array or the output filename is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The output filename is blank or lacks the placeholder.</exception>
public static void FilterProteinInterfaceLengths(
    CancellationToken cancellationToken,
    decimal maxAtomInterationDistance,
    decimal minimumProteinInterfaceDensity,
    string[] sequenceListFileArray,
    string[] pdbFileDirectoryLocationArray,
    string filterProteinInterfacesLengthOutputFilename,
    bool filterProteinInterfaceCountsWithoutLengths,
    bool filterProteinInterfaceCountsWithLengths,
    FileExistsHandler.FileExistsOptions fileExistsOptions,
    ProgressActionSet progressActionSet)
{
    if (sequenceListFileArray == null)
    {
        throw new ArgumentNullException(nameof(sequenceListFileArray));
    }

    if (pdbFileDirectoryLocationArray == null)
    {
        throw new ArgumentNullException(nameof(pdbFileDirectoryLocationArray));
    }

    if (filterProteinInterfacesLengthOutputFilename == null)
    {
        throw new ArgumentNullException(nameof(filterProteinInterfacesLengthOutputFilename));
    }

    if (!filterProteinInterfaceCountsWithoutLengths && !filterProteinInterfaceCountsWithLengths)
    {
        ProgressActionSet.Report("Cancelled: No filter options selected.", progressActionSet);
        return;
    }

    // Check all sequence files are found.
    var missingSequenceFiles = sequenceListFileArray
        .Where(sequenceFile => !string.IsNullOrWhiteSpace(sequenceFile) && !File.Exists(sequenceFile))
        .ToList();

    if (missingSequenceFiles.Count > 0)
    {
        foreach (string missingSequenceFile in missingSequenceFiles)
        {
            ProgressActionSet.Report("Warning: Sequence file missing: " + missingSequenceFile, progressActionSet);
        }

        ProgressActionSet.Report("Cancelled: missing sequence files.", progressActionSet);
        return;
    }

    // Check all pdb folders are found.
    var missingDirectoryList = pdbFileDirectoryLocationArray
        .Where(pdbDirectory => !string.IsNullOrWhiteSpace(pdbDirectory) && !Directory.Exists(pdbDirectory))
        .ToList();

    if (missingDirectoryList.Count > 0)
    {
        foreach (string pdbDirectory in missingDirectoryList)
        {
            ProgressActionSet.Report("Warning: Structure file directory missing: " + pdbDirectory, progressActionSet);
        }

        ProgressActionSet.Report("Cancelled: missing structure file directory.", progressActionSet);
        return;
    }

    const string proteinInterfacesTemplateText = "%proteinInterfaces%";

    if (string.IsNullOrWhiteSpace(filterProteinInterfacesLengthOutputFilename) || !filterProteinInterfacesLengthOutputFilename.Contains(proteinInterfacesTemplateText))
    {
        throw new ArgumentOutOfRangeException(nameof(filterProteinInterfacesLengthOutputFilename));
    }

    // Load fasta sequence files.
    List<ISequence> sequenceList = SequenceFileHandler.LoadSequenceFileList(sequenceListFileArray, StaticValues.MolNameProteinAcceptedValues);

    // Get a list of the PDB Unique IDs with unique chain IDs which are wanted, ignoring others which may be present e.g. dna.
    var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequenceList);

    // Get list of PDB Unique IDs.
    List<string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequenceList);

    // Check PDB Unique IDs were successfully loaded.
    if (pdbIdList == null || pdbIdList.Count == 0)
    {
        ProgressActionSet.Report("Error: Sequence list could not be loaded", progressActionSet);
        return;
    }

    // Get the PDB files found in the user specified directories, restricted to the loaded PDB ids.
    string[] pdbFilesArray = ProteinDataBankFileOperations.RemoveNonWhiteListedPdbIdFromPdbFilesArray(pdbIdList, ProteinDataBankFileOperations.GetPdbFilesArray(pdbFileDirectoryLocationArray));

    // Missing PDB files are reported but do not stop the run.
    List<string> missingPdbFilesList = ProteinDataBankFileOperations.CheckForMissingPdbFiles(pdbFilesArray, pdbIdList);

    if (missingPdbFilesList != null && missingPdbFilesList.Count > 0)
    {
        ProgressActionSet.Report("Missing PDB Files: " + string.Join(", ", missingPdbFilesList), progressActionSet);
    }

    ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet);

    int progressIncrement = 0;

    // Group key -> ids of the proteins matching that key (holds both the "counts" and the "counts + lengths" keys).
    var proteinInterfacesCountResultWithLengths = new Dictionary<string, List<string>>();

    var startTicks = DateTime.Now.Ticks;

    // Loop through each pdb file; the extra final pass only reports the progress of the last file.
    for (int pdbFileNumber = 0; pdbFileNumber <= pdbFilesArray.Length; pdbFileNumber++)
    {
        if (progressIncrement > 0)
        {
            ProgressActionSet.ProgressAction(progressIncrement, progressActionSet);
            progressIncrement = 0;
        }

        // Evaluated on every pass, not only after progress was reported: with an empty file list the
        // first pass has no progress to report and would otherwise index past the end of the array.
        if (pdbFileNumber >= pdbFilesArray.Length)
        {
            break;
        }

        ProgressActionSet.EstimatedTimeRemainingAction(startTicks, pdbFileNumber, pdbFilesArray.Length, progressActionSet);

        progressIncrement++;

        // Get unique id of pdb file.
        string pdbFilename = pdbFilesArray[pdbFileNumber];
        string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

        // Check pdb unique id was in the loaded sequence list.
        if (!pdbIdList.Contains(proteinId))
        {
            continue;
        }

        // Perform clustering to detect interaction protein interfaces.
        ClusterProteinDataBankFileResult clusterPdbFileResult = Clustering.ClusterProteinDataBankFile(cancellationToken, maxAtomInterationDistance, minimumProteinInterfaceDensity, pdbFilename, pdbIdChainIdList, ClusteringMethodOptions.ClusterWithResidueSequenceIndex, -1, -1, null);

        if (clusterPdbFileResult == null)
        {
            continue;
        }

        int chainCount = clusterPdbFileResult.ClusteringFullResultListContainer.ChainList.Count;
        List<ProteinInterfaceSequenceAndPositionData> proteinInterfaces = clusterPdbFileResult.ProteinInterfaceAnalysisResultData.ProteinInterfacesSequenceAndPositionDataList;

        // Number of interfaces found on each chain.
        int[] proteinInterfacesCount = new int[chainCount];

        for (int chainIndex = 0; chainIndex < chainCount; chainIndex++)
        {
            proteinInterfacesCount[chainIndex] = proteinInterfaces.Count(proteinInterface => proteinInterface.FullProteinInterfaceId.ChainId == chainIndex);
        }

        // Sorted so the key does not depend on chain order.
        var proteinInterfacesCountStr = string.Join(" ", proteinInterfacesCount.OrderBy(x => x));

        int[] proteinInterfacesLength = new int[proteinInterfaces.Count];

        for (int index = 0; index < proteinInterfaces.Count; index++)
        {
            proteinInterfacesLength[index] = proteinInterfaces[index].ProteinInterfaceLength;
        }

        var proteinInterfacesLengthStr = string.Join(" ", proteinInterfacesLength.Distinct().OrderBy(x => x));

        if (proteinInterfacesLength.Length == 0)
        {
            proteinInterfacesLengthStr = 0.ToString();
        }

        if (filterProteinInterfaceCountsWithoutLengths)
        {
            var combinedKeyAll = "chains [" + chainCount + "] proteinInterfaces [" + proteinInterfacesCountStr + "]";

            List<string> proteinIdsForKey;

            if (!proteinInterfacesCountResultWithLengths.TryGetValue(combinedKeyAll, out proteinIdsForKey))
            {
                proteinIdsForKey = new List<string>();
                proteinInterfacesCountResultWithLengths.Add(combinedKeyAll, proteinIdsForKey);
            }

            proteinIdsForKey.Add(proteinId);
        }

        if (filterProteinInterfaceCountsWithLengths)
        {
            var combinedKeyWithLengths = "chains [" + chainCount + "] proteinInterfaces [" + proteinInterfacesCountStr + "] lengths [" + proteinInterfacesLengthStr + "]";

            List<string> proteinIdsForKey;

            if (!proteinInterfacesCountResultWithLengths.TryGetValue(combinedKeyWithLengths, out proteinIdsForKey))
            {
                proteinIdsForKey = new List<string>();
                proteinInterfacesCountResultWithLengths.Add(combinedKeyWithLengths, proteinIdsForKey);
            }

            proteinIdsForKey.Add(proteinId);
        }
    }

    var confirmSaveList = new List<string>();

    foreach (var kvp in proteinInterfacesCountResultWithLengths)
    {
        // Work on a copy so every group is filtered from the full sequence list.
        var seq2 = new List<ISequence>(sequenceList);

        seq2 = FilterProteins.RemoveSequences(cancellationToken, seq2, kvp.Value, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

        var saveFilename = filterProteinInterfacesLengthOutputFilename.Replace(proteinInterfacesTemplateText, kvp.Key);

        var actualSavedFilename = SequenceFileHandler.SaveSequencesAsFasta(seq2, saveFilename, true, fileExistsOptions, progressActionSet);

        if (!string.IsNullOrWhiteSpace(actualSavedFilename))
        {
            confirmSaveList.Add(actualSavedFilename);
        }
    }

    // Confirm the total number of sequences saved is equal to original number loaded.
    ConfirmSequencesSaved(pdbIdList, confirmSaveList, progressActionSet);

    ProgressActionSet.FinishAction(true, progressActionSet);
}
/// <summary>
/// Writes tree lines (as produced by the NewickTreeFormat method, per the original note) to a
/// <c>.tree</c> file.
/// </summary>
/// <param name="outputTreeFilename">Requested output path; its extension is replaced with <c>.tree</c>.</param>
/// <param name="treeList">The lines written to the file.</param>
/// <param name="fileExistsOptions">What to do when the <c>.tree</c> file already exists.</param>
/// <returns>The full path of the file written, or <c>null</c> when an existing file was skipped.</returns>
public static string SaveNewickTree(string outputTreeFilename, List<string> treeList, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    string treePath = FileAndPathMethods.RemoveFileExtension(outputTreeFilename) + ".tree";
    var treeFile = new FileInfo(treePath);

    if (treeFile.Exists)
    {
        switch (fileExistsOptions)
        {
            case FileExistsHandler.FileExistsOptions.SkipFile:
                return null;

            case FileExistsHandler.FileExistsOptions.AppendNumberToFilename:
                string freePath = FileExistsHandler.FindNextFreeOutputFilename(treeFile.FullName);
                treeFile = new FileInfo(freePath);
                break;

            // OverwriteFile (and anything else): keep the path and write over the existing file.
        }
    }

    // The directory is only created once it is known the file will be written.
    treeFile.Directory?.Create();

    File.WriteAllLines(treeFile.FullName, treeList);

    return treeFile.FullName;
}
/// <summary>
/// Iterates through the provided FASTA files, creating a separate set of calculated spreadsheet outputs
/// for each of them.
/// </summary>
/// <remarks>
/// The filename template and each sheet name may contain the placeholders <c>%spreadsheet_name%</c>,
/// <c>%fasta_filename%</c>, <c>%date%</c>, <c>%time%</c>, <c>%batch_number%</c> and <c>%batch_letter%</c>;
/// the two batch placeholders are currently replaced with an empty string. Blank FASTA entries are
/// skipped, and files that yield no sequences are reported and skipped.
/// </remarks>
/// <param name="maxAtomInterationDistance">Distance value forwarded to the spreadsheet calculation.</param>
/// <param name="fastaFiles">The FASTA files to process.</param>
/// <param name="pdbFilesFolders">The locations where PDB files may be found.</param>
/// <param name="spreadsheetSaveFilenameTemplate">A template filename to save the outputs.</param>
/// <param name="saveTsv">Forwarded to the spreadsheet save call as its TSV flag.</param>
/// <param name="saveXl">Forwarded to the spreadsheet save call as its XLSX flag.</param>
/// <param name="cancellationToken">Checked after each calculation and before each spreadsheet is saved.</param>
/// <param name="progressActionSet">Receives progress and status reports.</param>
/// <param name="fileExistsOptions">Forwarded to the spreadsheet save call.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="fastaFiles"/> or <paramref name="pdbFilesFolders"/> is null or empty, or neither output format is selected.
/// </exception>
public static void MakeHomodimerStatisticsSpreadsheetsAndOutputFiles(decimal maxAtomInterationDistance, string[] fastaFiles, string[] pdbFilesFolders, string spreadsheetSaveFilenameTemplate, bool saveTsv, bool saveXl, CancellationToken cancellationToken, ProgressActionSet progressActionSet = null, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    if (fastaFiles == null || fastaFiles.Length == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(fastaFiles));
    }

    if (pdbFilesFolders == null || pdbFilesFolders.Length == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(pdbFilesFolders));
    }

    if (!saveTsv && !saveXl)
    {
        throw new ArgumentOutOfRangeException(nameof(saveTsv));
    }

    for (int fastaFileNumber = 0; fastaFileNumber < fastaFiles.Length; fastaFileNumber++)
    {
        string fastaFilename = fastaFiles[fastaFileNumber];

        if (string.IsNullOrWhiteSpace(fastaFilename))
        {
            continue;
        }

        ProgressActionSet.Report("Attempting to open file: " + fastaFilename, progressActionSet);

        List<ISequence> sequences = SequenceFileHandler.LoadSequenceFile(fastaFilename, StaticValues.MolNameProteinAcceptedValues);

        if ((sequences == null) || (sequences.Count == 0))
        {
            ProgressActionSet.Report("Error could not load file: " + fastaFilename, progressActionSet);
            continue;
        }

        // Derived only after the load has been validated, so a failed load is never passed on.
        var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences);

        ProgressActionSet.Report("Loaded " + sequences.Count + " sequences from file: " + fastaFilename, progressActionSet);

        List<string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences);

        string appendFilename = FileAndPathMethods.FullPathToFilename(fastaFilename);

        ProgressActionSet.Report("Creating spreadsheets...", progressActionSet);

        Stopwatch stopwatch = Stopwatch.StartNew();

        var spreadsheetList = MakeHomodimerStatisticsSpreadsheetsList(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);

        stopwatch.Stop();

        ProgressActionSet.Report("Finished calculating spreadsheet data [Elapsed: " + stopwatch.Elapsed.ToString(@"dd\:hh\:mm\:ss") + "]", progressActionSet);

        if (cancellationToken.IsCancellationRequested)
        {
            // Run a full start/progress/finish cycle, flagged as unsuccessful, before reporting the cancellation.
            ProgressActionSet.StartAction(100, progressActionSet);
            ProgressActionSet.ProgressAction(100, progressActionSet);
            ProgressActionSet.FinishAction(false, progressActionSet);
            ProgressActionSet.Report("Cancelled.", progressActionSet);
            break;
        }

        for (int spreadsheetIndex = 0; spreadsheetIndex < spreadsheetList.Count; spreadsheetIndex++)
        {
            var spreadsheet = spreadsheetList[spreadsheetIndex];

            if (cancellationToken.IsCancellationRequested)
            {
                break;
            }

            // The first row only carries the worksheet title and is removed from the data;
            // the first cell of the second row supplies the spreadsheet name and stays in place.
            var sheetName = spreadsheet[0][0].CellData;
            var spreadsheetName = spreadsheet[1][0].CellData;

            spreadsheet.RemoveAt(0);

            // One timestamp per spreadsheet so the file name and the sheet name can never disagree.
            DateTime timestamp = DateTime.Now;
            string dateText = timestamp.ToString("yyyy-MM-dd");
            string timeText = timestamp.ToString("HH.mm.ss");

            // e.g. "c:/dResults/Results - %date% %time% - %fasta_filename% - %spreadsheet_name%.tsv"
            string saveFilename = spreadsheetSaveFilenameTemplate;
            saveFilename = saveFilename.Replace("%spreadsheet_name%", spreadsheetName);
            saveFilename = saveFilename.Replace("%fasta_filename%", appendFilename);
            saveFilename = saveFilename.Replace("%date%", dateText);
            saveFilename = saveFilename.Replace("%time%", timeText);
            saveFilename = saveFilename.Replace("%batch_number%", "");
            saveFilename = saveFilename.Replace("%batch_letter%", "");

            sheetName = sheetName.Replace("%spreadsheet_name%", spreadsheetName);
            sheetName = sheetName.Replace("%fasta_filename%", appendFilename);
            sheetName = sheetName.Replace("%date%", dateText);
            sheetName = sheetName.Replace("%time%", timeText);
            sheetName = sheetName.Replace("%batch_number%", "");
            sheetName = sheetName.Replace("%batch_letter%", "");

            // The .xlsx name is handed to the save call for both formats.
            var xlFilename = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".xlsx");

            var savedFiles = SpreadsheetFileHandler.SaveSpreadsheet(xlFilename.FullName, new[] { sheetName }, spreadsheet, null, saveTsv, saveXl, fileExistsOptions);

            ProgressActionSet.ReportFilesSaved(savedFiles, progressActionSet);
        }
    }

    ProgressActionSet.Report("Finished processing files.", progressActionSet);
}
/// <summary>
/// Outputs distance matrix in cluto dense/sparse matrix format (*.mat), row headings in cluto row label
/// file format (*.mat.rlabel) and column headings in cluto column label file format (*.mat.clabel).
/// </summary>
/// <remarks>
/// The first output line holds the row total and column total, plus the non-zero total for sparse
/// formats. All numbers are written with the invariant culture so the decimal separator is always '.'
/// regardless of the machine's regional settings.
/// </remarks>
/// <param name="distanceMatrix">The matrix to write; dimension 0 is treated as columns and dimension 1 as rows.</param>
/// <param name="matrixOutputFilename">Path of the matrix file.</param>
/// <param name="rowLabels">Optional row labels; length must equal the number of matrix rows.</param>
/// <param name="rowLabelsOutputFilename">Path of the row label file; required when <paramref name="rowLabels"/> is given.</param>
/// <param name="columnLabels">Optional column labels; length must equal the number of matrix columns.</param>
/// <param name="columnLabelsOutputFilename">Path of the column label file; required when <paramref name="columnLabels"/> is given.</param>
/// <param name="clutoMatrixFormatTypes">Selects dense or sparse output and whether the matrix is first passed through DiagonalZeroHalfMatrix.</param>
/// <param name="fileExistsOptions">What to do when an output file already exists (applied to each file separately).</param>
/// <returns>
/// The filenames of the files written. Empty when the matrix file already existed and was skipped;
/// in that case the label files are not written either.
/// </returns>
/// <exception cref="ArgumentNullException">The matrix is null or empty, or a required filename is null or blank.</exception>
/// <exception cref="ArgumentOutOfRangeException">A label array's length does not match the matrix.</exception>
public static string[] ConvertToMatrixFile(decimal[,] distanceMatrix, string matrixOutputFilename, string[] rowLabels = null, string rowLabelsOutputFilename = null, string[] columnLabels = null, string columnLabelsOutputFilename = null, ClutoMatrixFormatTypes clutoMatrixFormatTypes = ClutoMatrixFormatTypes.SparseMatrixTopHalf, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    if (distanceMatrix == null || distanceMatrix.Length == 0)
    {
        throw new ArgumentNullException(nameof(distanceMatrix), "null or empty");
    }

    if (string.IsNullOrWhiteSpace(matrixOutputFilename))
    {
        throw new ArgumentNullException(nameof(matrixOutputFilename), "null or empty");
    }

    if (rowLabels != null && rowLabels.Length != distanceMatrix.GetLength(1))
    {
        throw new ArgumentOutOfRangeException(nameof(rowLabels), "length not equal to matrix rows");
    }

    if (columnLabels != null && columnLabels.Length != distanceMatrix.GetLength(0))
    {
        throw new ArgumentOutOfRangeException(nameof(columnLabels), "length not equal to matrix columns");
    }

    if (columnLabels != null && string.IsNullOrWhiteSpace(columnLabelsOutputFilename))
    {
        throw new ArgumentNullException(nameof(columnLabelsOutputFilename), "null or empty");
    }

    if (rowLabels != null && string.IsNullOrWhiteSpace(rowLabelsOutputFilename))
    {
        throw new ArgumentNullException(nameof(rowLabelsOutputFilename), "null or empty");
    }

    // The "half" formats are passed through DiagonalZeroHalfMatrix first (true = top half, false = bottom half).
    if (clutoMatrixFormatTypes == ClutoMatrixFormatTypes.DenseMatrixTopHalf || clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixTopHalf)
    {
        distanceMatrix = DiagonalZeroHalfMatrix(distanceMatrix, true);
    }
    else if (clutoMatrixFormatTypes == ClutoMatrixFormatTypes.DenseMatrixBottomHalf || clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixBottomHalf)
    {
        distanceMatrix = DiagonalZeroHalfMatrix(distanceMatrix, false);
    }

    bool sparse = clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixBottomHalf
                  || clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixTopHalf
                  || clutoMatrixFormatTypes == ClutoMatrixFormatTypes.SparseMatrixComplete;

    // Machine-readable output: never format numbers with the current (possibly comma-decimal) culture.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    var rowTotal = distanceMatrix.GetLength(1);
    var columnTotal = distanceMatrix.GetLength(0);

    // Counted after the optional halving so it matches what is actually written.
    var nonZeroTotal = distanceMatrix.Cast<decimal>().Count(a => a != 0);

    var lines = new List<string>(rowTotal + 1);

    var header = new List<string>
    {
        rowTotal.ToString(invariantCulture),
        columnTotal.ToString(invariantCulture)
    };

    if (sparse)
    {
        header.Add(nonZeroTotal.ToString(invariantCulture));
    }

    lines.Add(string.Join(" ", header));

    for (var y = 0; y < rowTotal; y++)
    {
        var rowValues = new List<string>();

        for (var x = 0; x < columnTotal; x++)
        {
            var value = distanceMatrix[x, y];

            if (sparse)
            {
                if (value == 0)
                {
                    continue;
                }

                // NOTE(review): the column index is written zero-based, as before; confirm against the
                // CLUTO sparse matrix format specification, which may expect one-based column numbers.
                rowValues.Add(x.ToString(invariantCulture));
            }

            rowValues.Add(value.ToString(invariantCulture));
        }

        // A row without non-zero values still produces an (empty) line in sparse mode.
        lines.Add(string.Join(" ", rowValues));
    }

    var savedFiles = new List<string>();

    if (File.Exists(matrixOutputFilename))
    {
        if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            matrixOutputFilename = FileExistsHandler.FindNextFreeOutputFilename(matrixOutputFilename);
        }
        else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
        {
            // Skipping the matrix also skips its label files.
            return savedFiles.ToArray();
        }
    }

    FileAndPathMethods.CreateDirectory(matrixOutputFilename);
    File.WriteAllLines(matrixOutputFilename, lines);
    savedFiles.Add(matrixOutputFilename);

    if (rowLabels != null && !string.IsNullOrWhiteSpace(rowLabelsOutputFilename))
    {
        var saveRowLabels = true;

        if (File.Exists(rowLabelsOutputFilename))
        {
            if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
            {
                rowLabelsOutputFilename = FileExistsHandler.FindNextFreeOutputFilename(rowLabelsOutputFilename);
            }
            else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
            {
                saveRowLabels = false;
            }
        }

        if (saveRowLabels)
        {
            FileAndPathMethods.CreateDirectory(rowLabelsOutputFilename);
            File.WriteAllLines(rowLabelsOutputFilename, rowLabels);
            savedFiles.Add(rowLabelsOutputFilename);
        }
    }

    if (columnLabels != null && !string.IsNullOrWhiteSpace(columnLabelsOutputFilename))
    {
        var saveColumnLabels = true;

        if (File.Exists(columnLabelsOutputFilename))
        {
            if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
            {
                columnLabelsOutputFilename = FileExistsHandler.FindNextFreeOutputFilename(columnLabelsOutputFilename);
            }
            else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
            {
                saveColumnLabels = false;
            }
        }

        if (saveColumnLabels)
        {
            FileAndPathMethods.CreateDirectory(columnLabelsOutputFilename);
            File.WriteAllLines(columnLabelsOutputFilename, columnLabels);
            savedFiles.Add(columnLabelsOutputFilename);
        }
    }

    return savedFiles.ToArray();
}
/// <summary>
/// Saves a multi-sheet spreadsheet in TSV and/or XLSX format by delegating to
/// <c>SaveSpreadsheetTsv</c> and <c>SaveSpreadsheetXl</c>.
/// </summary>
/// <param name="saveFilename">Output filename passed to both format writers.</param>
/// <param name="sheetNames">Sheet names passed to both format writers.</param>
/// <param name="spreadsheet">The sheets, each a list of cell rows.</param>
/// <param name="progressActionSet">Passed to both format writers.</param>
/// <param name="tsvFormat">Write the TSV output.</param>
/// <param name="xlsxFormat">Write the XLSX output.</param>
/// <param name="fileExistsOptions">Passed to both format writers.</param>
/// <returns>The filenames of the newly saved files (TSV results first, then XLSX results).</returns>
/// <exception cref="ArgumentException">Neither format is selected.</exception>
public static string[] SaveSpreadsheet(string saveFilename, string[] sheetNames, List<List<SpreadsheetCell[]>> spreadsheet, ProgressActionSet progressActionSet = null, bool tsvFormat = false, bool xlsxFormat = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    bool anyFormatSelected = tsvFormat || xlsxFormat;

    if (!anyFormatSelected)
    {
        throw new ArgumentException("Spreadsheet must be either TSV and/or XLSX format");
    }

    var savedFilenames = new List<string>();

    if (tsvFormat)
    {
        savedFilenames.AddRange(SaveSpreadsheetTsv(saveFilename, sheetNames, spreadsheet, progressActionSet, fileExistsOptions));
    }

    if (xlsxFormat)
    {
        savedFilenames.AddRange(SaveSpreadsheetXl(saveFilename, sheetNames, spreadsheet, progressActionSet, fileExistsOptions));
    }

    return savedFilenames.ToArray();
}
/// <summary>
/// Filters the given FASTA files and PDB files with the given options and saves the results to disk. Data
/// needs to be cleaned for two reasons, firstly to not pollute or distort the results, and secondly to save
/// unnecessary processing operations.
/// </summary>
/// <remarks>
/// The operations are visited by repeatedly doubling the numeric value of the current
/// <see cref="ProteinOperation"/>, starting from <c>LoadFile</c> and stopping at <c>Finished</c>; operations
/// whose flag is not set in <paramref name="proteinOperationOptionFlags"/> are skipped. After each operation
/// every non-null result list is saved as FASTA, with <c>%fasta_filename%</c> in the template replaced by a
/// description of the list. <c>%date%</c> and <c>%time%</c> are replaced once, up front.
/// </remarks>
/// <param name="cancellationToken">Checked before each operation and forwarded to the filter calls.</param>
/// <param name="maxAtomInterationDistance">Distance value forwarded to the interaction and symmetry filters.</param>
/// <param name="pdbFilesFolders">Folders holding the PDB files; only required when a structure filter is selected.</param>
/// <param name="fastaFiles">The FASTA/sequence files to load.</param>
/// <param name="proteinOperationOptionFlags">The set of filter operations to run.</param>
/// <param name="saveFastaFilenameTemplate">Output filename template (<c>%date%</c>, <c>%time%</c>, <c>%fasta_filename%</c>).</param>
/// <param name="progressActionSet">Receives progress and status reports.</param>
/// <param name="fileExistsOptions">What to do when an output file already exists.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="fastaFiles"/> is null or empty, <paramref name="saveFastaFilenameTemplate"/> is blank, or a
/// structure filter is selected without any <paramref name="pdbFilesFolders"/>.
/// </exception>
public static void CleanProteins(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFilesFolders, string[] fastaFiles, ProteinOperation proteinOperationOptionFlags, string saveFastaFilenameTemplate, ProgressActionSet progressActionSet, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    if (pdbFilesFolders == null || pdbFilesFolders.Length == 0)
    {
        // Structure folders are only mandatory when at least one structure-based filter is selected.
        if (proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveMultipleModelsInStructure)
            || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonInteractingProteinsInStructure)
            || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveNonSymmetricalInStructure)
            || proteinOperationOptionFlags.HasFlag(ProteinOperation.RemoveWrongNumberOfChainsInStructure))
        {
            throw new ArgumentOutOfRangeException(nameof(pdbFilesFolders));
        }
    }

    if (fastaFiles == null || fastaFiles.Length == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(fastaFiles));
    }

    if (string.IsNullOrWhiteSpace(saveFastaFilenameTemplate))
    {
        throw new ArgumentOutOfRangeException(nameof(saveFastaFilenameTemplate));
    }

    // The PDB folders are deliberately not scanned here: the file list was never used, and the folders
    // may legitimately be null or empty when only sequence filters are selected.

    string saveFilename = saveFastaFilenameTemplate;
    var currentProteinOperation = ProteinOperation.LoadFile;
    int[] numberSequencesLoaded;

    // Slot 0 is the working list carried from one operation to the next; slots 1 and 2 hold the
    // additional lists produced by operations that split their input.
    var sequences = new List<ISequence>[3];

    ProgressActionSet.Report("Filtering proteins.", progressActionSet);

    // Load fasta/sequence files.
    sequences[0] = SequenceFileHandler.LoadSequenceFileList(fastaFiles, StaticValues.MolNameProteinAcceptedValues, out numberSequencesLoaded, true);

    var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences[0]);

    for (int fileIndex = 0; fileIndex < numberSequencesLoaded.Length; fileIndex++)
    {
        if (numberSequencesLoaded[fileIndex] > 0)
        {
            // Sequence counts are halved to report proteins — presumably two sequences per protein (dimers); TODO confirm.
            ProgressActionSet.Report("Loaded " + numberSequencesLoaded[fileIndex] / 2 + " proteins from file: " + fastaFiles[fileIndex], progressActionSet);
        }
        else
        {
            ProgressActionSet.Report("Error could not load file: " + fastaFiles[fileIndex], progressActionSet);
        }
    }

    // Nothing was loaded from any file.
    if (!numberSequencesLoaded.Any(a => a > 0))
    {
        return;
    }

    // Replace placeholder variable names, using a single timestamp for both.
    DateTime timestamp = DateTime.Now;
    saveFilename = saveFilename.Replace("%date%", timestamp.ToString("yyyy-MM-dd"));
    saveFilename = saveFilename.Replace("%time%", timestamp.ToString("HH.mm.ss"));

    // Resolve a clash on the template filename itself (the %fasta_filename% placeholder is still unreplaced here).
    if (File.Exists(saveFilename))
    {
        if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            saveFilename = FileExistsHandler.FindNextFreeOutputFilename(saveFilename);
        }
        else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
        {
            return;
        }
    }

    // Run each selected operation in turn.
    while (currentProteinOperation != ProteinOperation.Finished)
    {
        if (cancellationToken.IsCancellationRequested)
        {
            break;
        }

        // Advance to the next operation flag by doubling the current value.
        currentProteinOperation = (ProteinOperation)((int)currentProteinOperation * 2);

        sequences[1] = null;
        sequences[2] = null;

        var sequencesDescriptions = new string[3];

        if (currentProteinOperation == ProteinOperation.Finished)
        {
            break;
        }

        bool isSelectableOperation =
            currentProteinOperation == ProteinOperation.RemoveNonProteinAlphabetInSequence
            || currentProteinOperation == ProteinOperation.RemoveWrongNumberOfChainsInSequence
            || currentProteinOperation == ProteinOperation.RemoveExactDuplicatesInSequence
            || currentProteinOperation == ProteinOperation.RemoveNonHomodimersInSequence
            || currentProteinOperation == ProteinOperation.RemoveWrongNumberOfChainsInStructure
            || currentProteinOperation == ProteinOperation.RemoveMultipleModelsInStructure
            || currentProteinOperation == ProteinOperation.RemoveNonInteractingProteinsInStructure
            || currentProteinOperation == ProteinOperation.RemoveNonSymmetricalInStructure;

        // Skip operations the caller did not select.
        if (isSelectableOperation && !proteinOperationOptionFlags.HasFlag(currentProteinOperation))
        {
            continue;
        }

        // Count sequences before operation.
        int beforeCount = sequences[0].Count / 2;

        // Update user about what is happening.
        ProgressActionSet.Report("", progressActionSet);
        ProgressActionSet.Report("Removing " + ProteinOperationString(currentProteinOperation) + " entries [from " + beforeCount + " proteins]", progressActionSet);

        // Start stopwatch to count duration of operation.
        Stopwatch stopwatch = Stopwatch.StartNew();

        // Perform specified operation.
        switch (currentProteinOperation)
        {
            case ProteinOperation.RemoveNonProteinAlphabetInSequence:
            {
                sequencesDescriptions[0] = "01 - Removed non-protein sequences (sequence filter)";
                sequences[0] = FilterProteins.RemoveNonProteinAlphabetSequences(cancellationToken, sequences[0], progressActionSet);
                break;
            }

            case ProteinOperation.RemoveWrongNumberOfChainsInSequence:
            {
                sequencesDescriptions[0] = "02 - Removed non-dimers (sequence filter)";
                sequences[0] = FilterProteins.RemoveSequencesWithIncorrectNumberOfChains(cancellationToken, sequences[0], 2, progressActionSet);
                break;
            }

            case ProteinOperation.RemoveExactDuplicatesInSequence:
            {
                sequencesDescriptions[0] = "03 - Removed exact duplicates (sequence filter)";
                sequences[0] = FilterProteins.RemoveDuplicates(cancellationToken, sequences[0], progressActionSet);
                break;
            }

            case ProteinOperation.RemoveNonHomodimersInSequence:
            {
                // Homodimers - all types - unfiltered for interactions or symmetry.
                var result = FilterProteins.SplitDimerTypes(cancellationToken, sequences[0], 30, 90, progressActionSet);

                sequencesDescriptions[0] = "04 - Homodimers only (sequence filter)";
                sequences[0] = result.HomoDimerPdbIdList;

                sequencesDescriptions[1] = "04 - Heterodimers only (sequence filter)";
                sequences[1] = result.HeteroDimerPdbIdList;

                sequencesDescriptions[2] = "04 - Homology dimers only (sequence filter)";
                sequences[2] = result.HomologyDimerPdbIdList;
                break;
            }

            case ProteinOperation.RemoveMultipleModelsInStructure:
            {
                sequencesDescriptions[0] = "05 - Removed multiple models (structure filter)";

                List<string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);
                pdbIdList = FilterProteins.RemoveMultipleStructureModels(cancellationToken, pdbFilesFolders, pdbIdList, progressActionSet);

                sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                break;
            }

            case ProteinOperation.RemoveWrongNumberOfChainsInStructure:
            {
                sequencesDescriptions[0] = "06 - Removed non-dimers (structure filter)";

                List<string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);
                pdbIdList = FilterProteins.RemoveStructuresWithIncorrectNumberOfChains(cancellationToken, pdbFilesFolders, pdbIdList, pdbIdChainIdList, 2, progressActionSet);

                sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                break;
            }

            case ProteinOperation.RemoveNonInteractingProteinsInStructure:
            {
                // Make copy of sequences as we will split the list into two parts - with and without interactions.
                sequences[1] = new List<ISequence>(sequences[0]);

                // Get pdb id list from sequences, to check for pdb file, load, perform processing.
                List<string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);

                // Makes a list of sequences with interactions.
                pdbIdList = FilterProteins.RemoveSequencesWithoutInteractions(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);

                // Remove any protein not in the list, keep the ones in the list.
                sequencesDescriptions[0] = "08 - dimers - with interactions - unfiltered for symmetry";
                sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                sequencesDescriptions[1] = "07 - dimers - no observed interactions";
                sequences[1] = FilterProteins.RemoveSequences(cancellationToken, sequences[1], pdbIdList, FilterProteins.RemoveSequencesOptions.RemoveSequencesInList);
                break;
            }

            case ProteinOperation.RemoveNonSymmetricalInStructure:
            {
                List<string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequences[0]);

                // Make copies of sequences as we will split the list into three parts by symmetry.
                sequences[1] = new List<ISequence>(sequences[0]);
                sequences[2] = new List<ISequence>(sequences[0]);

                Dictionary<string, decimal> symmetryPercentage = FilterProteins.CalculateStructureSymmetry(cancellationToken, maxAtomInterationDistance, pdbFilesFolders, pdbIdList, pdbIdChainIdList, progressActionSet);

                var pdbSymmetrical = new List<string>();
                var pdbPartSymmetrical = new List<string>();
                var pdbNonSymmetrical = new List<string>();

                foreach (var symmetryEntry in symmetryPercentage)
                {
                    if (symmetryEntry.Value == 0.0m)
                    {
                        pdbNonSymmetrical.Add(symmetryEntry.Key);
                    }
                    else if (symmetryEntry.Value == 100.0m)
                    {
                        pdbSymmetrical.Add(symmetryEntry.Key);
                    }
                    else if (symmetryEntry.Value > 0.0m && symmetryEntry.Value < 100.0m)
                    {
                        pdbPartSymmetrical.Add(symmetryEntry.Key);
                    }
                    else
                    {
                        ProgressActionSet.Report("Error: Out of bounds symmetry value of " + symmetryEntry.Value + " was found in " + symmetryEntry.Key + ".", progressActionSet);
                    }
                }

                sequencesDescriptions[0] = "11 - dimers - with interactions - 100% symmetrical";
                sequences[0] = FilterProteins.RemoveSequences(cancellationToken, sequences[0], pdbSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                sequencesDescriptions[1] = "10 - dimers - with interactions - 1% to 99% symmetrical";
                sequences[1] = FilterProteins.RemoveSequences(cancellationToken, sequences[1], pdbPartSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);

                sequencesDescriptions[2] = "09 - dimers - with interactions - 0% symmetrical";
                sequences[2] = FilterProteins.RemoveSequences(cancellationToken, sequences[2], pdbNonSymmetrical, FilterProteins.RemoveSequencesOptions.RemoveSequencesNotInList);
                break;
            }
        }

        // Stop stopwatch immediately after operation.
        stopwatch.Stop();

        // Count sequences after operation.
        int afterCount = sequences[0].Count / 2;

        // Results of an operation interrupted by cancellation are neither saved nor reported.
        if (!cancellationToken.IsCancellationRequested)
        {
            for (int sequencesIndex = 0; sequencesIndex < sequences.Length; sequencesIndex++)
            {
                if (sequences[sequencesIndex] == null)
                {
                    continue;
                }

                // Find free filename to save the latest sequence results of operations.
                string localSaveFilename = saveFilename.Replace("%fasta_filename%", sequencesDescriptions[sequencesIndex]);

                bool skipFile = false;

                if (File.Exists(localSaveFilename))
                {
                    if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                    {
                        localSaveFilename = FileExistsHandler.FindNextFreeOutputFilename(localSaveFilename);
                    }
                    else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                    {
                        skipFile = true;
                    }
                }

                if (!skipFile)
                {
                    // Save the sequence results and inform the user.
                    string savedFile = SequenceFileHandler.SaveSequencesAsFasta(sequences[sequencesIndex], localSaveFilename);

                    ProgressActionSet.Report("Saved file: " + savedFile, progressActionSet);
                }
            }

            // Update the user about the results.
            ProgressActionSet.Report("Removed " + (beforeCount - afterCount) + " proteins. [" + afterCount + " proteins remaining]. Elapsed: " + stopwatch.Elapsed.ToString(@"dd\:hh\:mm\:ss"), progressActionSet);
        }
    }

    if (!cancellationToken.IsCancellationRequested)
    {
        ProgressActionSet.Report("Finished all selected filtering operations.", progressActionSet);
    }
    else
    {
        ProgressActionSet.Report("Cancelled.", progressActionSet);

        // Run a full start/progress/finish cycle, flagged as unsuccessful.
        ProgressActionSet.StartAction(100, progressActionSet);
        ProgressActionSet.ProgressAction(100, progressActionSet);
        ProgressActionSet.FinishAction(false, progressActionSet);
    }
}
/// <summary>
/// Saves a single sheet that is supplied as a rectangular (2D) array of cells.
/// The array is converted with <c>ConvertTypes.SpreadsheetCell2DArrayToJaggedArray</c>
/// and the resulting row list is handed to another <c>SaveSpreadsheet</c> overload.
/// </summary>
/// <param name="saveFilename">Output filename, forwarded unchanged.</param>
/// <param name="sheetNames">Sheet names, forwarded unchanged.</param>
/// <param name="spreadsheet">The sheet as a 2D array of cells.</param>
/// <param name="progressActionSet">Progress reporting target, forwarded unchanged.</param>
/// <param name="tsvFormat">Forwarded unchanged (presumably enables TSV output; confirm against the receiving overload).</param>
/// <param name="xlsxFormat">Forwarded unchanged (presumably enables XLSX output; confirm against the receiving overload).</param>
/// <param name="fileExistsOptions">Forwarded unchanged.</param>
/// <returns>Whatever the receiving overload returns.</returns>
public static string[] SaveSpreadsheet(
    string saveFilename,
    string[] sheetNames,
    SpreadsheetCell[,] spreadsheet,
    ProgressActionSet progressActionSet = null,
    bool tsvFormat = false,
    bool xlsxFormat = true,
    FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    var sheetRows = ConvertTypes.SpreadsheetCell2DArrayToJaggedArray(spreadsheet).ToList();

    return SaveSpreadsheet(
        saveFilename,
        sheetNames,
        sheetRows,
        progressActionSet,
        tsvFormat,
        xlsxFormat,
        fileExistsOptions);
}
/// <summary>
/// Saves a single sheet that is supplied as a list of cell rows.
/// The sheet is wrapped in a one-element list of sheets and handed to the
/// multi-sheet <c>SaveSpreadsheet</c> overload.
/// </summary>
/// <param name="saveFilename">Output filename, forwarded unchanged.</param>
/// <param name="sheetNames">Sheet names, forwarded unchanged.</param>
/// <param name="spreadsheet">The rows of the single sheet.</param>
/// <param name="progressActionSet">Progress reporting target, forwarded unchanged.</param>
/// <param name="tsvFormat">Forwarded unchanged (presumably enables TSV output; confirm against the receiving overload).</param>
/// <param name="xlsxFormat">Forwarded unchanged (presumably enables XLSX output; confirm against the receiving overload).</param>
/// <param name="fileExistsOptions">Forwarded unchanged.</param>
/// <returns>Whatever the receiving overload returns.</returns>
public static string[] SaveSpreadsheet(
    string saveFilename,
    string[] sheetNames,
    List<SpreadsheetCell[]> spreadsheet,
    ProgressActionSet progressActionSet = null,
    bool tsvFormat = false,
    bool xlsxFormat = true,
    FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    var singleSheetWorkbook = new List<List<SpreadsheetCell[]>>();
    singleSheetWorkbook.Add(spreadsheet);

    return SaveSpreadsheet(
        saveFilename,
        sheetNames,
        singleSheetWorkbook,
        progressActionSet,
        tsvFormat,
        xlsxFormat,
        fileExistsOptions);
}
/// <summary>
/// Saves a single sheet that is supplied as a jagged array of cell rows.
/// Every row is copied into a new array and the resulting row list is handed
/// to another <c>SaveSpreadsheet</c> overload.
/// </summary>
/// <param name="saveFilename">Output filename, forwarded unchanged.</param>
/// <param name="sheetNames">Sheet names, forwarded unchanged.</param>
/// <param name="spreadsheet">
/// The rows of the single sheet. Null rows are forwarded as null instead of
/// causing an exception here.
/// </param>
/// <param name="progressActionSet">Progress reporting target, forwarded unchanged.</param>
/// <param name="tsvFormat">Forwarded unchanged (presumably enables TSV output; confirm against the receiving overload).</param>
/// <param name="xlsxFormat">Forwarded unchanged (presumably enables XLSX output; confirm against the receiving overload).</param>
/// <param name="fileExistsOptions">Forwarded unchanged.</param>
/// <returns>Whatever the receiving overload returns.</returns>
public static string[] SaveSpreadsheet(
    string saveFilename,
    string[] sheetNames,
    SpreadsheetCell[][] spreadsheet,
    ProgressActionSet progressActionSet = null,
    bool tsvFormat = false,
    bool xlsxFormat = true,
    FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    // SaveSpreadsheetXl and SaveSpreadsheetTsv both skip null rows, and the
    // List-of-rows overload passes them through untouched, so a null row must
    // not blow up during the defensive copy.
    var copiedRows = spreadsheet
        .Select(row => row?.ToArray())
        .ToList();

    return SaveSpreadsheet(
        saveFilename,
        sheetNames,
        copiedRows,
        progressActionSet,
        tsvFormat,
        xlsxFormat,
        fileExistsOptions);
}
/// <summary>
/// Saves several sheets that are each supplied as a rectangular (2D) array of cells.
/// Each sheet is converted with <c>ConvertTypes.SpreadsheetCell2DArrayToJaggedArray</c>
/// and the converted sheets are handed to the multi-sheet <c>SaveSpreadsheet</c> overload.
/// </summary>
/// <param name="saveFilename">Output filename, forwarded unchanged.</param>
/// <param name="sheetNames">Sheet names, forwarded unchanged.</param>
/// <param name="spreadsheet">The sheets, one 2D array of cells per sheet.</param>
/// <param name="progressActionSet">Progress reporting target, forwarded unchanged.</param>
/// <param name="tsvFormat">Forwarded unchanged (presumably enables TSV output; confirm against the receiving overload).</param>
/// <param name="xlsxFormat">Forwarded unchanged (presumably enables XLSX output; confirm against the receiving overload).</param>
/// <param name="fileExistsOptions">Forwarded unchanged.</param>
/// <returns>Whatever the receiving overload returns.</returns>
public static string[] SaveSpreadsheet(
    string saveFilename,
    string[] sheetNames,
    List<SpreadsheetCell[,]> spreadsheet,
    ProgressActionSet progressActionSet = null,
    bool tsvFormat = false,
    bool xlsxFormat = true,
    FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    var jaggedSheets = new List<List<SpreadsheetCell[]>>();

    // Convert sheet by sheet, keeping the original sheet order.
    for (int sheetIndex = 0; sheetIndex < spreadsheet.Count; sheetIndex++)
    {
        jaggedSheets.Add(ConvertTypes.SpreadsheetCell2DArrayToJaggedArray(spreadsheet[sheetIndex]).ToList());
    }

    return SaveSpreadsheet(
        saveFilename,
        sheetNames,
        jaggedSheets,
        progressActionSet,
        tsvFormat,
        xlsxFormat,
        fileExistsOptions);
}
/// <summary>
/// Writes one or more sheets of cells into a single .xlsx workbook.
/// </summary>
/// <param name="saveFilename">
/// Target path. Its extension is removed with <c>FileAndPathMethods.RemoveFileExtension</c>
/// and ".xlsx" is appended.
/// </param>
/// <param name="sheetNames">
/// Optional sheet names. Missing, null or blank entries are replaced by the
/// (verified) output file name followed by the 1-based sheet index, and every
/// name is passed through <c>XlSheetNameVerification</c>. The caller's array is
/// not modified.
/// </param>
/// <param name="spreadsheet">
/// The sheets to write: a list of sheets, each a list of rows, each row an array
/// of cells. Null rows and cells whose <c>CellData</c> is null or whitespace are skipped.
/// </param>
/// <param name="progressActionSet">Not used by this method.</param>
/// <param name="fileExistsOptions">
/// What to do when the target file already exists: pick the next free filename,
/// overwrite the file, or skip writing.
/// </param>
/// <returns>
/// An array holding the full path of the written workbook, or an empty array
/// when the file already exists and skipping was requested.
/// </returns>
public static string[] SaveSpreadsheetXl(
    string saveFilename,
    string[] sheetNames,
    List<List<SpreadsheetCell[]>> spreadsheet,
    ProgressActionSet progressActionSet = null,
    FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    var result = new List<string>();

    var xlFilename = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".xlsx");

    if (xlFilename.Exists)
    {
        if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            xlFilename = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(xlFilename.FullName));
        }
        else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
        {
            return result.ToArray();
        }

        // OverwriteFile (or any other value): keep the filename and write over it.
    }

    if (xlFilename.Directory != null)
    {
        xlFilename.Directory.Create();
    }

    var totalSheets = spreadsheet.Count;
    string defaultSheetName = XlSheetNameVerification(Path.GetFileNameWithoutExtension(xlFilename.Name));

    // Always work on a private copy so the caller's array is never mutated.
    // The copy has one slot per supplied name and at least one per sheet.
    var suppliedNameCount = sheetNames != null ? sheetNames.Length : 0;
    var resolvedSheetNames = new string[Math.Max(totalSheets, suppliedNameCount)];

    if (suppliedNameCount > 0)
    {
        Array.Copy(sheetNames, resolvedSheetNames, suppliedNameCount);
    }

    for (int index = 0; index < resolvedSheetNames.Length; index++)
    {
        if (string.IsNullOrWhiteSpace(resolvedSheetNames[index]))
        {
            resolvedSheetNames[index] = defaultSheetName + (index + 1);
        }

        resolvedSheetNames[index] = XlSheetNameVerification(resolvedSheetNames[index]);
    }

    XlSpreadsheetDocumentContainer xlSpreadsheet = ExcelCreateSpreadsheet.CreateSpreadsheetWorkbook(xlFilename.FullName, resolvedSheetNames);

    try
    {
        // Materialise the worksheet parts once instead of once per sheet.
        var worksheetParts = xlSpreadsheet.WorkbookPartObject.WorksheetParts.ToList();

        for (int sheetIndex = 0; sheetIndex < totalSheets; sheetIndex++)
        {
            var sheetRows = spreadsheet[sheetIndex];
            var sheetData = new SheetData();

            for (int rowIndex = 0; rowIndex < sheetRows.Count; rowIndex++)
            {
                var rowCells = sheetRows[rowIndex];

                if (rowCells == null)
                {
                    continue;
                }

                // Spreadsheet row numbers are 1-based.
                uint rowNumber = (uint)rowIndex + 1;
                var row = new Row { RowIndex = rowNumber };

                for (int columnIndex = 0; columnIndex < rowCells.Length; columnIndex++)
                {
                    var sourceCell = rowCells[columnIndex];
                    string cellText = sourceCell.CellData;

                    if (string.IsNullOrWhiteSpace(cellText))
                    {
                        continue;
                    }

                    var cell = new Cell
                    {
                        CellReference = AlphabetLetterRollOver(columnIndex) + rowNumber,
                        StyleIndex = 1U
                    };

                    switch (sourceCell.SpreadsheetDataType)
                    {
                        case SpreadsheetDataTypes.String:
                            cell.DataType = CellValues.String;
                            break;

                        case SpreadsheetDataTypes.Integer:
                        case SpreadsheetDataTypes.Double:
                        case SpreadsheetDataTypes.Decimal:
                            cell.DataType = CellValues.Number;
                            break;
                    }

                    cell.Append(new CellValue { Text = cellText });
                    row.Append(cell);
                }

                sheetData.Append(row);
            }

            var worksheet = new Worksheet();
            worksheet.Append(sheetData);

            worksheetParts[sheetIndex].Worksheet = worksheet;
        }
    }
    finally
    {
        // Close even when building a sheet fails, so the output file handle is released.
        xlSpreadsheet.SpreadsheetDocumentObject.Close();
    }

    result.Add(xlFilename.FullName);

    return result.ToArray();
}
/// <summary>
/// Writes a single sheet of cells to an .xlsx workbook by wrapping it in a
/// one-element list of sheets and calling the multi-sheet <c>SaveSpreadsheetXl</c> overload.
/// </summary>
/// <param name="saveFilename">Output filename, forwarded unchanged.</param>
/// <param name="sheetNames">Sheet names, forwarded unchanged.</param>
/// <param name="spreadsheet">The rows of the single sheet.</param>
/// <param name="progressActionSet">Progress reporting target, forwarded unchanged.</param>
/// <param name="fileExistsOptions">Forwarded unchanged.</param>
/// <returns>Whatever the multi-sheet overload returns.</returns>
public static string[] SaveSpreadsheetXl(
    string saveFilename,
    string[] sheetNames,
    List<SpreadsheetCell[]> spreadsheet,
    ProgressActionSet progressActionSet = null,
    FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    var singleSheetWorkbook = new List<List<SpreadsheetCell[]>>();
    singleSheetWorkbook.Add(spreadsheet);

    return SaveSpreadsheetXl(
        saveFilename,
        sheetNames,
        singleSheetWorkbook,
        progressActionSet,
        fileExistsOptions);
}
/// <summary>
/// Writes a single sheet of cells to a tab-separated text file, one line per row.
/// </summary>
/// <param name="saveFilename">
/// Target path. Its extension is removed with <c>FileAndPathMethods.RemoveFileExtension</c>
/// and ".tsv" is appended.
/// </param>
/// <param name="spreadsheet">
/// The rows to write. Null rows are skipped; the <c>CellData</c> values of each
/// remaining row are joined with tab characters.
/// </param>
/// <param name="progressActionSet">Not used by this method.</param>
/// <param name="fileExistsOptions">
/// What to do when the target file already exists: pick the next free filename,
/// overwrite the file, or skip writing.
/// </param>
/// <returns>
/// An array holding the full path of the written file, or an empty array when
/// the file already exists and skipping was requested.
/// </returns>
public static string[] SaveSpreadsheetTsv(
    string saveFilename,
    List<SpreadsheetCell[]> spreadsheet,
    ProgressActionSet progressActionSet = null,
    FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    var tsvFile = new FileInfo(FileAndPathMethods.RemoveFileExtension(saveFilename) + ".tsv");

    if (tsvFile.Exists)
    {
        switch (fileExistsOptions)
        {
            case FileExistsHandler.FileExistsOptions.AppendNumberToFilename:
                tsvFile = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(tsvFile.FullName));
                break;

            case FileExistsHandler.FileExistsOptions.SkipFile:
                return new string[0];

            // OverwriteFile needs no preparation: the write below replaces the file.
        }
    }

    // Build every line before touching the disk, so a failure while formatting
    // leaves no directory or file behind.
    var tsvLines = new List<string>();

    foreach (var cells in spreadsheet)
    {
        if (cells != null)
        {
            tsvLines.Add(string.Join("\t", cells.Select(cell => cell.CellData)));
        }
    }

    if (tsvFile.Directory != null)
    {
        tsvFile.Directory.Create();
    }

    File.WriteAllLines(tsvFile.FullName, tsvLines);

    return new[] { tsvFile.FullName };
}
/// <summary>
/// Writes several sheets as tab-separated text files, one file per sheet, by
/// calling the single-sheet <c>SaveSpreadsheetTsv</c> overload for each sheet.
/// </summary>
/// <param name="saveFilename">
/// Base path. Its directory and extension are reused for every sheet file; its
/// name is used to build a default sheet name.
/// </param>
/// <param name="sheetNames">
/// Optional sheet names used as the file names. A missing, null or blank entry is
/// replaced by "&lt;base name&gt; [sheet NN]", where NN is the 1-based sheet index
/// zero-padded to at least two digits.
/// </param>
/// <param name="spreadsheet">The sheets to write.</param>
/// <param name="progressActionSet">Forwarded unchanged to the single-sheet overload.</param>
/// <param name="fileExistsOptions">Forwarded unchanged to the single-sheet overload.</param>
/// <returns>The concatenated results of the per-sheet saves, in sheet order.</returns>
public static string[] SaveSpreadsheetTsv(
    string saveFilename,
    string[] sheetNames,
    List<List<SpreadsheetCell[]>> spreadsheet,
    ProgressActionSet progressActionSet = null,
    FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
{
    var savedFiles = new List<string>();

    // Pad the sheet number to the digit count of the sheet total, minimum two digits.
    int sheetNumberWidth = spreadsheet.Count.ToString().Length;

    if (sheetNumberWidth < 2)
    {
        sheetNumberWidth = 2;
    }

    for (int sheetIndex = 0; sheetIndex < spreadsheet.Count; sheetIndex++)
    {
        bool hasSuppliedName = sheetNames != null && sheetIndex < sheetNames.Length;
        string sheetName = hasSuppliedName ? sheetNames[sheetIndex] : "";

        if (string.IsNullOrWhiteSpace(sheetName))
        {
            string sheetNumber = (sheetIndex + 1).ToString().PadLeft(sheetNumberWidth, '0');
            sheetName = Path.GetFileNameWithoutExtension(saveFilename) + " [sheet " + sheetNumber + "]";
        }

        string sheetFilename = FileAndPathMethods.MergePathAndFilename(
            Path.GetDirectoryName(saveFilename),
            sheetName + Path.GetExtension(saveFilename));

        savedFiles.AddRange(SaveSpreadsheetTsv(sheetFilename, spreadsheet[sheetIndex], progressActionSet, fileExistsOptions));
    }

    return savedFiles.ToArray();
}
/// <summary>
/// Save to disk a list of sequences in FASTA format.
/// </summary>
/// <param name="sequences">The sequences to write. Must not be null.</param>
/// <param name="saveFilename">Target path. Must not be null, empty or whitespace.</param>
/// <param name="appendSequenceCountToFilename">
/// When true, the filename is first rewritten by <c>AddSequenceAndProteinCountToFilename</c>.
/// </param>
/// <param name="fileExistsOptions">
/// What to do when the target file already exists: pick the next free filename,
/// overwrite the file, or skip writing.
/// </param>
/// <param name="progressActionSet">
/// Optional. When not null, a message describing the chosen action and the
/// affected file is reported to it.
/// </param>
/// <returns>
/// The full path of the written file, or null when the file already exists and
/// skipping was requested.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="sequences"/> is null, or <paramref name="saveFilename"/> is
/// null, empty or whitespace.
/// </exception>
public static string SaveSequencesAsFasta(
    List<ISequence> sequences,
    string saveFilename,
    bool appendSequenceCountToFilename = true,
    FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename,
    ProgressActionSet progressActionSet = null)
{
    // The exception type is kept as-is so existing callers' catch blocks keep working.
    if (sequences == null)
    {
        throw new ArgumentOutOfRangeException(nameof(sequences));
    }

    if (string.IsNullOrWhiteSpace(saveFilename))
    {
        throw new ArgumentOutOfRangeException(nameof(saveFilename));
    }

    if (appendSequenceCountToFilename)
    {
        saveFilename = AddSequenceAndProteinCountToFilename(sequences, saveFilename);
    }

    var fileInfo = new FileInfo(saveFilename);

    if (fileInfo.Exists)
    {
        switch (fileExistsOptions)
        {
            case FileExistsHandler.FileExistsOptions.AppendNumberToFilename:
                fileInfo = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(fileInfo.FullName));

                if (progressActionSet != null)
                {
                    ProgressActionSet.Report("Save sequence: already exists, appended number: " + fileInfo.FullName, progressActionSet);
                }

                break;

            case FileExistsHandler.FileExistsOptions.OverwriteFile:
                if (progressActionSet != null)
                {
                    ProgressActionSet.Report("Save sequence: overwriting file: " + fileInfo.FullName, progressActionSet);
                }

                break;

            case FileExistsHandler.FileExistsOptions.SkipFile:
                if (progressActionSet != null)
                {
                    ProgressActionSet.Report("Save sequence: skipped file, already exists: " + fileInfo.FullName, progressActionSet);
                }

                return null;
        }
    }
    else if (progressActionSet != null)
    {
        ProgressActionSet.Report("Save sequence: new file: " + fileInfo.FullName, progressActionSet);
    }

    // Make sure the target directory exists before the formatter opens the file.
    if (fileInfo.Directory != null)
    {
        fileInfo.Directory.Create();
    }

    var formatter = new FastAFormatter(fileInfo.FullName);

    try
    {
        formatter.Write(sequences);
    }
    finally
    {
        // Close even when writing fails, so the output file is not left open.
        formatter.Close();
    }

    return fileInfo.FullName;
}