/// <summary>
/// Repopulates <c>FastaEntries</c> with the entries read from the FASTA file at <c>FastaDbFilePath</c>.
/// </summary>
/// <remarks>
/// The list is always emptied first. Nothing is loaded when the path is null/empty or the
/// file does not exist. If reading raises a <see cref="FormatException"/>, the exception is
/// passed to the dialog service and the list is left empty.
/// </remarks>
private void LoadFastaFile()
{
    this.FastaEntries.Clear();

    var databasePath = this.FastaDbFilePath;
    if (string.IsNullOrEmpty(databasePath) || !File.Exists(databasePath))
    {
        // No usable database file: leave the list empty.
        return;
    }

    try
    {
        var parsedEntries = FastaReaderWriter.ReadFastaFile(databasePath);
        this.FastaEntries.AddRange(parsedEntries);
    }
    catch (FormatException formatError)
    {
        this.dialogService.ExceptionAlert(formatError);

        // Discard anything that was added before the failure.
        this.FastaEntries.Clear();
    }
}
/// <summary>
/// Counts how many rows of a tab-separated "missing IDs" file have at least one protein
/// accession whose name appears in a FASTA file, and writes the totals to the console.
/// </summary>
/// <param name="missingIdsFile">
/// File name of the tab-separated IDs file, combined with the hard-coded data directory.
/// Rows with fewer than 5 columns are skipped; column index 2 holds a ';'-separated list
/// of protein accessions.
/// </param>
/// <param name="fastaFile">File name of the FASTA file, combined with the hard-coded data directory.</param>
public void CountMissingIdsInFasta(string missingIdsFile, string fastaFile)
{
    string directoryPath = @"C:\Users\wilk011\Documents\DataFiles\FlashUnitTest";
    missingIdsFile = Path.Combine(directoryPath, missingIdsFile);
    fastaFile = Path.Combine(directoryPath, fastaFile);

    // Parse missing ids file.
    // Each element of the list corresponds to an ID; each element of the array is a protein accession.
    List<string[]> missingProteins = new List<string[]>();
    foreach (var line in File.ReadLines(missingIdsFile))
    {
        var parts = line.Split('\t');
        if (parts.Length < 5)
        {
            continue;
        }

        var proteins = parts[2].Split(';').Where(part => !string.IsNullOrWhiteSpace(part)).ToArray();
        missingProteins.Add(proteins);
    }

    // Parse FASTA file. Only name membership is needed, so collect the names into a set:
    // unlike ToDictionary, this does not throw when two entries share a protein name.
    var fastaProteinNames = new HashSet<string>(
        FastaReaderWriter.ReadFastaFile(fastaFile).Select(f => f.ProteinName),
        StringComparer.Ordinal);

    // Count the IDs that have at least one accession present in the FASTA file.
    int numInFasta = missingProteins.Count(
        proteinSet => proteinSet.Any(protein => fastaProteinNames.Contains(protein)));
    int numNotInFasta = missingProteins.Count - numInFasta;

    Console.WriteLine("Total: " + missingProteins.Count);
    Console.WriteLine("# In FASTA:" + numInFasta);
    Console.WriteLine("# Not in FASTA: " + numNotInFasta);
}
/// <summary>
/// Counts how many sequences from a tab-separated ID file occur as a substring of at least
/// one protein sequence in a FASTA file, and writes the result to the console.
/// </summary>
/// <param name="uniqueIdFile">
/// File name of the tab-separated IDs file, combined with the hard-coded data directory.
/// The sequence is read from column index 1; rows without that column are skipped.
/// </param>
/// <param name="fastaFile">File name of the FASTA file, combined with the hard-coded data directory.</param>
public void CountFlashIdsInFasta(string uniqueIdFile, string fastaFile)
{
    string directoryPath = @"C:\Users\wilk011\Documents\DataFiles\FlashUnitTest";
    uniqueIdFile = Path.Combine(directoryPath, uniqueIdFile);
    fastaFile = Path.Combine(directoryPath, fastaFile);

    // Skip rows that have no second column (e.g. blank lines) instead of failing on columns[1].
    var sequences = File.ReadLines(uniqueIdFile)
        .Select(line => line.Split('\t'))
        .Where(columns => columns.Length > 1)
        .Select(columns => columns[1])
        .ToList();

    // Materialize once: the entries are scanned for every sequence below, and the reader's
    // result should not be re-enumerated (and possibly re-read from disk) on each pass.
    var fastas = FastaReaderWriter.ReadFastaFile(fastaFile).ToList();

    int numUniqueInFasta = 0;
    foreach (var sequence in sequences)
    {
        if (fastas.Any(fasta => fasta.ProteinSequenceText.Contains(sequence)))
        {
            numUniqueInFasta++;
        }
    }

    Console.WriteLine(@"{0} of {1} in {2}", numUniqueInFasta, sequences.Count, Path.GetFileNameWithoutExtension(fastaFile));
}
/// <summary>
/// Create a truncated FASTA file based on selected proteins.
/// </summary>
/// <remarks>
/// The file is written to <c>OutputFilePath</c> and named after the FASTA database file with a
/// <c>_truncated.fasta</c> suffix. Which entries are written depends on the current mode:
/// <list type="bullet">
/// <item><description><c>FromFastaEntry</c>: the selected entries of <c>FastaEntries</c>, unchanged.</description></item>
/// <item><description><c>FromSequence</c>, both termini fixed: a single entry carrying the first selected
/// protein's name and description with <c>SelectedSequence</c> as its sequence; no entry when
/// nothing is selected.</description></item>
/// <item><description><c>FromSequence</c>, N-terminus fixed: each selected protein cut to start at the
/// first occurrence of <c>SelectedSequence</c>.</description></item>
/// <item><description><c>FromSequence</c>, C-terminus fixed: each selected protein cut to end at the end
/// of the first occurrence of <c>SelectedSequence</c>.</description></item>
/// <item><description><c>FromSequence</c>, neither fixed: the selected proteins, unchanged.</description></item>
/// </list>
/// Selected proteins that do not contain <c>SelectedSequence</c> are omitted in the two
/// single-terminus modes.
/// </remarks>
/// <returns>The path to the truncated FASTA database file.</returns>
private string CreateTruncatedFastaFile()
{
    var fastaFileName = Path.GetFileNameWithoutExtension(this.FastaDbFilePath);
    var filePath = string.Format("{0}\\{1}_truncated.fasta", this.OutputFilePath, fastaFileName);

    IEnumerable<FastaEntry> entries = Enumerable.Empty<FastaEntry>();

    if (this.FromFastaEntry)
    {
        entries = this.FastaEntries.Where(entry => entry.Selected);
    }
    else if (this.FromSequence)
    {
        var selectedEntries = this.SequenceProteins.Where(entry => entry.Selected).ToArray();

        if (this.FixedNTerm && this.FixedCTerm)
        {
            // Just use the selected sequence, attributed to the first selected protein.
            // Take(1) yields an empty list when nothing is selected, rather than a list
            // holding a single null entry that would be handed to the writer.
            entries = selectedEntries
                .Take(1)
                .Select(
                    entry => new FastaEntry
                    {
                        ProteinName = entry.ProteinName,
                        ProteinDescription = entry.ProteinDescription,
                        ProteinSequenceText = this.SelectedSequence,
                        Selected = true
                    })
                .ToList();
        }
        else if (this.FixedNTerm)
        {
            // Keep everything from the first occurrence of the selected sequence onward.
            entries = from entry in selectedEntries
                      let startIndex = entry.ProteinSequenceText.IndexOf(this.SelectedSequence, StringComparison.Ordinal)
                      where startIndex > -1
                      let sequence = entry.ProteinSequenceText.Substring(startIndex)
                      select new FastaEntry
                      {
                          ProteinName = entry.ProteinName,
                          ProteinDescription = entry.ProteinDescription,
                          ProteinSequenceText = sequence
                      };
        }
        else if (this.FixedCTerm)
        {
            // Keep everything up to the end of the first occurrence of the selected sequence.
            entries = from entry in selectedEntries
                      let startIndex = entry.ProteinSequenceText.IndexOf(this.SelectedSequence, StringComparison.Ordinal)
                      where startIndex > -1
                      let sequence = entry.ProteinSequenceText.Substring(0, startIndex + this.SelectedSequence.Length)
                      select new FastaEntry
                      {
                          ProteinName = entry.ProteinName,
                          ProteinDescription = entry.ProteinDescription,
                          ProteinSequenceText = sequence
                      };
        }
        else
        {
            entries = selectedEntries;
        }
    }

    Console.WriteLine(@"Creating truncated fasta file at: {0}", filePath);
    FastaReaderWriter.Write(entries, filePath);
    return filePath;
}