Esempio n. 1
0
        /// <summary>
        /// Repopulates <c>FastaEntries</c> from the FASTA file at <c>FastaDbFilePath</c>.
        /// The collection is always emptied first and stays empty when the path is
        /// null/empty, the file does not exist, or reading fails with a
        /// <see cref="FormatException"/> (which is reported through the dialog service).
        /// </summary>
        private void LoadFastaFile()
        {
            this.FastaEntries.Clear();

            var fastaPath = this.FastaDbFilePath;
            if (string.IsNullOrEmpty(fastaPath) || !File.Exists(fastaPath))
            {
                // Nothing to load; leave the collection empty.
                return;
            }

            try
            {
                var loadedEntries = FastaReaderWriter.ReadFastaFile(fastaPath);
                this.FastaEntries.AddRange(loadedEntries);
            }
            catch (FormatException formatError)
            {
                // Surface the problem to the user and make sure the collection ends up empty.
                this.dialogService.ExceptionAlert(formatError);
                this.FastaEntries.Clear();
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Counts how many rows of a missing-IDs file list at least one protein accession
        /// that appears as a protein name in a FASTA file, and prints the totals to the console.
        /// </summary>
        /// <param name="missingIdsFile">
        /// Tab-delimited file, combined with the hard-coded data directory. Rows with fewer
        /// than five columns are skipped; the third column is split on ';' into accessions.
        /// </param>
        /// <param name="fastaFile">FASTA file, combined with the same hard-coded data directory.</param>
        public void CountMissingIdsInFasta(string missingIdsFile, string fastaFile)
        {
            string directoryPath = @"C:\Users\wilk011\Documents\DataFiles\FlashUnitTest";

            missingIdsFile = Path.Combine(directoryPath, missingIdsFile);
            fastaFile      = Path.Combine(directoryPath, fastaFile);

            // Parse missing ids file.
            // Each list element corresponds to one ID row; each array element is a protein accession.
            var missingProteins = new List<string[]>();

            foreach (var line in File.ReadLines(missingIdsFile))
            {
                var parts = line.Split('\t');
                if (parts.Length < 5)
                {
                    continue;
                }

                var proteins = parts[2].Split(';').Where(part => !string.IsNullOrWhiteSpace(part)).ToArray();
                missingProteins.Add(proteins);
            }

            // Parse FASTA file. Only the protein names are needed for the membership test, and a
            // set (unlike ToDictionary) does not throw when two entries share the same name.
            var fastaProteinNames = new HashSet<string>(
                FastaReaderWriter.ReadFastaFile(fastaFile).Select(fasta => fasta.ProteinName));

            // Count the rows that have at least one accession present in the FASTA file.
            int numInFasta    = missingProteins.Count(proteinSet => proteinSet.Any(protein => fastaProteinNames.Contains(protein)));
            int numNotInFasta = missingProteins.Count - numInFasta;

            Console.WriteLine("Total: " + missingProteins.Count);
            Console.WriteLine("# In FASTA:" + numInFasta);
            Console.WriteLine("# Not in FASTA: " + numNotInFasta);
        }
Esempio n. 3
0
        /// <summary>
        /// Counts how many sequences from a tab-delimited ID file occur as a substring of at
        /// least one protein sequence in a FASTA file, and prints the result to the console.
        /// </summary>
        /// <param name="uniqueIdFile">
        /// Tab-delimited file, combined with the hard-coded data directory. The second column
        /// of each row is used as the sequence; rows without a second column are skipped.
        /// </param>
        /// <param name="fastaFile">FASTA file, combined with the same hard-coded data directory.</param>
        public void CountFlashIdsInFasta(string uniqueIdFile, string fastaFile)
        {
            string directoryPath = @"C:\Users\wilk011\Documents\DataFiles\FlashUnitTest";

            uniqueIdFile = Path.Combine(directoryPath, uniqueIdFile);
            fastaFile    = Path.Combine(directoryPath, fastaFile);

            // Rows with fewer than two columns (e.g. a trailing blank line) are skipped
            // instead of failing with an IndexOutOfRangeException.
            var sequences = File.ReadLines(uniqueIdFile)
                                .Select(line => line.Split('\t'))
                                .Where(columns => columns.Length > 1)
                                .Select(columns => columns[1])
                                .ToList();

            // Materialize the protein sequences once: the reader returns an enumerable that is
            // searched once per sequence below, and it may be lazily evaluated (re-reading the file
            // on every pass) - NOTE(review): confirm against FastaReaderWriter.ReadFastaFile.
            var proteinSequences = FastaReaderWriter.ReadFastaFile(fastaFile)
                                                    .Select(fasta => fasta.ProteinSequenceText)
                                                    .ToList();

            int numUniqueInFasta = sequences.Count(
                sequence => proteinSequences.Any(proteinSequence => proteinSequence.Contains(sequence)));

            Console.WriteLine(@"{0} of {1} in {2}", numUniqueInFasta, sequences.Count, Path.GetFileNameWithoutExtension(fastaFile));
        }
Esempio n. 4
0
        /// <summary>
        /// Create a truncated FASTA file based on selected proteins.
        /// </summary>
        /// <remarks>
        /// The entries written depend on the current mode:
        /// <list type="bullet">
        /// <item><description><c>FromFastaEntry</c>: every selected entry of <c>FastaEntries</c>, unchanged.</description></item>
        /// <item><description><c>FromSequence</c> with both termini fixed: a single entry carrying the name and
        /// description of the first selected protein and <c>SelectedSequence</c> as its sequence;
        /// no entry at all when nothing is selected.</description></item>
        /// <item><description><c>FromSequence</c> with only the N-terminus fixed: for each selected protein containing
        /// <c>SelectedSequence</c>, the protein sequence from the first occurrence to the end.</description></item>
        /// <item><description><c>FromSequence</c> with only the C-terminus fixed: for each selected protein containing
        /// <c>SelectedSequence</c>, the protein sequence from the start through the end of the first occurrence.</description></item>
        /// <item><description><c>FromSequence</c> with neither terminus fixed: the selected proteins, unchanged.</description></item>
        /// </list>
        /// When neither mode is active an empty collection is written.
        /// </remarks>
        /// <returns>The path to the truncated FASTA database file.</returns>
        private string CreateTruncatedFastaFile()
        {
            var fastaFileName = Path.GetFileNameWithoutExtension(this.FastaDbFilePath);
            var filePath      = string.Format("{0}\\{1}_truncated.fasta", this.OutputFilePath, fastaFileName);

            IEnumerable<FastaEntry> entries = new FastaEntry[0];

            if (this.FromFastaEntry)
            {
                entries = this.FastaEntries.Where(entry => entry.Selected);
            }
            else if (this.FromSequence)
            {
                var selectedEntries = this.SequenceProteins.Where(entry => entry.Selected).ToArray();
                if (this.FixedNTerm && this.FixedCTerm)
                {
                    // Just use the selected sequence, labelled with the first selected protein.
                    // With nothing selected, keep the empty default instead of handing the
                    // writer a list that contains a null entry.
                    if (selectedEntries.Length > 0)
                    {
                        var firstSelected = selectedEntries[0];
                        entries = new List<FastaEntry>
                        {
                            new FastaEntry
                            {
                                ProteinName         = firstSelected.ProteinName,
                                ProteinDescription  = firstSelected.ProteinDescription,
                                ProteinSequenceText = this.SelectedSequence,
                                Selected            = true
                            }
                        };
                    }
                }
                else if (this.FixedNTerm)
                {
                    // Keep everything from the first occurrence of the selected sequence onward.
                    entries = from entry in selectedEntries
                              let startIndex = entry.ProteinSequenceText.IndexOf(this.SelectedSequence, StringComparison.Ordinal)
                              where startIndex > -1
                              let sequence = entry.ProteinSequenceText.Substring(startIndex)
                              select new FastaEntry
                              {
                                  ProteinName         = entry.ProteinName,
                                  ProteinDescription  = entry.ProteinDescription,
                                  ProteinSequenceText = sequence
                              };
                }
                else if (this.FixedCTerm)
                {
                    // Keep everything from the start through the end of the first occurrence.
                    entries = from entry in selectedEntries
                              let startIndex = entry.ProteinSequenceText.IndexOf(this.SelectedSequence, StringComparison.Ordinal)
                              where startIndex > -1
                              let sequence = entry.ProteinSequenceText.Substring(0, startIndex + this.SelectedSequence.Length)
                              select new FastaEntry
                              {
                                  ProteinName         = entry.ProteinName,
                                  ProteinDescription  = entry.ProteinDescription,
                                  ProteinSequenceText = sequence
                              };
                }
                else
                {
                    entries = selectedEntries;
                }
            }

            Console.WriteLine(@"Creating truncated fasta file at: {0}", filePath);
            FastaReaderWriter.Write(entries, filePath);
            return filePath;
        }