Inheritance: IDisposable
Esempio n. 1
0
        /// <summary>
        /// Builds the output FASTA database from <c>Options.InputFiles</c>, optionally writing a log file
        /// and a BLAST database alongside it.
        /// </summary>
        /// <remarks>
        /// The output name is derived from <c>Options.OutputFastaFile</c>: its extension (".fasta" when it has
        /// none) is kept, and unless <c>Options.DoNotAppendDatabaseType</c> is set a "_TARGET", "_DECOY" or
        /// "_CONCAT" suffix is inserted before the extension. A <see cref="DirectoryNotFoundException"/> raised
        /// while producing the output is reported through <c>System.Diagnostics.Trace</c> and not rethrown.
        /// </remarks>
        /// <exception cref="ArgumentNullException">No input files are configured.</exception>
        /// <exception cref="ArgumentException">The resolved output path is also one of the input files.</exception>
        public void CreateDatabase()
        {
            try
            {
                // Validate Options are kosher
                if (Options.InputFiles == null || Options.InputFiles.Count == 0)
                {
                    throw new ArgumentNullException("Input Files");
                }

                string ext = Path.GetExtension(Options.OutputFastaFile);
                if (string.IsNullOrEmpty(ext))
                {
                    ext = ".fasta";
                }

                // One suffix per database type, shared by the FASTA and the log file names.
                string typeSuffix;
                switch (Options.OutputType)
                {
                    case DatabaseType.Target:
                        typeSuffix = "_TARGET";
                        break;
                    case DatabaseType.Decoy:
                        typeSuffix = "_DECOY";
                        break;
                    case DatabaseType.Concatenated:
                    default:
                        typeSuffix = "_CONCAT";
                        break;
                }

                // Drop the existing extension before appending, otherwise "out.fasta" would turn into
                // "out.fasta_TARGET.fasta" (or "out.fasta.fasta") instead of "out_TARGET.fasta".
                string output_filename = Path.ChangeExtension(Path.GetFullPath(Options.OutputFastaFile), null);
                if (!Options.DoNotAppendDatabaseType)
                {
                    output_filename += typeSuffix;
                }
                output_filename += ext;

                // Refuse to write over a file that is about to be read. Paths are normalised first so a
                // relative input path is still recognised.
                foreach (string inputFile in Options.InputFiles)
                {
                    if (string.IsNullOrEmpty(inputFile))
                    {
                        continue;
                    }

                    if (string.Equals(Path.GetFullPath(inputFile), output_filename, StringComparison.Ordinal))
                    {
                        throw new ArgumentException("Output file path cannot be the same as an input file.");
                    }
                }

                string logFilename = Path.GetFileNameWithoutExtension(Options.OutputFastaFile);
                string outputFolder = Path.GetDirectoryName(Options.OutputFastaFile);
                if (!Directory.Exists(outputFolder))
                {
                    outputFolder = Directory.GetCurrentDirectory();
                }

                if (Options.GenerateLogFile)
                {
                    logFilename += typeSuffix + ".log";
                    GenerateLog(outputFolder, logFilename);
                }

                // output_filename is already an absolute path, so Path.Combine returns it unchanged.
                // NOTE(review): this means the current-directory fallback above does not apply to the
                // FASTA path itself - confirm whether that is intended.
                string outputPath = Path.Combine(outputFolder, output_filename);

                if (Options.DoNotMergeFiles)
                {
                    // NOTE(review): every input is written through a new writer on the same outputPath.
                    // If FastaWriter truncates an existing file, only the last input survives - confirm
                    // the intended per-input naming before relying on this mode.
                    foreach (string fastaFile in Options.InputFiles)
                    {
                        using (FastaWriter writer = new FastaWriter(outputPath))
                        {
                            WriteFasta(fastaFile, writer);
                        }
                    }
                }
                else
                {
                    using (FastaWriter writer = new FastaWriter(outputPath))
                    {
                        foreach (string fastaFile in Options.InputFiles)
                        {
                            WriteFasta(fastaFile, writer);
                        }
                    }
                }

                if (Options.BlastDatabase)
                {
                    MakeBlastDatabase(outputFolder, output_filename, Path.GetFileNameWithoutExtension(output_filename));
                }
            }
            catch (DirectoryNotFoundException ex)
            {
                // Still non-fatal for callers, but no longer silent: leave a trace of why nothing was produced.
                System.Diagnostics.Trace.TraceWarning("CreateDatabase could not complete: " + ex.Message);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Writes sequence coverage reports for every non-decoy protein group into
        /// <paramref name="outputDirectory"/>.
        /// </summary>
        /// <remarks>
        /// Four files are produced:
        /// "Sequence Coverage Map.txt" (text map of each group's representative protein and its peptides),
        /// "data.csv" (one row per contiguous covered stretch: 0-based start position and stretch length),
        /// "identifiedSequences.fasta" (covered residues only, uncovered positions replaced by spaces) and
        /// "observedProteins.fasta" (full representative sequences).
        /// Groups are processed in ascending order of <c>LongestProteinLen</c>.
        /// Every line of the text map starts with a one-character marker column, so residue <c>i</c>
        /// (0-based) always sits in column <c>i + 1</c>.
        /// </remarks>
        /// <param name="proteinGroups">Protein groups to report; groups flagged as decoys are skipped.</param>
        /// <param name="outputDirectory">Directory the four report files are written to.</param>
        private void WriteSequenceMaps(List<ProteinGroup> proteinGroups, string outputDirectory)
        {
            string fileName = Path.Combine(outputDirectory, "Sequence Coverage Map.txt");
            Log("Writing file " + fileName);
            string csvFile = Path.Combine(outputDirectory, "data.csv");
            string identifiedFile = Path.Combine(outputDirectory, "identifiedSequences.fasta");
            string observedFile = Path.Combine(outputDirectory, "observedProteins.fasta");
            using (StreamWriter csvWriter = new StreamWriter(csvFile))
            using (FastaWriter identifiedWriter = new FastaWriter(identifiedFile))
            using (FastaWriter observedWriter = new FastaWriter(observedFile))
            using (StreamWriter writer = new StreamWriter(fileName))
            {
                // NOTE(review): the protein name is written unquoted below; a description containing a
                // comma will add columns to that row - confirm what consumers of data.csv expect.
                csvWriter.WriteLine("protein,position,count,coverage,protein name,# peptides");

                int proteinID = 0;
                foreach (ProteinGroup proteinGroup in proteinGroups.Where(g => !g.IsDecoy).OrderBy(g => g.LongestProteinLen))
                {
                    proteinID++;
                    string sequence = proteinGroup.RepresentativeProtein.Sequence;
                    string leusequence = proteinGroup.RepresentativeProtein.LeucineSequence;
                    int length = sequence.Length;
                    // bits[i] > 0 is treated below as "position i is covered by at least one peptide".
                    int[] bits = proteinGroup.RepresentativeProtein.GetSequenceCoverage(proteinGroup.Peptides);
                    ISet<Peptide> peptides = proteinGroup.Peptides;

                    // Write the header data
                    writer.WriteLine("========");
                    writer.WriteLine("Proteins = {0}", proteinGroup.Count);
                    writer.WriteLine(proteinGroup.RepresentativeProtein.Description);

                    foreach (Protein prot in proteinGroup)
                    {
                        if (prot != proteinGroup.RepresentativeProtein)
                        {
                            writer.WriteLine(prot.Description);
                        }
                    }
                    writer.WriteLine("Length = {0}", proteinGroup.RepresentativeProtein.Length);
                    writer.WriteLine("Redundacy = {0:g3}%", proteinGroup.SequenceRedundacy);
                    writer.WriteLine("Coverage = {0:g3}%, {1} AA", proteinGroup.SequenceCoverage, bits.Count(bit => bit > 0));
                    int shared = peptides.Count(pep => pep.IsShared);
                    if (shared > 0)
                    {
                        // Second coverage figure computed from the peptides that are not shared.
                        int[] bits2 = proteinGroup.RepresentativeProtein.GetSequenceCoverage(peptides.Where(pep => !pep.IsShared));
                        int observedAminoAcids = bits2.Count(bit => bit > 0);
                        double coverage = (double)observedAminoAcids / bits2.Length * 100.0;
                        writer.WriteLine("Coverage = {0:g3}%, {1} AA (unshared only)", coverage, observedAminoAcids);
                        writer.WriteLine("Peptides = {0}, {1} are shared (marked by *)", peptides.Count, shared);
                    }
                    else
                    {
                        writer.WriteLine("Peptides = {0}", peptides.Count);
                    }

                    writer.WriteLine("========");

                    if (proteinGroup.Count > 1)
                    {
                        // Mark every position of the representative sequence at which some member of the
                        // group has a different residue (or is too short to have one).
                        writer.WriteLine("Protein Sequences (Differences marked by *)");
                        writer.Write(' ');
                        for (int i = 0; i < sequence.Length; i++)
                        {
                            bool same = true;
                            char c = sequence[i];
                            foreach (Protein prot in proteinGroup)
                            {
                                if (i >= prot.Length || !prot.Sequence[i].Equals(c))
                                {
                                    same = false;
                                    break;
                                }
                            }
                            writer.Write(same ? ' ' : '*');
                        }
                        writer.WriteLine();

                        foreach (Protein prot in proteinGroup)
                        {
                            writer.Write(' ');
                            writer.WriteLine(prot.Sequence);
                        }

                        writer.WriteLine("========");
                    }

                    // Write the amino acid numbers: a label every ten columns, each padded so that it
                    // starts exactly ten columns after the start of the previous one.
                    writer.Write(" 1");
                    int size = 2;
                    for (int i = 10; i < length; i += 10)
                    {
                        writer.Write(new string(' ', 10 - size));
                        string label = i.ToString();
                        writer.Write(label);
                        // Use the real label width so the ruler stays aligned for any sequence length.
                        size = label.Length;
                    }
                    writer.WriteLine();

                    // Write the complete sequence
                    writer.WriteLine(" " + sequence);
                    observedWriter.Write(sequence, proteinGroup.RepresentativeProtein.Description);

                    // Write the combined mapped sequence, and one CSV row per contiguous covered stretch
                    StringBuilder compressedSequence = new StringBuilder();
                    writer.Write(" ");
                    int startIndex = -1;
                    bool started = false;
                    for (int i = 0; i < bits.Length; i++)
                    {
                        if (bits[i] > 0)
                        {
                            writer.Write(sequence[i]);
                            compressedSequence.Append(sequence[i]);
                            if (!started)
                            {
                                startIndex = i;
                                started = true;
                            }
                        }
                        else
                        {
                            if (started)
                            {
                                started = false;
                                csvWriter.WriteLine("{0},{1},{2},{3},{4},{5}", proteinID, startIndex, i - startIndex, proteinGroup.SequenceCoverage, proteinGroup.Description, peptides.Count);
                            }
                            writer.Write(' ');
                            compressedSequence.Append(' ');
                        }
                    }
                    if (started)
                    {
                        // The last stretch runs to the end of the sequence and was not closed in the loop.
                        csvWriter.WriteLine("{0},{1},{2},{3},{4},{5}", proteinID, startIndex, bits.Length - startIndex, proteinGroup.SequenceCoverage, proteinGroup.Description, peptides.Count);
                    }

                    identifiedWriter.Write(compressedSequence.ToString(), proteinGroup.RepresentativeProtein.Description);

                    writer.WriteLine();

                    writer.WriteLine();

                    // Write each peptide, ordered by first occurrence and then longest first
                    foreach (Peptide peptide in peptides.OrderBy(pep => leusequence.IndexOf(pep.LeucineSequence, 0)).ThenByDescending(pep => pep.Length))
                    {
                        string marker = peptide.IsShared ? "*" : " ";
                        int searchFrom = 0;
                        while (true)
                        {
                            int index = leusequence.IndexOf(peptide.LeucineSequence, searchFrom);

                            if (index < 0)
                            {
                                break;
                            }

                            // The marker column is emitted on every occurrence line so repeated
                            // occurrences stay aligned with the sequence line above, and a peptide
                            // with no occurrence leaves no dangling marker behind.
                            writer.Write(marker);
                            writer.Write(new string(' ', index));
                            writer.WriteLine(peptide.LeucineSequence);

                            // Advance by one so overlapping occurrences are reported too.
                            searchFrom = index + 1;
                        }
                    }

                    // Give some room between proteins
                    writer.WriteLine();
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Reads every entry of <paramref name="fasta_file"/> and writes the entries whose description
        /// matches the UniProt header pattern to <paramref name="Writer"/>.
        /// </summary>
        /// <remarks>
        /// What is written per accepted entry depends on <c>Options.OutputType</c>: the entry itself for
        /// Target, its decoy for Decoy, and the entry followed by its decoy for Concatenated.
        /// When <c>Options.EnforceUniprot</c> is set, each non-matching header is appended to
        /// "InvalidUniprotheaders.txt" in the directory of the first input file and passed to
        /// <c>InvalidHeader</c>.
        /// </remarks>
        /// <param name="fasta_file">Path of the FASTA file to read.</param>
        /// <param name="Writer">Destination writer; it is not disposed here.</param>
        public void WriteFasta(string fasta_file, FastaWriter Writer)
        {
            // The previous if/else-if chain could never reach its Concatenated decoy branch, so
            // concatenated databases contained no decoys. Any value other than Target/Decoy is treated
            // as Concatenated, matching the "_CONCAT" default used when naming the output file.
            bool writeTarget = Options.OutputType != DatabaseType.Decoy;
            bool writeDecoy = Options.OutputType != DatabaseType.Target;

            // Same for every entry, so build it once. Only Success is inspected, hence ExplicitCapture.
            Regex uniprotRegex = new Regex(@"(.+)\|(.+)\|(.+?)\s(.+?)\sOS=(.+?)(?:\sGN=(.+?))?(?:$|PE=(\d+)\sSV=(\d+))", RegexOptions.ExplicitCapture);
            const string HeaderFile = "InvalidUniprotheaders.txt";

            // Equivalent to the original "(ExcludeNTerminalMethionine && !ExcludeNTerminalResidue) || ExcludeNTerminalResidue".
            bool excludeNTerminus = Options.ExcludeNTerminalMethionine || Options.ExcludeNTerminalResidue;

            using (FastaReader reader = new FastaReader(fasta_file))
            {
                foreach (Fasta fasta in reader.ReadNextFasta())
                {
                    bool isUniprot = uniprotRegex.Match(fasta.Description).Success;

                    if (!isUniprot)
                    {
                        if (Options.EnforceUniprot)
                        {
                            // GetDirectoryName can return null (e.g. for a root path); Path.Combine rejects null.
                            string headerFolder = Path.GetDirectoryName(Options.InputFiles[0]) ?? string.Empty;
                            using (StreamWriter log = new StreamWriter(Path.Combine(headerFolder, HeaderFile), true))
                            {
                                log.WriteLine("Invalid Header:");
                                log.WriteLine();
                                log.WriteLine(fasta.Description);
                                log.WriteLine();
                                InvalidHeader(fasta);
                            }
                        }

                        // NOTE(review): non-matching entries are skipped even when EnforceUniprot is off,
                        // as in the original code - confirm that this is intended.
                        continue;
                    }

                    if (writeTarget)
                    {
                        Writer.Write(fasta);
                    }

                    if (writeDecoy)
                    {
                        Writer.Write(fasta.ToDecoy(Options.DecoyPrefix, Options.DecoyType, excludeNTerminus, Options.ExcludeNTerminalMethionine));
                    }
                }
            }
        }