示例#1
0
        /// <summary>
        /// Walks every identifier exposed by the FASTA reader opened on <c>FastaPath</c>, parses the
        /// attributes found in its comment and stores the resulting <c>GeneData</c> through
        /// <c>Database.Index</c>, one entry at a time.
        /// </summary>
        /// <exception cref="Exception">
        /// An entry has no <c>protein_id</c> attribute or no <c>locus_tag</c> attribute.
        /// </exception>
        public async Task Index()
        {
            var fasta = await FastaReader.CreateAsync(FastaPath);

            foreach (var identifier in fasta.Identifiers)
            {
                var attributes = AttributesParser.Parse(fasta.CommentOf(identifier));

                var proteinId = attributes.GetValueOrDefault("protein_id");
                if (proteinId == null)
                {
                    throw new Exception("Id missing");
                }

                // A missing "location" attribute is handed to ParseRange as an empty string.
                var location = ParseRange(attributes.GetValueOrDefault("location", ""));

                var gene = new GeneData(proteinId, SequenceId)
                {
                    Symbol   = attributes.GetValueOrDefault("gene"),
                    Product  = attributes.GetValueOrDefault("protein"),
                    LocusTag = attributes.GetValueOrDefault("locus_tag") ?? throw new Exception("Locus tag missing"),
                    // We can infer this from the fasta reader if we prevent '\n' from being in the sequence
                    Position = location?.Item1,
                    // Lifted arithmetic: stays null when no range was parsed.
                    Length   = location?.Item2 - location?.Item1 + 1
                };

                await Database.Index(gene);
            }
        }
    }
示例#2
0
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c> with the
        /// "Trypsin" protease, collects the monoisotopic mass of each peptide and prints the
        /// elapsed time to the console.
        /// </summary>
        public static void ExampleDigestion()
        {
            const string databasePath = "Resources/yeast_uniprot_120226.fasta";
            const int    missedLimit  = 3;
            const int    shortest     = 5;
            const int    longest      = 50;

            IProtease trypsin = Protease.GetProtease("Trypsin");
            var       masses  = new List<double>();
            var       timer   = Stopwatch.StartNew();

            using (var reader = new FastaReader(databasePath))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(trypsin, missedLimit, shortest, longest))
                    {
                        masses.Add(peptide.MonoisotopicMass);
                    }
                }
            }

            //Console.WriteLine("Average Peptide Mass = {0:F4}", masses.Average());
            timer.Stop();
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
        }
示例#3
0
        /// <summary>
        /// <c>ReadToEnd</c> on an input holding two records returns a non-null list.
        /// </summary>
        public void ReadToEndReturnsList()
        {
            const string input = ">DNA\nATCGGGCTAAT\nATCGGGCTAAT\n\n>Number2\nATCGGGCTAAT\nATCGGGCTAAT";
            var reader = new FastaReader(new StringReader(input));

            List<Fasta> records = reader.ReadToEnd();

            Assert.IsTrue(records != null);
        }
示例#4
0
        /// <summary>
        /// Parses the reads at <c>ReadsPath</c>, builds and cleans a De Bruijn graph from them,
        /// optionally writes the graph as a dot file to <c>DotFilePath</c>, and writes the
        /// resulting contigs as FASTA to <c>ContigsPath</c>.
        /// </summary>
        /// <remarks>
        /// Output directories are created on demand. A path without a directory component
        /// (for example <c>"contigs.fasta"</c>) targets the current directory, so nothing is created
        /// for it; previously that case failed because <c>Directory.CreateDirectory</c> rejects
        /// the empty string returned by <c>Path.GetDirectoryName</c>.
        /// </remarks>
        private void Run()
        {
            var fileService    = new FileService();
            var kmerLength     = K ?? 19; // 19 is the fallback when no k-mer length was supplied
            var errorCorrector = new ErrorCorrector(kmerLength, true);
            var fastaReader    = new FastaReader(fileService);

            var reads = fastaReader.ParseFastaFile(ReadsPath);
            // NOTE(review): histogram building is disabled here — confirm this is intentional.
            // errorCorrector.BuildHistogram(reads);

            var graphBuilder = new DeBruijnGraphBuilder(kmerLength, errorCorrector);
            var graph        = graphBuilder.Build(reads);

            errorCorrector.PrintResult();

            graph.CleanUp();

            if (!string.IsNullOrWhiteSpace(DotFilePath))
            {
                var dotDirectory = Path.GetDirectoryName(DotFilePath);
                if (!string.IsNullOrEmpty(dotDirectory))
                {
                    Directory.CreateDirectory(dotDirectory);
                }
                graphBuilder.ToDot(fileService, DotFilePath, graph);
            }

            var contigs = graph.GetContigs();

            var contigsDirectory = Path.GetDirectoryName(ContigsPath);
            if (!string.IsNullOrEmpty(contigsDirectory))
            {
                Directory.CreateDirectory(contigsDirectory);
            }
            fastaReader.WriteFastaFile(ContigsPath, contigs);
        }
示例#5
0
        /// <summary>
        /// A single record with one sequence line exposes that line through <c>Dna</c>.
        /// </summary>
        public void PopulateATCGDnaFastaFormat()
        {
            const string input = ">DNA\nATCG";
            var reader = new FastaReader(new StringReader(input));

            Fasta record = reader.Read();

            Assert.AreEqual("ATCG", record.Dna);
        }
示例#6
0
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c> with
        /// <paramref name="protease"/> and prints protein/peptide counts, elapsed time and the
        /// process working set to the console.
        /// </summary>
        /// <param name="protease">Protease passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxMissed">Missed-cleavage limit passed to <c>Protein.Digest</c>.</param>
        /// <param name="minLength">Minimum length passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxLength">Maximum length passed to <c>Protein.Digest</c>.</param>
        /// <param name="storeSequenceString">
        /// Value assigned to the static <c>AminoAcidPolymer.StoreSequenceString</c> before digesting.
        /// </param>
        public static void Start(IProtease protease, int maxMissed = 1, int minLength = 0, int maxLength = int.MaxValue, bool storeSequenceString = true)
        {
            Console.WriteLine("**Start Digestion**");
            Stopwatch timer = Stopwatch.StartNew();

            var peptides = new List<Peptide>();
            var proteins = new List<Protein>();
            var mzValues = new List<double>();

            AminoAcidPolymer.StoreSequenceString = storeSequenceString;
            using (var reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peptides.Add(peptide);
                        // forces the calculation of the mass and thus chemical formula
                        mzValues.Add(peptide.ToMz(2));
                    }
                    proteins.Add(protein);
                }
            }
            timer.Stop();

            Console.WriteLine("{0:N0} proteins produced {1:N0} peptides using {2:N0} missed cleavages", proteins.Count, peptides.Count, maxMissed);
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
            Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet / (1024 * 1024));
            Console.WriteLine("**End Digestion**");
        }
示例#7
0
        /// <summary>
        /// A header-only record exposes the text after '&gt;' through <c>Header</c>.
        /// </summary>
        public void PopulateDEFHeaderFastaFormat()
        {
            const string input = ">DEF";
            var reader = new FastaReader(new StringReader(input));

            Fasta record = reader.Read();

            Assert.AreEqual("DEF", record.Header);
        }
示例#8
0
        /// <summary>
        /// <c>ReadToEnd</c> on an input holding one record returns a list with exactly one element.
        /// </summary>
        public void ReadToEndReturnsSingleElement()
        {
            const string input = ">DNA\nATCGGGCTAAT\nATCGGGCTAAT";
            var reader = new FastaReader(new StringReader(input));

            List<Fasta> records = reader.ReadToEnd();

            Assert.IsTrue(records.Count == 1);
        }
        /// <summary>
        /// Runs a buffered blastn query for the file behind <paramref name="reader"/> and counts,
        /// per reader index, how many results name that query sequence.
        /// </summary>
        /// <param name="reader">Supplies the query path, the report size and the id-to-index lookup.</param>
        /// <param name="evalue">Value assigned to <c>BlastnOptions.Evalue</c>.</param>
        /// <returns>The report after one <c>Increment</c> per result that has a query sequence id.</returns>
        public async Task<FrequencyReport> Frequency(FastaReader reader, string evalue = "1e-6")
        {
            var frequencies = new FrequencyReport(reader.Length);

            var options = new BlastnOptions(reader.Path)
            {
                Task             = Task,
                Evalue           = evalue,
                FormatSpecifiers = new List<string> { "qseqid" },
                MaxTargetSeqs    = MaxTargetSeqs
            };
            var hits = await Client.ExecuteBuffered(options);

            foreach (var hit in hits)
            {
                // Results without a query sequence id cannot be attributed and are skipped.
                if (hit.QuerySeqId != null)
                {
                    frequencies.Increment(reader.IndexOf(hit.QuerySeqId));
                }
            }

            return frequencies;
        }
示例#10
0
        /// <summary>
        /// Writes a <c>search_database</c> element describing the FASTA file at
        /// <paramref name="fastaFilePath"/>, including its entry count.
        /// </summary>
        /// <param name="fastaFilePath">Written as <c>local_path</c> and counted via <c>FastaReader.NumberOfEntries</c>.</param>
        /// <param name="name">Database name; when null or empty the file name without extension is used.</param>
        /// <param name="releaseDate">Written as <c>database_release_date</c> only when not null or empty.</param>
        /// <exception cref="ArgumentException"><c>CurrentStage</c> is not <c>Stage.SearchSummary</c>.</exception>
        public void WriteProteinDatabase(string fastaFilePath, string name = "", string releaseDate = "")
        {
            if (CurrentStage != Stage.SearchSummary)
            {
                throw new ArgumentException("You must be in the Search Summary stage to write protein databases");
            }

            _writer.WriteStartElement("search_database");
            _writer.WriteAttributeString("seq_type", "AA");
            _writer.WriteAttributeString("local_path", fastaFilePath);

            if (string.IsNullOrEmpty(name))
            {
                name = Path.GetFileNameWithoutExtension(fastaFilePath);
            }
            _writer.WriteAttributeString("database_name", name);

            bool hasReleaseDate = !string.IsNullOrEmpty(releaseDate);
            if (hasReleaseDate)
            {
                _writer.WriteAttributeString("database_release_date", releaseDate);
            }

            int entryCount = FastaReader.NumberOfEntries(fastaFilePath);
            _writer.WriteAttributeString("size_in_db_entries", entryCount.ToString());

            _writer.WriteEndElement(); // search_database
        }
示例#11
0
        /// <summary>
        /// End-to-end run: parses the reads at <paramref name="fastaPath"/> (relative to the
        /// "IntegrationTests" project directory), builds and cleans a De Bruijn graph, then writes
        /// a dot file under <c>graphs/</c> and the contigs under <c>contigs/</c> next to the input.
        /// </summary>
        /// <param name="fastaPath">Path of the reads file, relative to the project directory.</param>
        /// <exception cref="InvalidOperationException">
        /// The current directory does not contain "IntegrationTests", so the project directory
        /// cannot be located. Previously this case silently produced a truncated path.
        /// </exception>
        public void BuildGraph_GenerateDotFile_WriteContigs(string fastaPath)
        {
            const string assemblyName = "IntegrationTests";

            var currentDirectory = Environment.CurrentDirectory;
            // Ordinal: this is a path fragment, not linguistic text.
            var assemblyNameIndex = currentDirectory.IndexOf(assemblyName, StringComparison.Ordinal);
            if (assemblyNameIndex < 0)
            {
                throw new InvalidOperationException(
                    "Cannot locate the '" + assemblyName + "' directory from current directory '" + currentDirectory + "'.");
            }
            var projectPath = currentDirectory.Substring(0, assemblyNameIndex + assemblyName.Length);

            fastaPath = Path.Combine(projectPath, fastaPath);
            var inputDirectory = Path.GetDirectoryName(fastaPath);
            var inputBaseName  = Path.GetFileNameWithoutExtension(fastaPath);

            var fileService    = new FileService();
            var kmerLength     = 19;
            var errorCorrector = new ErrorCorrector(kmerLength);
            var fastaReader    = new FastaReader(fileService);

            var reads = fastaReader.ParseFastaFile(fastaPath);

            errorCorrector.BuildHistogram(reads);

            var graphBuilder = new DeBruijnGraphBuilder(kmerLength, errorCorrector);
            var graph        = graphBuilder.Build(reads);

            errorCorrector.PrintResult();

            graph.CleanUp();
            var dotFileDirectory = Path.Combine(inputDirectory, "graphs");

            Directory.CreateDirectory(dotFileDirectory);
            graphBuilder.ToDot(fileService, Path.Combine(dotFileDirectory, inputBaseName + ".dot"), graph);

            var contigs          = graph.GetContigs();
            var contigsDirectory = Path.Combine(inputDirectory, "contigs");

            Directory.CreateDirectory(contigsDirectory);
            fastaReader.WriteFastaFile(Path.Combine(contigsDirectory, inputBaseName + ".contigs.fasta"), contigs);
        }
示例#12
0
 /// <summary>
 /// Groups the proteins read from <paramref name="fastaFile"/> by delegating to the
 /// sequence-based <c>GroupProteins</c> overload with a single protease.
 /// </summary>
 /// <param name="fastaFile">Path of the FASTA file to read proteins from.</param>
 /// <param name="protease">The one protease forwarded (wrapped in an array) to the overload.</param>
 /// <param name="observeredSequences">Forwarded unchanged to the overload.</param>
 /// <param name="peptideComparer">Forwarded unchanged to the overload.</param>
 /// <param name="MaxMissedCleavages">Forwarded unchanged to the overload.</param>
 /// <returns>The fully materialized protein groups.</returns>
 public static IEnumerable<ProteinGroup> GroupProteins(string fastaFile, IProtease protease, IEnumerable<IAminoAcidSequence> observeredSequences, IEqualityComparer<IAminoAcidSequence> peptideComparer, int MaxMissedCleavages = 3)
 {
     using (FastaReader fasta = new FastaReader(fastaFile))
     {
         // Materialize before leaving the using block: if ReadNextProtein or the overload is
         // lazily evaluated, returning the raw sequence would let the caller enumerate it only
         // after the reader has already been disposed.
         return new List<ProteinGroup>(
             GroupProteins(fasta.ReadNextProtein(), new[] { protease }, observeredSequences, peptideComparer, MaxMissedCleavages));
     }
 }
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c>, shuffles the
        /// peptides with a fixed seed, then groups proteins for a growing fraction (0 % up to 100 %)
        /// of "identified" peptides and prints the peptide/group counts for each step.
        /// </summary>
        /// <param name="protease">Protease used for digestion and grouping.</param>
        /// <param name="percentIdentifiedSteps">Fraction added per step; must be a positive finite number.</param>
        /// <param name="maxMissed">Missed-cleavage limit passed to digestion and grouping.</param>
        /// <param name="minLength">Minimum length passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxLength">Maximum length passed to <c>Protein.Digest</c>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="percentIdentifiedSteps"/> is not a positive finite number (such values
        /// previously made the ramp loop run forever or degenerate).
        /// </exception>
        public static void StartRamp(IProtease protease, double percentIdentifiedSteps = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            if (!(percentIdentifiedSteps > 0) || double.IsInfinity(percentIdentifiedSteps))
            {
                throw new ArgumentOutOfRangeException("percentIdentifiedSteps", "The step size must be a positive finite number.");
            }

            List<Peptide> peps     = new List<Peptide>();
            List<Protein> proteins = new List<Protein>();

            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peps.Add(peptide);
                    }
                    proteins.Add(protein);
                }
            }

            // Fixed seed to make it reproducible
            Random random = new Random(480912341);

            peps = peps.OrderBy(x => random.Next()).ToList();

            // Derive each fraction from an integer step index instead of repeatedly adding the
            // step: accumulated rounding error could push the running sum just past 1 and drop
            // the final (100 %) step. The tolerance absorbs the rounding of a single product.
            const double tolerance = 1e-9;
            for (int step = 0; step * percentIdentifiedSteps <= 1 + tolerance; step++)
            {
                double percentIdentified = Math.Min(1.0, step * percentIdentifiedSteps);

                // Take the first x % to act as our identified peptides
                List<Peptide> identifiedPeptides = peps.Take((int)(peps.Count * percentIdentified)).ToList();

                List<ProteinGroup> proteinGroups = ProteinGroup.GroupProteins(proteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();
                Console.WriteLine("{0} peptides {1} protein groups", identifiedPeptides.Count, proteinGroups.Count);
            }
        }
示例#14
0
        /// <summary>
        /// Copies every entry of the FASTA file at <paramref name="fastaPath"/> to
        /// <paramref name="outputPath"/>, re-casing each base: lower case where the matching bit of
        /// that entry's <c>ChromosomeNonUniqueFlags</c> array is set, upper case otherwise.
        /// </summary>
        /// <param name="fastaPath">FASTA file to read.</param>
        /// <param name="outputPath">FASTA file to write.</param>
        private void WriteOutputs(string fastaPath, string outputPath)
        {
            using (FastaReader reader = new FastaReader(fastaPath))
            using (FastaWriter writer = new FastaWriter(outputPath))
            {
                GenericRead entry = new GenericRead();

                // Entries are matched to their flag arrays purely by position in the file.
                for (int entryIndex = 0; reader.GetNextEntry(ref entry); entryIndex++)
                {
                    StringBuilder recased = new StringBuilder();
                    BitArray      flags   = ChromosomeNonUniqueFlags[entryIndex];

                    for (int position = 0; position < entry.Bases.Length; position++)
                    {
                        recased.Append(flags[position]
                            ? char.ToLowerInvariant(entry.Bases[position])
                            : char.ToUpperInvariant(entry.Bases[position]));
                    }

                    writer.WriteEntry(entry.Name, recased.ToString());
                }
            }
        }
示例#15
0
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c> into a set of
        /// distinct peptides, loads them into a <c>MorpheusSearchEngine</c> and prints the elapsed
        /// time and process working set to the console.
        /// </summary>
        /// <param name="protease">Protease passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxMissed">Missed-cleavage limit passed to <c>Protein.Digest</c>.</param>
        /// <param name="minLength">Minimum length passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxLength">Maximum length passed to <c>Protein.Digest</c>.</param>
        public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Morpheus Search**");
            Stopwatch watch = new Stopwatch();

            watch.Start();

            // Generate peptide candidates (the set drops duplicates according to Peptide equality).
            HashSet<Peptide> peptides = new HashSet<Peptide>();

            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peptides.Add(peptide);
                    }
                }
            }

            MSSearchEngine engine = new MorpheusSearchEngine();

            engine.PrecursorMassTolerance = Tolerance.FromPPM(100);
            engine.ProductMassTolerance   = Tolerance.FromPPM(10);

            engine.LoadPeptides(peptides);

            watch.Stop();
            Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
            Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet / (1024 * 1024));
            Console.WriteLine("**End Morpheus Search**");
        }
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c>, picks a
        /// fixed-seed random fraction of the peptides as "identified", groups the proteins on
        /// them and prints counts and timings to the console.
        /// </summary>
        /// <param name="protease">Protease used for digestion and grouping.</param>
        /// <param name="percentIdentified">Fraction of all peptides treated as identified.</param>
        /// <param name="maxMissed">Missed-cleavage limit passed to digestion and grouping.</param>
        /// <param name="minLength">Minimum length passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxLength">Maximum length passed to <c>Protein.Digest</c>.</param>
        public static void ExampleProteinGrouping(IProtease protease, double percentIdentified = 0.01, int maxMissed = 3, int minLength = 5, int maxLength = 50)
        {
            Stopwatch timer = Stopwatch.StartNew();

            // Initial capacities are sizing hints only.
            var allPeptides = new List<Peptide>(1000000);
            var allProteins = new List<Protein>(7000);

            using (var reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    allPeptides.AddRange(protein.Digest(protease, maxMissed, minLength, maxLength));
                    allProteins.Add(protein);
                }
            }
            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, timer.ElapsedMilliseconds);
            timer.Restart();

            Random random = new Random(480912341);

            // Shuffle, then take the first x % to act as our identified peptides
            List<Peptide> identified = allPeptides
                .OrderBy(peptide => random.Next())
                .Take((int)(allPeptides.Count * percentIdentified))
                .ToList();

            List<ProteinGroup> groups = ProteinGroup
                .GroupProteins(allProteins, protease, identified, new AminoAcidLeucineSequenceComparer(), maxMissed)
                .ToList();

            timer.Stop();

            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, groups.Count, identified.Count);
            Console.WriteLine();
            Console.WriteLine("Time elapsed: {0} ms", timer.ElapsedMilliseconds);
        }
示例#17
0
        /// <summary>
        /// Sequence lines of one record are concatenated, without line breaks, into <c>Dna</c>.
        /// </summary>
        public void PopulateMultiLineDnaFastaFormat()
        {
            const string input = ">DNA\nATCGGGCTAAT\nATCGGGCTAAT\nATCGGGCTAAT\nATCGGGCTAAT";
            var reader = new FastaReader(new StringReader(input));

            Fasta record = reader.Read();

            Assert.AreEqual("ATCGGGCTAATATCGGGCTAATATCGGGCTAATATCGGGCTAAT", record.Dna);
        }
示例#18
0
        /// <summary>
        /// <c>ReadToEnd</c> on an input holding two records (separated by a blank line) returns a
        /// list with exactly two elements.
        /// </summary>
        public void ReadToEndReturnsTwoElements()
        {
            const string input = ">DNA\nATCGGGCTAAT\nATCGGGCTAAT\n\n>Number2\nATCGGGCTAAT\nATCGGGCTAAT";
            var reader = new FastaReader(new StringReader(input));

            List<Fasta> records = reader.ReadToEnd();

            Assert.IsTrue(records.Count == 2);
        }
示例#19
0
        /// <summary>
        /// Runs the whole search synchronously: reads the proteins from the configured FASTA file,
        /// derives decoy proteins by reversing each sequence, generates target and decoy peptides,
        /// builds the glycans, then searches and analyzes every configured MS/MS file in turn.
        /// </summary>
        /// <returns>An already completed task; all work is done before this method returns.</returns>
        private Task Process()
        {
            var readerCounter = new Counter();
            readerCounter.progressChange += ReadProgressChanged;

            var searchCounter = new Counter();
            searchCounter.progressChange += SearchProgressChanged;

            // build peptides
            Peptide.To.SetCysteine(ConfigureParameters.Access.Cysteine);
            IProteinReader  proteinReader  = new FastaReader();
            List<IProtein>  targetProteins = proteinReader.Read(SearchingParameters.Access.FastaFile);

            // One decoy per target, carrying the reversed target sequence.
            List<IProtein> decoyProteins = new List<IProtein>();
            foreach (IProtein target in targetProteins)
            {
                IProtein decoy = new BaseProtein();
                decoy.SetSequence(MultiThreadingSearchHelper.Reverse(target.Sequence()));
                decoyProteins.Add(decoy);
            }

            List<string> peptides      = MultiThreadingSearchHelper.GeneratePeptides(targetProteins);
            List<string> decoyPeptides = MultiThreadingSearchHelper.GeneratePeptides(decoyProteins);

            // build glycans
            var access = SearchingParameters.Access;
            GlycanBuilder glycanBuilder = new GlycanBuilder(
                access.HexNAc,
                access.Hex,
                access.Fuc,
                access.NeuAc,
                access.NeuGc,
                access.ComplexInclude,
                access.HybridInclude,
                access.MannoseInclude);
            glycanBuilder.Build();

            int fileNumber = 0;
            foreach (string file in SearchingParameters.Access.MSMSFiles)
            {
                fileNumber++;

                // Reset both progress displays before starting the next file.
                ReadingCounter  = 0;
                ProgressCounter = 0;
                UpdateProgress(100);
                Readingprogress(100);
                UpdateSignal($"Searching...({fileNumber}/{SearchingParameters.Access.MSMSFiles.Count})");

                var search = new MultiThreadingSearch(
                    file, readerCounter, searchCounter, peptides, decoyPeptides, glycanBuilder);
                search.Run();

                UpdateSignal("Analyzing...");
                Analyze(file, search.Target(), search.Decoy(), glycanBuilder);
            }

            UpdateSignal("Done");
            return Task.CompletedTask;
        }
示例#20
0
        /// <summary>
        /// Enumerating <c>ParseFastaFile</c> over a file service with no lines throws
        /// <see cref="ArgumentException"/>.
        /// </summary>
        public void ParseFastaFile_EmptyFile_ThrowsArgumentException()
        {
            var emptyFile = new TestFileService
            {
                fileLines = new List<string>()
            };
            var fastaReader = new FastaReader(emptyFile);

            // ToList forces enumeration of the parse result inside the assertion.
            Assert.Throws<ArgumentException>(() => fastaReader.ParseFastaFile("path").ToList());
        }
示例#21
0
        /// <summary>
        /// Parsing the default <c>TestFileService</c> content yields 10 sequences, each 80 long.
        /// </summary>
        public void ParseFastaFile_ReturnsCorrectSequences()
        {
            var fastaReader = new FastaReader(new TestFileService());

            var parsed = fastaReader.ParseFastaFile("path").ToList();

            Assert.Equal(10, parsed.Count);
            Assert.All(parsed, sequence => Assert.Equal(80, sequence.Length));
        }
示例#22
0
        /// <summary>
        /// Copies the entries of <paramref name="fasta_file"/> whose description matches the UniProt
        /// header pattern to <paramref name="Writer"/>, either unchanged or converted to decoys.
        /// </summary>
        /// <remarks>
        /// Entries whose description does not match the pattern are never written. When
        /// <c>Options.EnforceUniprot</c> is set they are also appended to
        /// <c>InvalidUniprotheaders.txt</c> (next to the first input file) and passed to
        /// <c>InvalidHeader</c>.
        /// </remarks>
        /// <param name="fasta_file">Path of the FASTA file to read.</param>
        /// <param name="Writer">Destination for the accepted entries.</param>
        public void WriteFasta(string fasta_file, FastaWriter Writer)
        {
            // Same result as the original if / else-if chain: only DatabaseType.Decoy yields decoys.
            // NOTE(review): that chain also tested Concatenated in its else-if branch, which could
            // never be reached, so a Concatenated run writes target entries here — confirm that the
            // decoy half of a concatenated database is produced elsewhere.
            bool makeDecoy = Options.OutputType == DatabaseType.Decoy;

            // Equivalent to the original "(methionine && !residue) || residue".
            bool excludeNTerminus = Options.ExcludeNTerminalMethionine || Options.ExcludeNTerminalResidue;

            // Built once instead of once per entry; only Success of the match is consulted.
            Regex uniprotRegex = new Regex(@"(.+)\|(.+)\|(.+?)\s(.+?)\sOS=(.+?)(?:\sGN=(.+?))?(?:$|PE=(\d+)\sSV=(\d+))", RegexOptions.ExplicitCapture);
            const string headerFile = "InvalidUniprotheaders.txt";

            using (FastaReader reader = new FastaReader(fasta_file))
            {
                foreach (Fasta fasta in reader.ReadNextFasta())
                {
                    bool isUniprotHeader = uniprotRegex.Match(fasta.Description).Success;

                    if (!isUniprotHeader)
                    {
                        if (Options.EnforceUniprot)
                        {
                            // The log path is only resolved when something has to be logged.
                            string headerFolder = Path.GetDirectoryName(Options.InputFiles[0]);
                            using (StreamWriter log = new StreamWriter(Path.Combine(headerFolder, headerFile), true))
                            {
                                log.WriteLine("Invalid Header:");
                                log.WriteLine();
                                log.WriteLine(fasta.Description);
                                log.WriteLine();
                                InvalidHeader(fasta);
                            }
                        }
                        continue;
                    }

                    if (makeDecoy)
                    {
                        Writer.Write(fasta.ToDecoy(Options.DecoyPrefix, Options.DecoyType, excludeNTerminus, Options.ExcludeNTerminalMethionine));
                    }
                    else
                    {
                        Writer.Write(fasta);
                    }
                }
            }
        }
示例#23
0
        /// <summary>
        /// Scans every entry of a hard-coded reference FASTA for exact occurrences of a fixed set of
        /// reads and of their reverse complements, printing each hit as a tab-separated line
        /// (read index, read, entry name, zero-based position, and "RevComp" for reverse hits).
        /// </summary>
        /// <remarks>
        /// Searches are ordinal: bases are plain characters, so culture-sensitive matching would be
        /// both slower and semantically wrong on sequences of this size.
        /// </remarks>
        public static void CheckUniqueness()
        {
            string fastaPath = @"D:\Genomes\Homo_sapiens\UCSC\hg19\Sequence\WholeGenomeFasta\genome.fa";

            string[] Reads = new string[]
            {
                "AACCCTAACCCAACCCTAACCCTAACCCTAACCCT", // 10097 B
                "ACCCTAACCCAACCCTAACCCTAACCCTAACCCTA", // 10098 B
                "AGAGGACAACGCAGCTCCGCCCTCGCGGTGCTCTC", // 10553 A
                "TTTTTTCCTATACATACATACCCATGATAAAGTTT"  // 30763880 A
            };

            // The reverse complements do not depend on the entry being scanned; compute them once.
            string[] reverseComplements = new string[Reads.Length];
            for (int readIndex = 0; readIndex < Reads.Length; readIndex++)
            {
                reverseComplements[readIndex] = Isas.Shared.Utilities.GetReverseComplement(Reads[readIndex]);
            }

            using (FastaReader readerA = new FastaReader(fastaPath))
            {
                GenericRead chrA = new GenericRead();
                while (readerA.GetNextEntry(ref chrA))
                {
                    Console.WriteLine(chrA.Name);
                    // Upper-case once so soft-masked (lower-case) bases still match the reads.
                    string bases = chrA.Bases.ToUpperInvariant();

                    // Search for each:
                    for (int readIndex = 0; readIndex < Reads.Length; readIndex++)
                    {
                        string read = Reads[readIndex];
                        // Restart one past each hit so overlapping occurrences are reported too.
                        for (int pos = bases.IndexOf(read, StringComparison.Ordinal);
                             pos != -1;
                             pos = bases.IndexOf(read, pos + 1, StringComparison.Ordinal))
                        {
                            Console.WriteLine("{0}\t{1}\t{2}\t{3}", readIndex, read, chrA.Name, pos);
                        }

                        string revComp = reverseComplements[readIndex];
                        for (int pos = bases.IndexOf(revComp, StringComparison.Ordinal);
                             pos != -1;
                             pos = bases.IndexOf(revComp, pos + 1, StringComparison.Ordinal))
                        {
                            Console.WriteLine("{0}\t{1}\t{2}\t{3}\tRevComp", readIndex, read, chrA.Name, pos);
                        }
                    }
                }
            }
            Console.WriteLine(">>>Done.");
        }
示例#24
0
        /// <summary>
        /// Reads the proteins from <c>_fastaFile</c> and concatenates the N-glycopeptides each one
        /// produces for the configured missed cleavages, protease types and peptide mutation.
        /// </summary>
        /// <returns>All glycopeptides, in protein order.</returns>
        private List<string> GenerateGlycoPeptide()
        {
            List<ProteinInfo> proteins      = FastaReader.ReadFasta(_fastaFile);
            List<string>      glycopeptides = new List<string>();

            foreach (ProteinInfo protein in proteins)
            {
                List<string> fromThisProtein = protein.NGlycopeptide(_MissCLeavage, _ProteaseType, _peptideMutation);
                glycopeptides.AddRange(fromThisProtein);
            }

            return glycopeptides;
        }
示例#25
0
        /// <summary>
        /// Compares two FASTA files entry by entry and base by base, classifying each position by
        /// whether its character sorts before 'a' (treated as "unique") in file A, file B, both or
        /// neither, and prints running totals and the percent agreement after every entry.
        /// </summary>
        /// <remarks>
        /// The first entry of file A is skipped before the comparison starts, so entry N+1 of A is
        /// compared with entry N of B.
        /// </remarks>
        /// <param name="fastaPathA">First FASTA file; its leading entry is discarded.</param>
        /// <param name="fastaPathB">Second FASTA file.</param>
        /// <returns>Always 0.</returns>
        /// <exception cref="InvalidOperationException">
        /// File B has fewer entries than needed, or a pair of entries differs in length.
        /// </exception>
        public static int ProcessReferenceFASTA(string fastaPathA, string fastaPathB)
        {
            GenericRead chrA         = new GenericRead();
            GenericRead chrB         = new GenericRead();
            long        CountAB      = 0;
            long        CountA       = 0;
            long        CountB       = 0;
            long        CountNeither = 0;

            using (FastaReader readerA = new FastaReader(fastaPathA))
            using (FastaReader readerB = new FastaReader(fastaPathB))
            {
                readerA.GetNextEntry(ref chrA); // Discard chrM from new output
                while (readerA.GetNextEntry(ref chrA))
                {
                    // Previously the result was ignored, so a short file B meant comparing
                    // against the stale previous entry.
                    if (!readerB.GetNextEntry(ref chrB))
                    {
                        throw new InvalidOperationException(
                            "The second FASTA file has no entry to compare with '" + chrA.Name + "'.");
                    }

                    var basesA = chrA.Bases;
                    var basesB = chrB.Bases;
                    if (basesA.Length != basesB.Length)
                    {
                        throw new InvalidOperationException(
                            "Length mismatch: '" + chrA.Name + "' has " + basesA.Length +
                            " bases but '" + chrB.Name + "' has " + basesB.Length + ".");
                    }

                    for (int baseIndex = 0; baseIndex < basesA.Length; baseIndex++)
                    {
                        // Characters below 'a' (e.g. upper-case letters) count as unique.
                        bool isUniqueA = basesA[baseIndex] < 'a';
                        bool isUniqueB = basesB[baseIndex] < 'a';
                        if (isUniqueA && isUniqueB)
                        {
                            CountAB++;
                        }
                        else if (isUniqueA)
                        {
                            CountA++;
                        }
                        else if (isUniqueB)
                        {
                            CountB++;
                        }
                        else
                        {
                            CountNeither++;
                        }
                    }

                    Console.WriteLine("After {0}: {1},{2},{3},{4}", chrA.Name,
                                      CountAB, CountA, CountB, CountNeither);
                    double percentAgreement = 100 * (CountAB + CountNeither) / (double)(CountAB + CountA + CountB + CountNeither);
                    Console.WriteLine("Percent agreement: {0:F2}", percentAgreement);
                }
            }
            return 0;
        }
示例#26
0
        /// <summary>
        /// Two records separated by an empty line are returned by consecutive <c>Read</c> calls,
        /// each with its own header and its sequence lines joined.
        /// </summary>
        public void HandleMultipleFastaFormatWithEmptyLines()
        {
            const string input = ">DNA\nATCGGGCTAAT\nATCGGGCTAAT\n\n>Number2\nATCGGGCTAAT\nATCGGGCTAAT";
            var reader = new FastaReader(new StringReader(input));

            Fasta first = reader.Read();
            Assert.AreEqual("DNA", first.Header);
            Assert.AreEqual("ATCGGGCTAATATCGGGCTAAT", first.Dna);

            Fasta second = reader.Read();
            Assert.AreEqual("Number2", second.Header);
            Assert.AreEqual("ATCGGGCTAATATCGGGCTAAT", second.Dna);
        }
示例#27
0
 /// <summary>
 /// Glycan-composition mode (glycans come from a glycan list): stores the search settings,
 /// sets <c>_UseGlycanList</c> to true, loads the proteins from the FASTA file and starts
 /// <c>RunWork</c> on a new thread.
 /// </summary>
 /// <param name="argStartScan">Stored in <c>_StartScan</c>.</param>
 /// <param name="argEndScan">Stored in <c>_EndScan</c>.</param>
 /// <param name="argMSMSTol">Stored in <c>_MSMSTol</c>.</param>
 /// <param name="argPrecursorTol">Stored in <c>_PrecursorTol</c>.</param>
 /// <param name="argNGlycan">Stored in <c>_NGlycan</c>.</param>
 /// <param name="argHuman">Stored in <c>_Human</c>.</param>
 /// <param name="argGlycanCompounds">Stored in <c>_GlycanCompounds</c>.</param>
 /// <param name="argMassGlycanMapping">Stored in <c>_MassGlycanMapping</c>.</param>
 /// <param name="argGlycanCompoundMassList">Stored in <c>_GlycanCompoundMassList</c>.</param>
 /// <param name="argGlycanFile">Stored in <c>_glycanFile</c>.</param>
 /// <param name="argRawFilePath">Stored in <c>_rawFilePath</c>.</param>
 /// <param name="argFastaFile">Stored in <c>_fastaFile</c> and read into <c>Proteins</c> via <c>FastaReader.ReadFasta</c>.</param>
 /// <param name="argProteaseType">Stored in <c>_ProteaseType</c>.</param>
 /// <param name="argMissCleavage">Stored in <c>_MissCLeavage</c>.</param>
 /// <param name="argAverageMass">Stored in <c>_AverageMass</c>.</param>
 /// <param name="argUseHCD">Stored in <c>_UseHCD</c>.</param>
 /// <param name="argSeqHCD">Stored in <c>_SeqHCD</c>.</param>
 /// <param name="argExportFile">Stored in <c>_exportFile</c>.</param>
 /// <param name="argGetTopRank">Stored in <c>_GetTopRank</c>.</param>
 /// <param name="argCompletedOnly">Stored in <c>_CompletedOnly</c>.</param>
 /// <param name="argCompletedReward">Stored in <c>_CompletedReward</c>.</param>
 public frmInvokeProcesses(int argStartScan,
                           int argEndScan,
                           float argMSMSTol,
                           float argPrecursorTol,
                           bool argNGlycan,
                           bool argHuman,
                           List<GlycanCompound> argGlycanCompounds,
                           Dictionary<double, GlycanCompound> argMassGlycanMapping,
                           List<float> argGlycanCompoundMassList,
                           string argGlycanFile,
                           string argRawFilePath,
                           string argFastaFile,
                           List<Protease.Type> argProteaseType,
                           int argMissCleavage,
                           bool argAverageMass,
                           bool argUseHCD,
                           bool argSeqHCD,
                           string argExportFile,
                           int argGetTopRank,
                           bool argCompletedOnly,
                           float argCompletedReward
                           )
 {
     InitializeComponent();
     AAMW = new AminoAcidMass();

     // Scan range and tolerances.
     _StartScan    = argStartScan;
     _EndScan      = argEndScan;
     _MSMSTol      = argMSMSTol;
     _PrecursorTol = argPrecursorTol;

     // Glycan settings: this overload always works from the supplied glycan list.
     _NGlycan                = argNGlycan;
     _Human                  = argHuman;
     _UseGlycanList          = true;
     _GlycanCompounds        = argGlycanCompounds;
     _MassGlycanMapping      = argMassGlycanMapping;
     _GlycanCompoundMassList = argGlycanCompoundMassList;

     // Input and output files.
     _glycanFile  = argGlycanFile;
     _rawFilePath = argRawFilePath;
     _fastaFile   = argFastaFile;
     _exportFile  = argExportFile;

     // Digestion, scoring and reporting options.
     _ProteaseType    = argProteaseType;
     _MissCLeavage    = argMissCleavage;
     _AverageMass     = argAverageMass;
     _UseHCD          = argUseHCD;
     _SeqHCD          = argSeqHCD;
     _GetTopRank      = argGetTopRank;
     _CompletedOnly   = argCompletedOnly;
     _CompletedReward = argCompletedReward;

     // The FASTA file is read synchronously, before the worker starts.
     Proteins = FastaReader.ReadFasta(argFastaFile);

     // Every setting above is assigned before the worker thread begins.
     WorkThread = new Thread(RunWork);
     WorkThread.Start();
 }
示例#28
0
 /// <summary>
 /// Blind-search mode (glycans are described by per-residue counts): stores the search
 /// settings, sets <c>_UseGlycanList</c> to false, loads the proteins from the FASTA file and
 /// starts <c>RunWork</c> on a new thread.
 /// </summary>
 /// <param name="argStartScan">Stored in <c>_StartScan</c>.</param>
 /// <param name="argEndScan">Stored in <c>_EndScan</c>.</param>
 /// <param name="argMSMSTol">Stored in <c>_MSMSTol</c>.</param>
 /// <param name="argPrecursorTol">Stored in <c>_PrecursorTol</c>.</param>
 /// <param name="argNGlycan">Stored in <c>_NGlycan</c>.</param>
 /// <param name="argHuman">Stored in <c>_Human</c>.</param>
 /// <param name="argNoHexNAc">Stored in <c>_NoHexNAc</c>.</param>
 /// <param name="argNoHex">Stored in <c>_NoHex</c>.</param>
 /// <param name="argNoDeHex">Stored in <c>_NoDeHex</c>.</param>
 /// <param name="argNoSia">Stored in <c>_NoSia</c>.</param>
 /// <param name="argRawFilePath">Stored in <c>_rawFilePath</c>.</param>
 /// <param name="argFastaFile">Stored in <c>_fastaFile</c> and read into <c>Proteins</c> via <c>FastaReader.ReadFasta</c>.</param>
 /// <param name="argProteaseType">Stored in <c>_ProteaseType</c>.</param>
 /// <param name="argMissCleavage">Stored in <c>_MissCLeavage</c>.</param>
 /// <param name="argAverageMass">Stored in <c>_AverageMass</c>.</param>
 /// <param name="argUseHCD">Stored in <c>_UseHCD</c>.</param>
 /// <param name="argSeqHCD">Stored in <c>_SeqHCD</c>.</param>
 /// <param name="argExportFile">Stored in <c>_exportFile</c>.</param>
 /// <param name="argGetTopRank">Stored in <c>_GetTopRank</c>.</param>
 /// <param name="argCompletedOnly">Stored in <c>_CompletedOnly</c>.</param>
 /// <param name="argCompletedReward">Stored in <c>_CompletedReward</c>.</param>
 public frmInvokeProcesses(int argStartScan,
                           int argEndScan,
                           float argMSMSTol,
                           float argPrecursorTol,
                           bool argNGlycan,
                           bool argHuman,
                           int argNoHexNAc,
                           int argNoHex,
                           int argNoDeHex,
                           int argNoSia,
                           string argRawFilePath,
                           string argFastaFile,
                           List<Protease.Type> argProteaseType,
                           int argMissCleavage,
                           bool argAverageMass,
                           bool argUseHCD,
                           bool argSeqHCD,
                           string argExportFile,
                           int argGetTopRank,
                           bool argCompletedOnly,
                           float argCompletedReward
                           )
 {
     InitializeComponent();
     AAMW = new AminoAcidMass();

     // Scan range and tolerances.
     _StartScan    = argStartScan;
     _EndScan      = argEndScan;
     _MSMSTol      = argMSMSTol;
     _PrecursorTol = argPrecursorTol;

     // Glycan settings: this overload never uses a glycan list, only the four counts.
     _NGlycan       = argNGlycan;
     _Human         = argHuman;
     _UseGlycanList = false;
     _NoHexNAc      = argNoHexNAc;
     _NoHex         = argNoHex;
     _NoDeHex       = argNoDeHex;
     _NoSia         = argNoSia;

     // Input and output files.
     _rawFilePath = argRawFilePath;
     _fastaFile   = argFastaFile;
     _exportFile  = argExportFile;

     // Digestion, scoring and reporting options.
     _ProteaseType    = argProteaseType;
     _MissCLeavage    = argMissCleavage;
     _AverageMass     = argAverageMass;
     _UseHCD          = argUseHCD;
     _SeqHCD          = argSeqHCD;
     _GetTopRank      = argGetTopRank;
     _CompletedOnly   = argCompletedOnly;
     _CompletedReward = argCompletedReward;

     // The FASTA file is read synchronously, before the worker starts.
     Proteins = FastaReader.ReadFasta(argFastaFile);

     // Every setting above is assigned before the worker thread begins.
     WorkThread = new Thread(RunWork);
     WorkThread.Start();
 }
示例#29
0
        /// <summary>
        /// Enumerating <c>ParseFastaFile</c> over the three lines supplied below, none of which
        /// starts a FASTA record, throws <see cref="ArgumentException"/>.
        /// </summary>
        public void ParseFastaFile_NoSequencesInFile_ThrowsArgumentException()
        {
            var notFasta = new TestFileService
            {
                fileLines = new List<string>
                {
                    "this is definitely not a fasta file",
                    "ATCGCTGJGJGNVBCACABJP",
                    "dvdfvdf fsdfs dfsdfsgdf"
                }
            };
            var fastaReader = new FastaReader(notFasta);

            // ToList forces enumeration of the parse result inside the assertion.
            Assert.Throws<ArgumentException>(() => fastaReader.ParseFastaFile("path").ToList());
        }
示例#30
0
        /// <summary>
        /// Loads the single reference sequence stored in the gzip-compressed FASTA file at
        /// <paramref name="fastaPath"/> and returns the slice between <c>_beginPosition</c> and
        /// <c>_endPosition</c> (both 1-based and inclusive) on the same chromosome.
        /// </summary>
        /// <param name="fastaPath">Path of the gzip-compressed FASTA file.</param>
        /// <param name="refNameToChromosome">Lookup passed through to <c>FastaReader.AddReferenceSequences</c>.</param>
        /// <returns>A new <c>FastaSequence</c> holding only the requested bases.</returns>
        /// <exception cref="InvalidDataException">The file does not contain exactly one reference.</exception>
        private static FastaSequence GetFastaSequence(string fastaPath, IDictionary<string, IChromosome> refNameToChromosome)
        {
            var references = new List<FastaSequence>();

            // Dispose the decompression stream (and with it the underlying file stream) here.
            // NOTE(review): AddReferenceSequences may already close the stream it is given; a
            // second dispose is harmless, and this guarantees cleanup if it throws.
            using (var stream = new GZipStream(FileUtilities.GetReadStream(fastaPath), CompressionMode.Decompress))
            {
                FastaReader.AddReferenceSequences(stream, refNameToChromosome, references);
            }

            if (references.Count != 1)
            {
                throw new InvalidDataException($"Expected 1 reference, but found {references.Count} references.");
            }

            var reference = references[0];
            // Convert the inclusive 1-based interval into a 0-based start plus a length.
            int    length    = _endPosition - _beginPosition + 1;
            string substring = reference.Bases.Substring(_beginPosition - 1, length);

            return new FastaSequence(reference.Chromosome, substring);
        }