/// <summary>
/// Walks every identifier exposed by the FASTA reader opened on <c>FastaPath</c>,
/// parses the attributes found in the record comment and hands one
/// <c>GeneData</c> instance per identifier to <c>Database.Index</c>.
/// </summary>
/// <exception cref="Exception">
/// A record has no <c>protein_id</c> ("Id missing") or no <c>locus_tag</c>
/// ("Locus tag missing") attribute.
/// </exception>
public async Task Index()
{
    var fasta = await FastaReader.CreateAsync(FastaPath);

    foreach (var identifier in fasta.Identifiers)
    {
        var attributes = AttributesParser.Parse(fasta.CommentOf(identifier));

        var proteinId = attributes.GetValueOrDefault("protein_id") ?? throw new Exception("Id missing");

        // A missing "location" attribute is parsed as the empty string; ParseRange may yield null.
        var location = ParseRange(attributes.GetValueOrDefault("location", ""));

        var gene = new GeneData(proteinId, SequenceId)
        {
            Symbol = attributes.GetValueOrDefault("gene"),
            Product = attributes.GetValueOrDefault("protein"),
            LocusTag = attributes.GetValueOrDefault("locus_tag") ?? throw new Exception("Locus tag missing"),
            // We can infer this from the fasta reader if we prevent '\n' from being in the sequence
            Position = location?.Item1,
            // Inclusive range length; the lifted arithmetic yields null when no range was parsed.
            Length = location?.Item2 - location?.Item1 + 1
        };

        await Database.Index(gene);
    }
}
} // closes the enclosing type whose opening lies outside this excerpt
/// <summary>
/// Digests every protein read from the yeast FASTA file under <c>Resources/</c> with
/// trypsin, collects the monoisotopic mass of each peptide and prints the elapsed time.
/// </summary>
public static void ExampleDigestion()
{
    const string fastaFilePath = "Resources/yeast_uniprot_120226.fasta";
    const int maxMissedCleavages = 3;
    const int minPeptideLength = 5;
    const int maxPeptideLength = 50;

    IProtease trypsin = Protease.GetProtease("Trypsin");
    var peptideMasses = new List<double>();

    var timer = Stopwatch.StartNew();

    using (var reader = new FastaReader(fastaFilePath))
    {
        foreach (Protein protein in reader.ReadNextProtein())
        {
            var digest = protein.Digest(trypsin, maxMissedCleavages, minPeptideLength, maxPeptideLength);
            foreach (Peptide peptide in digest)
            {
                peptideMasses.Add(peptide.MonoisotopicMass);
            }
        }
    }

    //Console.WriteLine("Average Peptide Mass = {0:F4}", masses.Average());
    timer.Stop();
    Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
}
/// <summary>
/// Verifies that <c>ReadToEnd</c> returns a list instance (not null) for an input
/// holding two records separated by an empty line.
/// </summary>
public void ReadToEndReturnsList()
{
    FastaReader fastaReader = new FastaReader(new StringReader(">DNA\nATCGGGCTAAT\nATCGGGCTAAT\n\n>Number2\nATCGGGCTAAT\nATCGGGCTAAT"));

    List<Fasta> fastaList = fastaReader.ReadToEnd();

    // IsNotNull states the intent directly and gives a clearer failure message
    // than IsTrue on a hand-written null comparison.
    Assert.IsNotNull(fastaList);
}
/// <summary>
/// Assembles the reads found at <c>ReadsPath</c>: parses the FASTA file, builds and
/// cleans a De Bruijn graph, optionally writes it as a DOT file to <c>DotFilePath</c>
/// and writes the resulting contigs to <c>ContigsPath</c>.
/// </summary>
/// <remarks>
/// The k-mer length is <c>K</c> when set, otherwise 19. Output directories are created
/// on demand; a path without a directory component (for example <c>"contigs.fasta"</c>)
/// is written relative to the current directory instead of failing.
/// </remarks>
private void Run()
{
    var fileService = new FileService();
    var kmerLength = K ?? 19;
    var errorCorrector = new ErrorCorrector(kmerLength, true);
    var fastaReader = new FastaReader(fileService);
    var reads = fastaReader.ParseFastaFile(ReadsPath);

    // errorCorrector.BuildHistogram(reads);
    var graphBuilder = new DeBruijnGraphBuilder(kmerLength, errorCorrector);
    var graph = graphBuilder.Build(reads);
    errorCorrector.PrintResult();
    graph.CleanUp();

    if (!string.IsNullOrWhiteSpace(DotFilePath))
    {
        // GetDirectoryName yields "" for a bare file name (null for a root path) and
        // Directory.CreateDirectory rejects both, so only create a real directory.
        var dotDirectory = Path.GetDirectoryName(DotFilePath);
        if (!string.IsNullOrEmpty(dotDirectory))
        {
            Directory.CreateDirectory(dotDirectory);
        }

        graphBuilder.ToDot(fileService, DotFilePath, graph);
    }

    var contigs = graph.GetContigs();

    var contigsDirectory = Path.GetDirectoryName(ContigsPath);
    if (!string.IsNullOrEmpty(contigsDirectory))
    {
        Directory.CreateDirectory(contigsDirectory);
    }

    fastaReader.WriteFastaFile(ContigsPath, contigs);
}
/// <summary>
/// Checks that a record with a single sequence line exposes that line through <c>Fasta.Dna</c>.
/// </summary>
public void PopulateATCGDnaFastaFormat()
{
    const string expectedDna = "ATCG";
    var input = new StringReader(">DNA\nATCG");
    var fastaReader = new FastaReader(input);

    Fasta record = fastaReader.Read();

    Assert.AreEqual(expectedDna, record.Dna);
}
/// <summary>
/// Digests every protein of the yeast FASTA file under <c>Resources/</c> with the given
/// protease, keeping all proteins, peptides and doubly charged m/z values in memory, and
/// prints counts, elapsed time and the process working set.
/// </summary>
/// <param name="protease">Protease handed to <c>Protein.Digest</c>.</param>
/// <param name="maxMissed">Missed-cleavage argument for <c>Digest</c>; also echoed in the summary line.</param>
/// <param name="minLength">Minimum-length argument for <c>Digest</c>.</param>
/// <param name="maxLength">Maximum-length argument for <c>Digest</c>.</param>
/// <param name="storeSequenceString">Value assigned to <c>AminoAcidPolymer.StoreSequenceString</c> before digesting.</param>
public static void Start(IProtease protease, int maxMissed = 1, int minLength = 0, int maxLength = int.MaxValue, bool storeSequenceString = true)
{
    const int bytesPerMegabyte = 1024 * 1024;

    Console.WriteLine("**Start Digestion**");
    var timer = Stopwatch.StartNew();

    var peptides = new List<Peptide>();
    var proteins = new List<Protein>();
    var mzValues = new List<double>();
    AminoAcidPolymer.StoreSequenceString = storeSequenceString;

    using (var reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
    {
        foreach (Protein protein in reader.ReadNextProtein())
        {
            foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
            {
                peptides.Add(peptide);
                // forces the calculation of the mass and thus chemical formula
                mzValues.Add(peptide.ToMz(2));
            }

            proteins.Add(protein);
        }
    }

    timer.Stop();
    Console.WriteLine("{0:N0} proteins produced {1:N0} peptides using {2:N0} missed cleavages", proteins.Count, peptides.Count, maxMissed);
    Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
    Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet / bytesPerMegabyte);
    Console.WriteLine("**End Digestion**");
}
/// <summary>
/// Checks that the text following '&gt;' on a header-only input is exposed through <c>Fasta.Header</c>.
/// </summary>
public void PopulateDEFHeaderFastaFormat()
{
    const string expectedHeader = "DEF";
    var input = new StringReader(">DEF");
    var fastaReader = new FastaReader(input);

    Fasta record = fastaReader.Read();

    Assert.AreEqual(expectedHeader, record.Header);
}
/// <summary>
/// Verifies that <c>ReadToEnd</c> returns exactly one record for an input with a single
/// header followed by two sequence lines.
/// </summary>
public void ReadToEndReturnsSingleElement()
{
    FastaReader fastaReader = new FastaReader(new StringReader(">DNA\nATCGGGCTAAT\nATCGGGCTAAT"));

    List<Fasta> fastaList = fastaReader.ReadToEnd();

    // AreEqual reports expected vs. actual counts on failure; IsTrue only reports "false".
    Assert.AreEqual(1, fastaList.Count);
}
/// <summary>
/// Runs blastn on the reader's file and counts, per query sequence, how many result
/// rows were reported for it.
/// </summary>
/// <param name="reader">Supplies the file path, the report size (<c>Length</c>) and the id-to-index lookup.</param>
/// <param name="evalue">Value passed as <c>Evalue</c> in the blastn options.</param>
/// <returns>A report sized by <c>reader.Length</c>, incremented once per result row that carries a query id.</returns>
public async Task<FrequencyReport> Frequency(FastaReader reader, string evalue = "1e-6")
{
    var report = new FrequencyReport(reader.Length);

    var options = new BlastnOptions(reader.Path)
    {
        Task = Task,
        Evalue = evalue,
        FormatSpecifiers = new List<string> { "qseqid" },
        MaxTargetSeqs = MaxTargetSeqs
    };
    var hits = await Client.ExecuteBuffered(options);

    foreach (var hit in hits)
    {
        // Rows without a query id cannot be attributed to a sequence and are skipped.
        if (hit.QuerySeqId != null)
        {
            report.Increment(reader.IndexOf(hit.QuerySeqId));
        }
    }

    return report;
}
/// <summary>
/// Writes a <c>search_database</c> element describing the given FASTA file to the
/// underlying writer.
/// </summary>
/// <param name="fastaFilePath">Written as <c>local_path</c>; also used to count entries and to derive a default name.</param>
/// <param name="name">Written as <c>database_name</c>; when null or empty the file name without extension is used.</param>
/// <param name="releaseDate">Written as <c>database_release_date</c> when not null or empty.</param>
/// <exception cref="ArgumentException"><c>CurrentStage</c> is not <c>Stage.SearchSummary</c>.</exception>
public void WriteProteinDatabase(string fastaFilePath, string name = "", string releaseDate = "")
{
    if (CurrentStage != Stage.SearchSummary)
    {
        throw new ArgumentException("You must be in the Search Summary stage to write protein databases");
    }

    _writer.WriteStartElement("search_database");
    _writer.WriteAttributeString("seq_type", "AA");
    _writer.WriteAttributeString("local_path", fastaFilePath);

    if (string.IsNullOrEmpty(name))
    {
        name = Path.GetFileNameWithoutExtension(fastaFilePath);
    }
    _writer.WriteAttributeString("database_name", name);

    bool hasReleaseDate = !string.IsNullOrEmpty(releaseDate);
    if (hasReleaseDate)
    {
        _writer.WriteAttributeString("database_release_date", releaseDate);
    }

    int entryCount = FastaReader.NumberOfEntries(fastaFilePath);
    _writer.WriteAttributeString("size_in_db_entries", entryCount.ToString());

    _writer.WriteEndElement(); // search_database
}
/// <summary>
/// End-to-end run: parses the FASTA file, builds and cleans the De Bruijn graph, then
/// writes a DOT file to a <c>graphs</c> folder and the contigs to a <c>contigs</c> folder,
/// both next to the input file.
/// </summary>
/// <param name="fastaPath">Input path relative to the <c>IntegrationTests</c> project directory.</param>
/// <exception cref="InvalidOperationException">
/// The current directory does not contain an <c>IntegrationTests</c> segment, so the
/// project directory cannot be located.
/// </exception>
public void BuildGraph_GenerateDotFile_WriteContigs(string fastaPath)
{
    const string assemblyName = "IntegrationTests";
    const int kmerLength = 19;

    // Locate the project root from the working directory. The original code used the
    // IndexOf result unchecked, so a -1 silently produced a meaningless prefix.
    var currentDirectory = Environment.CurrentDirectory;
    var markerIndex = currentDirectory.IndexOf(assemblyName, StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        throw new InvalidOperationException(
            "Cannot locate the '" + assemblyName + "' directory from '" + currentDirectory + "'.");
    }

    var projectPath = currentDirectory.Substring(0, markerIndex + assemblyName.Length);
    fastaPath = Path.Combine(projectPath, fastaPath);

    var inputDirectory = Path.GetDirectoryName(fastaPath);
    var inputName = Path.GetFileNameWithoutExtension(fastaPath);

    var fileService = new FileService();
    var errorCorrector = new ErrorCorrector(kmerLength);
    var fastaReader = new FastaReader(fileService);
    var reads = fastaReader.ParseFastaFile(fastaPath);
    errorCorrector.BuildHistogram(reads);

    var graphBuilder = new DeBruijnGraphBuilder(kmerLength, errorCorrector);
    var graph = graphBuilder.Build(reads);
    errorCorrector.PrintResult();
    graph.CleanUp();

    var dotFileDirectory = Path.Combine(inputDirectory, "graphs");
    Directory.CreateDirectory(dotFileDirectory);
    graphBuilder.ToDot(fileService, Path.Combine(dotFileDirectory, inputName + ".dot"), graph);

    var contigs = graph.GetContigs();
    var contigsDirectory = Path.Combine(inputDirectory, "contigs");
    Directory.CreateDirectory(contigsDirectory);
    fastaReader.WriteFastaFile(Path.Combine(contigsDirectory, inputName + ".contigs.fasta"), contigs);
}
/// <summary>
/// Groups the proteins read from a FASTA file by delegating to the overload that takes
/// a protein sequence and a protease array.
/// </summary>
/// <param name="fastaFile">Path of the FASTA file to read proteins from.</param>
/// <param name="protease">Single protease, wrapped in a one-element array for the delegated call.</param>
/// <param name="observeredSequences">Passed through to the delegated overload.</param>
/// <param name="peptideComparer">Passed through to the delegated overload.</param>
/// <param name="MaxMissedCleavages">Passed through to the delegated overload.</param>
/// <returns>The fully evaluated protein groups.</returns>
public static IEnumerable<ProteinGroup> GroupProteins(string fastaFile, IProtease protease, IEnumerable<IAminoAcidSequence> observeredSequences, IEqualityComparer<IAminoAcidSequence> peptideComparer, int MaxMissedCleavages = 3)
{
    using (FastaReader fasta = new FastaReader(fastaFile))
    {
        // The reader is disposed as soon as this method returns. Both ReadNextProtein()
        // and the delegated overload may be lazily evaluated, so the result must be
        // materialized while the reader is still open; otherwise callers would
        // enumerate over a disposed reader.
        return new List<ProteinGroup>(
            GroupProteins(fasta.ReadNextProtein(), new[] { protease }, observeredSequences, peptideComparer, MaxMissedCleavages));
    }
}
/// <summary>
/// Digests the yeast FASTA file under <c>Resources/</c>, shuffles the peptides with a
/// fixed seed and, for an increasing fraction of "identified" peptides (0 up to 1),
/// prints how many protein groups result.
/// </summary>
/// <param name="protease">Protease used for digestion and grouping.</param>
/// <param name="percentIdentifiedSteps">Increment of the identified fraction per iteration; must be greater than zero.</param>
/// <param name="maxMissed">Missed-cleavage argument for digestion and grouping.</param>
/// <param name="minLength">Minimum-length argument for digestion.</param>
/// <param name="maxLength">Maximum-length argument for digestion.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="percentIdentifiedSteps"/> is not greater than zero (the ramp would never terminate).
/// </exception>
public static void StartRamp(IProtease protease, double percentIdentifiedSteps = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
{
    // Written as !(x > 0) so that NaN is rejected as well.
    if (!(percentIdentifiedSteps > 0))
    {
        throw new ArgumentOutOfRangeException(nameof(percentIdentifiedSteps), percentIdentifiedSteps, "The step size must be greater than zero.");
    }

    // Absorbs rounding error so that the final step (e.g. 20 * 0.05) still counts as 100 %.
    const double tolerance = 1e-9;

    List<Peptide> peps = new List<Peptide>();
    List<Protein> proteins = new List<Protein>();

    using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
    {
        foreach (Protein protein in reader.ReadNextProtein())
        {
            foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
            {
                peps.Add(peptide);
            }

            proteins.Add(protein);
        }
    }

    // Fixed seed to make it reproducible
    Random random = new Random(480912341);
    peps = peps.OrderBy(x => random.Next()).ToList();

    // Derive each fraction from an integer step index instead of repeatedly adding the
    // step to a double: accumulated rounding error could push the running sum just past
    // 1 and silently drop the last step.
    for (int step = 0; ; step++)
    {
        double percentIdentified = step * percentIdentifiedSteps;
        if (percentIdentified > 1 + tolerance)
        {
            break;
        }

        percentIdentified = Math.Min(percentIdentified, 1.0);

        // Take the first x % to act as our identified peptides
        List<Peptide> identifiedPeptides = peps.Take((int)(peps.Count * percentIdentified)).ToList();
        List<ProteinGroup> proteinGroups = ProteinGroup.GroupProteins(proteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();
        Console.WriteLine("{0} peptides {1} protein groups", identifiedPeptides.Count, proteinGroups.Count);
    }
}
/// <summary>
/// Copies every entry of the input FASTA file to the output file, writing each base in
/// lower case where the matching bit in <c>ChromosomeNonUniqueFlags</c> is set and in
/// upper case otherwise.
/// </summary>
/// <param name="fastaPath">FASTA file to read.</param>
/// <param name="outputPath">FASTA file to write.</param>
private void WriteOutputs(string fastaPath, string outputPath)
{
    using (var reader = new FastaReader(fastaPath))
    using (var writer = new FastaWriter(outputPath))
    {
        var entry = new GenericRead();

        // Entries are matched to their flag arrays purely by position in the file.
        for (int chromosomeIndex = 0; reader.GetNextEntry(ref entry); chromosomeIndex++)
        {
            var recased = new StringBuilder();
            BitArray nonUniqueFlags = ChromosomeNonUniqueFlags[chromosomeIndex];

            for (int position = 0; position < entry.Bases.Length; position++)
            {
                char original = entry.Bases[position];
                char recasedBase = nonUniqueFlags[position]
                    ? char.ToLowerInvariant(original)
                    : char.ToUpperInvariant(original);
                recased.Append(recasedBase);
            }

            writer.WriteEntry(entry.Name, recased.ToString());
        }
    }
}
/// <summary>
/// Builds the set of distinct peptide candidates from the yeast FASTA file under
/// <c>Resources/</c>, loads them into a Morpheus search engine and prints the elapsed
/// time and process working set.
/// </summary>
/// <param name="protease">Protease handed to <c>Protein.Digest</c>.</param>
/// <param name="maxMissed">Missed-cleavage argument for <c>Digest</c>.</param>
/// <param name="minLength">Minimum-length argument for <c>Digest</c>.</param>
/// <param name="maxLength">Maximum-length argument for <c>Digest</c>.</param>
public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
{
    Console.WriteLine("**Start Morpheus Search**");
    Stopwatch watch = new Stopwatch();
    watch.Start();

    // Generate peptide candidates (the set drops duplicates according to Peptide equality)
    HashSet<Peptide> peptides = new HashSet<Peptide>();
    using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
    {
        foreach (Protein protein in reader.ReadNextProtein())
        {
            foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
            {
                peptides.Add(peptide);
            }
        }
    }

    MSSearchEngine engine = new MorpheusSearchEngine();
    engine.PrecursorMassTolerance = Tolerance.FromPPM(100);
    engine.ProductMassTolerance = Tolerance.FromPPM(10);
    engine.LoadPeptides(peptides);

    watch.Stop();
    Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
    Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet / (1024 * 1024));
    Console.WriteLine("**End Morpheus Search**");
}
/// <summary>
/// Digests the yeast FASTA file under <c>Resources/</c>, picks a fixed-seed random
/// fraction of the peptides as "identified", groups the proteins against them and
/// prints counts and timings for both phases.
/// </summary>
/// <param name="protease">Protease used for digestion and grouping.</param>
/// <param name="percentIdentified">Fraction of all peptides treated as identified.</param>
/// <param name="maxMissed">Missed-cleavage argument for digestion and grouping.</param>
/// <param name="minLength">Minimum-length argument for digestion.</param>
/// <param name="maxLength">Maximum-length argument for digestion.</param>
public static void ExampleProteinGrouping(IProtease protease, double percentIdentified = 0.01, int maxMissed = 3, int minLength = 5, int maxLength = 50)
{
    var timer = Stopwatch.StartNew();

    var allPeptides = new List<Peptide>(1000000);
    var allProteins = new List<Protein>(7000);

    using (var reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
    {
        foreach (Protein protein in reader.ReadNextProtein())
        {
            allPeptides.AddRange(protein.Digest(protease, maxMissed, minLength, maxLength));
            allProteins.Add(protein);
        }
    }

    Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, timer.ElapsedMilliseconds);

    // The second phase is timed separately.
    timer.Restart();

    var random = new Random(480912341);

    // Take the first x % to act as our identified peptides
    int identifiedCount = (int)(allPeptides.Count * percentIdentified);
    List<Peptide> identifiedPeptides = allPeptides
        .OrderBy(x => random.Next())
        .Take(identifiedCount)
        .ToList();

    List<ProteinGroup> proteinGroups = ProteinGroup
        .GroupProteins(allProteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed)
        .ToList();

    timer.Stop();

    Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, proteinGroups.Count, identifiedPeptides.Count);
    Console.WriteLine();
    Console.WriteLine("Time elapsed: {0} ms", timer.ElapsedMilliseconds);
}
/// <summary>
/// Checks that the four sequence lines of a record are concatenated, without line
/// breaks, into <c>Fasta.Dna</c>.
/// </summary>
public void PopulateMultiLineDnaFastaFormat()
{
    const string expectedDna = "ATCGGGCTAATATCGGGCTAATATCGGGCTAATATCGGGCTAAT";
    var input = new StringReader(">DNA\nATCGGGCTAAT\nATCGGGCTAAT\nATCGGGCTAAT\nATCGGGCTAAT");
    var fastaReader = new FastaReader(input);

    Fasta record = fastaReader.Read();

    Assert.AreEqual(expectedDna, record.Dna);
}
/// <summary>
/// Verifies that <c>ReadToEnd</c> returns two records for an input holding two headers
/// separated by an empty line.
/// </summary>
public void ReadToEndReturnsTwoElements()
{
    FastaReader fastaReader = new FastaReader(new StringReader(">DNA\nATCGGGCTAAT\nATCGGGCTAAT\n\n>Number2\nATCGGGCTAAT\nATCGGGCTAAT"));

    List<Fasta> fastaList = fastaReader.ReadToEnd();

    // AreEqual reports expected vs. actual counts on failure; IsTrue only reports "false".
    Assert.AreEqual(2, fastaList.Count);
}
/// <summary>
/// Runs the whole search synchronously: reads the proteins, derives decoy proteins,
/// generates target and decoy peptides, builds the glycans and then searches and
/// analyzes each MS/MS file in turn while reporting progress.
/// </summary>
/// <returns>
/// <c>Task.CompletedTask</c>; all work has already finished when the method returns.
/// </returns>
private Task Process()
{
    // Progress plumbing: one counter for reading, one for searching.
    var readerCounter = new Counter();
    var searchCounter = new Counter();
    readerCounter.progressChange += ReadProgressChanged;
    searchCounter.progressChange += SearchProgressChanged;

    // build peptides
    Peptide.To.SetCysteine(ConfigureParameters.Access.Cysteine);
    IProteinReader fastaReader = new FastaReader();
    List<IProtein> targetProteins = fastaReader.Read(SearchingParameters.Access.FastaFile);

    // One decoy per target, carrying MultiThreadingSearchHelper.Reverse of the target sequence.
    var decoyProteins = new List<IProtein>();
    foreach (IProtein target in targetProteins)
    {
        IProtein decoy = new BaseProtein();
        decoy.SetSequence(MultiThreadingSearchHelper.Reverse(target.Sequence()));
        decoyProteins.Add(decoy);
    }

    List<string> targetPeptides = MultiThreadingSearchHelper.GeneratePeptides(targetProteins);
    List<string> decoyPeptides = MultiThreadingSearchHelper.GeneratePeptides(decoyProteins);

    // build glycans
    var glycanBuilder = new GlycanBuilder(
        SearchingParameters.Access.HexNAc,
        SearchingParameters.Access.Hex,
        SearchingParameters.Access.Fuc,
        SearchingParameters.Access.NeuAc,
        SearchingParameters.Access.NeuGc,
        SearchingParameters.Access.ComplexInclude,
        SearchingParameters.Access.HybridInclude,
        SearchingParameters.Access.MannoseInclude);
    glycanBuilder.Build();

    int fileNumber = 0;
    foreach (string file in SearchingParameters.Access.MSMSFiles)
    {
        fileNumber++;

        // Reset the per-file progress state before announcing the next file.
        ReadingCounter = 0;
        ProgressCounter = 0;
        UpdateProgress(100);
        Readingprogress(100);
        UpdateSignal($"Searching...({fileNumber}/{SearchingParameters.Access.MSMSFiles.Count})");

        var search = new MultiThreadingSearch(file, readerCounter, searchCounter, targetPeptides, decoyPeptides, glycanBuilder);
        search.Run();

        UpdateSignal("Analyzing...");
        Analyze(file, search.Target(), search.Decoy(), glycanBuilder);
    }

    UpdateSignal("Done");
    return Task.CompletedTask;
}
/// <summary>
/// Parsing a file that contains no lines must throw <see cref="ArgumentException"/>
/// once the result is enumerated.
/// </summary>
public void ParseFastaFile_EmptyFile_ThrowsArgumentException()
{
    var emptyFile = new TestFileService
    {
        fileLines = new List<string>()
    };
    var fastaReader = new FastaReader(emptyFile);

    // ToList() forces enumeration of the parsed sequence inside the asserted delegate.
    Assert.Throws<ArgumentException>(() => fastaReader.ParseFastaFile("path").ToList());
}
/// <summary>
/// Parsing the default content of <c>TestFileService</c> is expected to yield 10
/// sequences, each 80 characters long.
/// </summary>
public void ParseFastaFile_ReturnsCorrectSequences()
{
    const int expectedSequenceCount = 10;
    const int expectedSequenceLength = 80;
    var fastaReader = new FastaReader(new TestFileService());

    var parsed = fastaReader.ParseFastaFile("path").ToList();

    Assert.Equal(expectedSequenceCount, parsed.Count);
    Assert.All(parsed, sequence => Assert.Equal(expectedSequenceLength, sequence.Length));
}
/// <summary>
/// Copies the entries of a FASTA file to <paramref name="Writer"/>, either unchanged or
/// converted to decoys, keeping only entries whose description matches the UniProt
/// header pattern.
/// </summary>
/// <param name="fasta_file">Path of the FASTA file to read.</param>
/// <param name="Writer">Destination for the target or decoy entries.</param>
/// <remarks>
/// Entries with a non-matching header are never written. When
/// <c>Options.EnforceUniprot</c> is set they are also appended to
/// <c>InvalidUniprotheaders.txt</c> next to the first input file and passed to
/// <c>InvalidHeader</c>.
/// </remarks>
public void WriteFasta(string fasta_file, FastaWriter Writer)
{
    // Decoys are produced only for DatabaseType.Decoy. The original if/else chain also
    // tested Concatenated in its decoy branch, but that test was unreachable because the
    // preceding target branch already handles Concatenated; behaviour is unchanged here.
    // NOTE(review): confirm that Concatenated output gets its decoy entries elsewhere.
    bool makeDecoy = Options.OutputType == DatabaseType.Decoy;

    // The pattern is loop-invariant, so build the Regex once instead of once per entry.
    // With ExplicitCapture and no named groups only Match.Success is meaningful.
    Regex uniprotRegex = new Regex(@"(.+)\|(.+)\|(.+?)\s(.+?)\sOS=(.+?)(?:\sGN=(.+?))?(?:$|PE=(\d+)\sSV=(\d+))", RegexOptions.ExplicitCapture);

    using (FastaReader reader = new FastaReader(fasta_file))
    {
        foreach (Fasta fasta in reader.ReadNextFasta())
        {
            if (uniprotRegex.Match(fasta.Description).Success)
            {
                if (makeDecoy)
                {
                    // Same value as the original (methionine && !residue) || residue.
                    bool excludeNTerminus = Options.ExcludeNTerminalMethionine || Options.ExcludeNTerminalResidue;
                    Writer.Write(fasta.ToDecoy(Options.DecoyPrefix, Options.DecoyType, excludeNTerminus, Options.ExcludeNTerminalMethionine));
                }
                else
                {
                    Writer.Write(fasta);
                }
            }
            else if (Options.EnforceUniprot)
            {
                // The log location is only resolved when something has to be logged.
                string headerFolder = Path.GetDirectoryName(Options.InputFiles[0]);
                using (StreamWriter log = new StreamWriter(Path.Combine(headerFolder, "InvalidUniprotheaders.txt"), true))
                {
                    log.WriteLine("Invalid Header:");
                    log.WriteLine();
                    log.WriteLine(fasta.Description);
                    log.WriteLine();
                    InvalidHeader(fasta);
                }
            }
        }
    }
}
/// <summary>
/// Diagnostic helper: scans every entry of a hard-coded hg19 genome FASTA file for a
/// fixed set of reads and their reverse complements, printing every occurrence
/// (including overlapping ones) as tab-separated text.
/// </summary>
public static void CheckUniqueness()
{
    string fastaPath = @"D:\Genomes\Homo_sapiens\UCSC\hg19\Sequence\WholeGenomeFasta\genome.fa";
    string[] Reads = new string[]
    {
        "AACCCTAACCCAACCCTAACCCTAACCCTAACCCT", // 10097 B
        "ACCCTAACCCAACCCTAACCCTAACCCTAACCCTA", // 10098 B
        "AGAGGACAACGCAGCTCCGCCCTCGCGGTGCTCTC", // 10553 A
        "TTTTTTCCTATACATACATACCCATGATAAAGTTT"  // 30763880 A
    };

    using (FastaReader readerA = new FastaReader(fastaPath))
    {
        GenericRead chrA = new GenericRead();
        while (readerA.GetNextEntry(ref chrA))
        {
            Console.WriteLine(chrA.Name);

            // Reads are upper case, so normalize soft-masked (lower-case) bases first.
            string bases = chrA.Bases.ToUpperInvariant();

            // Search for each:
            for (int readIndex = 0; readIndex < Reads.Length; readIndex++)
            {
                string read = Reads[readIndex];
                ReportOccurrences(bases, read, readIndex, read, chrA.Name, false);

                string revComp = Isas.Shared.Utilities.GetReverseComplement(read);
                ReportOccurrences(bases, revComp, readIndex, read, chrA.Name, true);
            }
        }
    }

    Console.WriteLine(">>>Done.");
}

/// <summary>
/// Prints one line per occurrence of <paramref name="pattern"/> in <paramref name="bases"/>.
/// </summary>
private static void ReportOccurrences(string bases, string pattern, int readIndex, string read, string chromosomeName, bool isReverseComplement)
{
    // Ordinal comparison: the data is plain nucleotide text, and a culture-sensitive
    // search over a chromosome-sized string is needlessly slow.
    int pos = bases.IndexOf(pattern, 0, StringComparison.Ordinal);
    while (pos != -1)
    {
        if (isReverseComplement)
        {
            Console.WriteLine("{0}\t{1}\t{2}\t{3}\tRevComp", readIndex, read, chromosomeName, pos);
        }
        else
        {
            Console.WriteLine("{0}\t{1}\t{2}\t{3}", readIndex, read, chromosomeName, pos);
        }

        // Advance by one position so overlapping occurrences are reported as well.
        pos = bases.IndexOf(pattern, pos + 1, StringComparison.Ordinal);
    }
}
/// <summary>
/// Reads the proteins from <c>_fastaFile</c> and concatenates the results of
/// <c>NGlycopeptide</c> for each of them, in file order.
/// </summary>
/// <returns>All generated peptide strings; empty when the file yields no proteins.</returns>
private List<string> GenerateGlycoPeptide()
{
    List<ProteinInfo> proteins = FastaReader.ReadFasta(_fastaFile);
    var glycopeptides = new List<string>();

    foreach (ProteinInfo protein in proteins)
    {
        var fromProtein = protein.NGlycopeptide(_MissCLeavage, _ProteaseType, _peptideMutation);
        glycopeptides.AddRange(fromProtein);
    }

    return glycopeptides;
}
/// <summary>
/// Compares the case-based masking of two reference FASTA files position by position
/// and prints running agreement statistics after each entry. The first entry of file A
/// is skipped; every later entry of A is paired with the next entry of B.
/// </summary>
/// <param name="fastaPathA">First FASTA file; its first entry is discarded.</param>
/// <param name="fastaPathB">Second FASTA file.</param>
/// <returns>Always 0.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// File B has fewer entries than needed, or a paired entry differs in length.
/// </exception>
public static int ProcessReferenceFASTA(string fastaPathA, string fastaPathB)
{
    GenericRead chrA = new GenericRead();
    GenericRead chrB = new GenericRead();
    long CountAB = 0;
    long CountA = 0;
    long CountB = 0;
    long CountNeither = 0;

    using (FastaReader readerA = new FastaReader(fastaPathA))
    using (FastaReader readerB = new FastaReader(fastaPathB))
    {
        readerA.GetNextEntry(ref chrA); // Discard chrM from new output

        while (readerA.GetNextEntry(ref chrA))
        {
            // The result used to be ignored, so a shorter file B led to comparing
            // against whatever chrB still held from the previous iteration.
            if (!readerB.GetNextEntry(ref chrB))
            {
                throw new System.IO.InvalidDataException(
                    string.Format("'{0}' has no entry to pair with '{1}' from '{2}'.", fastaPathB, chrA.Name, fastaPathA));
            }

            if (chrA.Bases.Length != chrB.Bases.Length)
            {
                throw new System.IO.InvalidDataException(
                    string.Format("Length mismatch between '{0}' ({1} bases) and '{2}' ({3} bases).", chrA.Name, chrA.Bases.Length, chrB.Name, chrB.Bases.Length));
            }

            for (int baseIndex = 0; baseIndex < chrA.Bases.Length; baseIndex++)
            {
                // Characters below 'a' (i.e. not lower case) are counted as unique.
                bool isUniqueA = chrA.Bases[baseIndex] < 'a';
                bool isUniqueB = chrB.Bases[baseIndex] < 'a';

                if (isUniqueA && isUniqueB)
                {
                    CountAB++;
                }
                else if (isUniqueA)
                {
                    CountA++;
                }
                else if (isUniqueB)
                {
                    CountB++;
                }
                else
                {
                    CountNeither++;
                }
            }

            Console.WriteLine("After {0}: {1},{2},{3},{4}", chrA.Name, CountAB, CountA, CountB, CountNeither);
            double percentAgreement = 100 * (CountAB + CountNeither) / (double)(CountAB + CountA + CountB + CountNeither);
            Console.WriteLine("Percent agreement: {0:F2}", percentAgreement);
        }
    }

    return 0;
}
/// <summary>
/// Checks that two records separated by an empty line are returned by consecutive
/// <c>Read</c> calls, each with its own header and concatenated sequence.
/// </summary>
public void HandleMultipleFastaFormatWithEmptyLines()
{
    const string expectedDna = "ATCGGGCTAATATCGGGCTAAT";
    var input = new StringReader(">DNA\nATCGGGCTAAT\nATCGGGCTAAT\n\n>Number2\nATCGGGCTAAT\nATCGGGCTAAT");
    var fastaReader = new FastaReader(input);

    Fasta first = fastaReader.Read();
    Assert.AreEqual("DNA", first.Header);
    Assert.AreEqual(expectedDna, first.Dna);

    Fasta second = fastaReader.Read();
    Assert.AreEqual("Number2", second.Header);
    Assert.AreEqual(expectedDna, second.Dna);
}
/// <summary>
/// Creates the form for a search driven by a glycan composition list
/// (<c>_UseGlycanList</c> is set to <c>true</c>), loads the proteins from the FASTA
/// file and immediately starts <c>RunWork</c> on a new thread.
/// </summary>
/// <remarks>
/// Apart from <paramref name="argFastaFile"/>, which is also read through
/// <c>FastaReader.ReadFasta</c>, every argument is only stored in the field named below.
/// </remarks>
/// <param name="argStartScan">Stored in <c>_StartScan</c>.</param>
/// <param name="argEndScan">Stored in <c>_EndScan</c>.</param>
/// <param name="argMSMSTol">Stored in <c>_MSMSTol</c>.</param>
/// <param name="argPrecursorTol">Stored in <c>_PrecursorTol</c>.</param>
/// <param name="argNGlycan">Stored in <c>_NGlycan</c>.</param>
/// <param name="argHuman">Stored in <c>_Human</c>.</param>
/// <param name="argGlycanCompounds">Stored in <c>_GlycanCompounds</c>.</param>
/// <param name="argMassGlycanMapping">Stored in <c>_MassGlycanMapping</c>.</param>
/// <param name="argGlycanCompoundMassList">Stored in <c>_GlycanCompoundMassList</c>.</param>
/// <param name="argGlycanFile">Stored in <c>_glycanFile</c>.</param>
/// <param name="argRawFilePath">Stored in <c>_rawFilePath</c>.</param>
/// <param name="argFastaFile">Stored in <c>_fastaFile</c>; its proteins are loaded into <c>Proteins</c>.</param>
/// <param name="argProteaseType">Stored in <c>_ProteaseType</c>.</param>
/// <param name="argMissCleavage">Stored in <c>_MissCLeavage</c>.</param>
/// <param name="argAverageMass">Stored in <c>_AverageMass</c>.</param>
/// <param name="argUseHCD">Stored in <c>_UseHCD</c>.</param>
/// <param name="argSeqHCD">Stored in <c>_SeqHCD</c>.</param>
/// <param name="argExportFile">Stored in <c>_exportFile</c>.</param>
/// <param name="argGetTopRank">Stored in <c>_GetTopRank</c>.</param>
/// <param name="argCompletedOnly">Stored in <c>_CompletedOnly</c>.</param>
/// <param name="argCompletedReward">Stored in <c>_CompletedReward</c>.</param>
public frmInvokeProcesses(
    int argStartScan,
    int argEndScan,
    float argMSMSTol,
    float argPrecursorTol,
    bool argNGlycan,
    bool argHuman,
    List<GlycanCompound> argGlycanCompounds,
    Dictionary<double, GlycanCompound> argMassGlycanMapping,
    List<float> argGlycanCompoundMassList,
    string argGlycanFile,
    string argRawFilePath,
    string argFastaFile,
    List<Protease.Type> argProteaseType,
    int argMissCleavage,
    bool argAverageMass,
    bool argUseHCD,
    bool argSeqHCD,
    string argExportFile,
    int argGetTopRank,
    bool argCompletedOnly,
    float argCompletedReward)
{
    InitializeComponent();
    AAMW = new AminoAcidMass();

    // Scan range and tolerances
    _StartScan = argStartScan;
    _EndScan = argEndScan;
    _MSMSTol = argMSMSTol;
    _PrecursorTol = argPrecursorTol;

    // Glycan configuration (list-driven mode)
    _UseGlycanList = true;
    _NGlycan = argNGlycan;
    _Human = argHuman;
    _GlycanCompounds = argGlycanCompounds;
    _MassGlycanMapping = argMassGlycanMapping;
    _GlycanCompoundMassList = argGlycanCompoundMassList;
    _glycanFile = argGlycanFile;

    // Input files and digestion settings
    _rawFilePath = argRawFilePath;
    _fastaFile = argFastaFile;
    Proteins = FastaReader.ReadFasta(argFastaFile);
    _ProteaseType = argProteaseType;
    _MissCLeavage = argMissCleavage;
    _AverageMass = argAverageMass;

    // Scoring and export options
    _UseHCD = argUseHCD;
    _SeqHCD = argSeqHCD;
    _CompletedOnly = argCompletedOnly;
    _CompletedReward = argCompletedReward;
    _GetTopRank = argGetTopRank;
    _exportFile = argExportFile;

    // Started last so that RunWork observes the fully assigned fields.
    WorkThread = new Thread(RunWork);
    WorkThread.Start();
}
/// <summary>
/// Creates the form for a blind search driven by monosaccharide counts
/// (<c>_UseGlycanList</c> is set to <c>false</c>), loads the proteins from the FASTA
/// file and immediately starts <c>RunWork</c> on a new thread.
/// </summary>
/// <remarks>
/// Apart from <paramref name="argFastaFile"/>, which is also read through
/// <c>FastaReader.ReadFasta</c>, every argument is only stored in the field named below.
/// </remarks>
/// <param name="argStartScan">Stored in <c>_StartScan</c>.</param>
/// <param name="argEndScan">Stored in <c>_EndScan</c>.</param>
/// <param name="argMSMSTol">Stored in <c>_MSMSTol</c>.</param>
/// <param name="argPrecursorTol">Stored in <c>_PrecursorTol</c>.</param>
/// <param name="argNGlycan">Stored in <c>_NGlycan</c>.</param>
/// <param name="argHuman">Stored in <c>_Human</c>.</param>
/// <param name="argNoHexNAc">Stored in <c>_NoHexNAc</c>.</param>
/// <param name="argNoHex">Stored in <c>_NoHex</c>.</param>
/// <param name="argNoDeHex">Stored in <c>_NoDeHex</c>.</param>
/// <param name="argNoSia">Stored in <c>_NoSia</c>.</param>
/// <param name="argRawFilePath">Stored in <c>_rawFilePath</c>.</param>
/// <param name="argFastaFile">Stored in <c>_fastaFile</c>; its proteins are loaded into <c>Proteins</c>.</param>
/// <param name="argProteaseType">Stored in <c>_ProteaseType</c>.</param>
/// <param name="argMissCleavage">Stored in <c>_MissCLeavage</c>.</param>
/// <param name="argAverageMass">Stored in <c>_AverageMass</c>.</param>
/// <param name="argUseHCD">Stored in <c>_UseHCD</c>.</param>
/// <param name="argSeqHCD">Stored in <c>_SeqHCD</c>.</param>
/// <param name="argExportFile">Stored in <c>_exportFile</c>.</param>
/// <param name="argGetTopRank">Stored in <c>_GetTopRank</c>.</param>
/// <param name="argCompletedOnly">Stored in <c>_CompletedOnly</c>.</param>
/// <param name="argCompletedReward">Stored in <c>_CompletedReward</c>.</param>
public frmInvokeProcesses(
    int argStartScan,
    int argEndScan,
    float argMSMSTol,
    float argPrecursorTol,
    bool argNGlycan,
    bool argHuman,
    int argNoHexNAc,
    int argNoHex,
    int argNoDeHex,
    int argNoSia,
    string argRawFilePath,
    string argFastaFile,
    List<Protease.Type> argProteaseType,
    int argMissCleavage,
    bool argAverageMass,
    bool argUseHCD,
    bool argSeqHCD,
    string argExportFile,
    int argGetTopRank,
    bool argCompletedOnly,
    float argCompletedReward)
{
    InitializeComponent();
    AAMW = new AminoAcidMass();

    // Scan range and tolerances
    _StartScan = argStartScan;
    _EndScan = argEndScan;
    _MSMSTol = argMSMSTol;
    _PrecursorTol = argPrecursorTol;

    // Glycan configuration (count-driven mode)
    _UseGlycanList = false;
    _NGlycan = argNGlycan;
    _Human = argHuman;
    _NoHexNAc = argNoHexNAc;
    _NoHex = argNoHex;
    _NoDeHex = argNoDeHex;
    _NoSia = argNoSia;

    // Input files and digestion settings
    _rawFilePath = argRawFilePath;
    _fastaFile = argFastaFile;
    Proteins = FastaReader.ReadFasta(argFastaFile);
    _ProteaseType = argProteaseType;
    _MissCLeavage = argMissCleavage;
    _AverageMass = argAverageMass;

    // Scoring and export options
    _UseHCD = argUseHCD;
    _SeqHCD = argSeqHCD;
    _CompletedOnly = argCompletedOnly;
    _CompletedReward = argCompletedReward;
    _GetTopRank = argGetTopRank;
    _exportFile = argExportFile;

    // Started last so that RunWork observes the fully assigned fields.
    WorkThread = new Thread(RunWork);
    WorkThread.Start();
}
/// <summary>
/// Parsing a file whose lines contain no FASTA header must throw
/// <see cref="ArgumentException"/> once the result is enumerated.
/// </summary>
public void ParseFastaFile_NoSequencesInFile_ThrowsArgumentException()
{
    var notFasta = new TestFileService
    {
        fileLines = new List<string>
        {
            "this is definitely not a fasta file",
            "ATCGCTGJGJGNVBCACABJP",
            "dvdfvdf fsdfs dfsdfsgdf"
        }
    };
    var fastaReader = new FastaReader(notFasta);

    // ToList() forces enumeration of the parsed sequence inside the asserted delegate.
    Assert.Throws<ArgumentException>(() => fastaReader.ParseFastaFile("path").ToList());
}
/// <summary>
/// Loads the single reference sequence stored in a gzip-compressed FASTA file and
/// returns the part between <c>_beginPosition</c> and <c>_endPosition</c> (1-based,
/// inclusive) as a new <c>FastaSequence</c> on the same chromosome.
/// </summary>
/// <param name="fastaPath">Path of the gzip-compressed FASTA file.</param>
/// <param name="refNameToChromosome">Lookup passed through to <c>FastaReader.AddReferenceSequences</c>.</param>
/// <returns>The requested slice of the reference.</returns>
/// <exception cref="InvalidDataException">The file does not contain exactly one reference.</exception>
private static FastaSequence GetFastaSequence(string fastaPath, IDictionary<string, IChromosome> refNameToChromosome)
{
    var references = new List<FastaSequence>();

    // Both streams used to be left undisposed, leaking the file handle. The whole
    // computation stays inside the using scope so the streams outlive every read of
    // the loaded data; disposing a stream twice is harmless if the reader closes it too.
    using (var fileStream = FileUtilities.GetReadStream(fastaPath))
    using (var gzipStream = new GZipStream(fileStream, CompressionMode.Decompress))
    {
        FastaReader.AddReferenceSequences(gzipStream, refNameToChromosome, references);

        if (references.Count != 1)
        {
            throw new InvalidDataException($"Expected 1 reference, but found {references.Count} references.");
        }

        var reference = references[0];

        // Positions are 1-based and inclusive, hence the -1 offset and the +1 length.
        int length = _endPosition - _beginPosition + 1;
        string substring = reference.Bases.Substring(_beginPosition - 1, length);
        return new FastaSequence(reference.Chromosome, substring);
    }
}