/// <summary>
/// Reads a GenBank flat file line by line and collects its gene entries, its coding
/// sequence (CDS) entries and a summary of the record.
/// </summary>
/// <remarks>
/// <para>
/// Header fields (sequence length from the LOCUS line, organism, accession) are only looked
/// for until the first gene or CDS feature line has been seen.
/// </para>
/// <para>
/// Feature text is accumulated into one entry until a line matching the opposite feature
/// kind is found (gene -> CDS or CDS -> gene); at that point the accumulated text is turned
/// into a <c>Gene</c> or <c>CodingSequence</c> and a new entry is started. Lines of any other
/// kind are appended to the entry that is currently open.
/// </para>
/// <para>
/// After the ORIGIN line every line is treated as nucleotide data (digits and spaces are
/// removed) until the "//" record terminator, where reading stops. Only the first record of
/// a multi-record file is therefore read.
/// </para>
/// </remarks>
/// <param name="inputFile">Path of the GenBank file to read.</param>
/// <returns>
/// The gene collection, the coding sequence collection and the summary built from the file.
/// </returns>
public CollectedGeneBankData ReadGenebankFile(string inputFile)
{
    GeneCollection geneCollection = new GeneCollection();
    CodingSequenceCollection codingSequenceCollection = new CodingSequenceCollection();

    // Only the file name (without directories) is reported in the summary.
    string fileName = Path.GetFileName(inputFile);
    string organism = "";
    string accession = "";
    string length = "";

    // Genome sequences can span many thousands of lines; a builder avoids re-copying the
    // whole sequence for every appended line. Fully qualified so no extra using is required.
    System.Text.StringBuilder originSequence = new System.Text.StringBuilder();

    // Parser state.
    bool isFirst = true;              // true until the first gene/CDS feature line is seen
    bool isOrigin = false;            // true once the ORIGIN line has been passed
    bool currentEntryIsCDS = false;   // the entry being accumulated is a CDS
    bool currentEntryIsGene = false;  // the entry being accumulated is a gene

    // Both patterns accept plain and complement(...) locations.
    string genePattern = " *gene *(complement)?\\(?\\d*\\.\\.\\d*\\)?";
    string cdsPattern = " *CDS *(complement)?\\(?\\d*\\.\\.\\d*\\)?";
    // Position numbers and spacing that surround the nucleotides in the ORIGIN section.
    string originNoisePattern = "(\\d| )";
    string currentEntry = "";

    // The using block guarantees the file handle is released, even if parsing throws.
    using (StreamReader reader = new StreamReader(inputFile))
    {
        string gbkLine;
        while ((gbkLine = reader.ReadLine()) != null)
        {
            if (isOrigin)
            {
                // "//" terminates the record; it is not part of the sequence.
                if (gbkLine.StartsWith("//", StringComparison.Ordinal))
                {
                    break;
                }

                originSequence.Append(Regex.Replace(gbkLine, originNoisePattern, ""));
                continue;
            }

            // Header fields are only expected before the first feature entry.
            if (isFirst)
            {
                if (gbkLine.StartsWith("LOCUS", StringComparison.Ordinal))
                {
                    length = GetSequenceLength(gbkLine);
                }
                if (gbkLine.Contains(" ORGANISM"))
                {
                    organism = GetOrganism(gbkLine);
                }
                if (gbkLine.Contains("ACCESSION"))
                {
                    accession = GetAccessionId(gbkLine);
                }
            }

            if (currentEntryIsCDS)
            {
                if (Regex.IsMatch(gbkLine, genePattern))
                {
                    // A gene line closes the open CDS entry and starts a gene entry.
                    currentEntryIsGene = true;
                    currentEntryIsCDS = false;
                    codingSequenceCollection.AddCodingSequence(CreateCodingSequenceEntry(currentEntry));
                    currentEntry = gbkLine + "\n";
                }
                else
                {
                    currentEntry += gbkLine + "\n";
                }
            }
            else if (currentEntryIsGene)
            {
                if (Regex.IsMatch(gbkLine, cdsPattern))
                {
                    // A CDS line closes the open gene entry and starts a CDS entry.
                    currentEntryIsGene = false;
                    currentEntryIsCDS = true;
                    geneCollection.AddGene(CreateGeneEntry(currentEntry));
                    currentEntry = gbkLine + "\n";
                }
                else
                {
                    currentEntry += gbkLine + "\n";
                }
            }
            else if (isFirst)
            {
                // No entry is open yet: the first gene or CDS line opens one.
                if (Regex.IsMatch(gbkLine, genePattern))
                {
                    currentEntryIsGene = true;
                    isFirst = false;
                    currentEntry += gbkLine + "\n";
                }
                else if (Regex.IsMatch(gbkLine, cdsPattern))
                {
                    currentEntryIsCDS = true;
                    isFirst = false;
                    currentEntry += gbkLine + "\n";
                }
            }

            if (gbkLine.StartsWith("ORIGIN", StringComparison.Ordinal))
            {
                // From here on only sequence lines are consumed (see the isOrigin branch),
                // so the open entry stays untouched until it is flushed after the loop.
                isOrigin = true;
                string line = gbkLine.Replace("ORIGIN", "");
                originSequence.Append(Regex.Replace(line, originNoisePattern, ""));
            }
        }
    }

    // Flush the entry that was still open when the ORIGIN section or the end of the file
    // was reached, so the last feature is kept even when the file has no ORIGIN section.
    if (currentEntryIsCDS)
    {
        codingSequenceCollection.AddCodingSequence(CreateCodingSequenceEntry(currentEntry));
    }
    else if (currentEntryIsGene)
    {
        geneCollection.AddGene(CreateGeneEntry(currentEntry));
    }

    int geneCount = geneCollection.collection.Count;            // size of gene collection
    int cdsCount = codingSequenceCollection.collection.Count;   // size of coding sequence collection

    // Forward/Reverse (FR) balance: share of genes that are not flagged as reverse.
    int totalGeneCounter = 0;
    int forwardGeneCounter = 0;
    foreach (Gene geneEntry in geneCollection.collection)
    {
        totalGeneCounter++;
        if (!geneEntry.IsReverse)
        {
            forwardGeneCounter++;
        }
    }

    // Without genes the ratio would be 0/0 (NaN); report 0.0 instead.
    double forwardReverseBalance = totalGeneCounter == 0
        ? 0.0
        : Math.Round((double)forwardGeneCounter / totalGeneCounter, 1);

    Summary summary = new Summary(fileName, organism, accession, length, geneCount, forwardReverseBalance, cdsCount, originSequence.ToString());
    CollectedGeneBankData geneBankeData = new CollectedGeneBankData(geneCollection, codingSequenceCollection, summary);
    return geneBankeData;
}
/// <summary>
/// Command-line entry point. Parses the arguments into <c>Options</c>, reads the given
/// GenBank file and prints whichever views were requested: summary, gene sequences, CDS
/// product sequences, location features and nucleotide sites.
/// </summary>
/// <remarks>
/// Every path ends by waiting for a key press so the console window stays open.
/// </remarks>
/// <param name="args">Command-line arguments as passed to the program.</param>
static void Main(string[] args)
{
    var options = new Options();

    if (args.Length == 0)
    {
        Console.WriteLine("No arguments we're provided.");
        options.GetHelp();
        Console.WriteLine("Press any key to close the console.");
        Console.ReadKey();
        return;
    }

    Parser parser = new Parser();
    FileReader reader = new FileReader();
    GeneBankDisplayFetcher displayFetcher = new GeneBankDisplayFetcher();

    try
    {
        if (!parser.ParseArguments(args, options))
        {
            Console.WriteLine("The commandline parser could not find any arguments. Please use the --help function for options.");
            // Echo what was received; Join is safe for any number of arguments, whereas
            // indexing args[1] throws when only a single argument was passed.
            Console.WriteLine(options.InputFile);
            Console.WriteLine(string.Join(" ", args));
            Console.WriteLine("Press any key to close the console.");
            Console.ReadKey();
            return;
        }

        string gbkFile = options.InputFile;

        // The extension test is grouped explicitly: '&&' binds tighter than '||', so without
        // the parentheses a missing ".gbk" file would pass the check. The null test avoids a
        // NullReferenceException when no input file was supplied.
        bool hasGenBankExtension = gbkFile != null
            && (gbkFile.EndsWith(".gb", StringComparison.Ordinal)
                || gbkFile.EndsWith(".gbk", StringComparison.Ordinal));

        if (!hasGenBankExtension || !File.Exists(gbkFile))
        {
            Console.WriteLine("Given file " + gbkFile + " seems to not exist on your computer. Please check your input.");
            Console.WriteLine("Press any key to close the console.");
            Console.ReadKey();
            return;
        }

        CollectedGeneBankData geneBankData = reader.ReadGenebankFile(gbkFile);
        GeneCollection geneCollection = geneBankData.geneCollection;
        Summary summary = geneBankData.summary;
        CodingSequenceCollection cdsCollection = geneBankData.codingSequenceCollection;

        if (options.Summary)
        {
            displayFetcher.FetchSummary(summary);
        }

        string[] geneArray = options.FetchGenes;
        if (geneArray != null && geneArray.Length != 0)
        {
            Console.WriteLine("Fetching gene sequences...");
            displayFetcher.FetchGeneDisplay(geneCollection, geneArray, summary.OriginSequence);
        }

        string[] cdsArray = options.FetchCDS;
        if (cdsArray != null && cdsArray.Length != 0)
        {
            Console.WriteLine("Fetching cds product sequences...");
            displayFetcher.FetchCDSs(cdsCollection, cdsArray);
        }

        string[] featureArray = options.FetchFeatures;
        if (featureArray != null && featureArray.Length != 0)
        {
            Console.WriteLine("Fetching gene location features...");
            displayFetcher.FetchFeatures(geneCollection, cdsCollection, featureArray);
        }

        string[] siteArray = options.FetchSites;
        if (siteArray != null && siteArray.Length != 0)
        {
            Console.WriteLine("Fetching given nucleotide sites...");
            displayFetcher.FetchSites(geneCollection, summary.OriginSequence, siteArray);
        }

        Console.ReadKey();
    }
    catch (Exception e)
    {
        // Last-resort handler: report the failure instead of letting the process crash.
        Console.WriteLine("Encounted an exception while parsing." + e + "\nPlease check your cmd arguments and try again, or use the --help function.");
        Console.WriteLine("Press any key to close the console.");
        Console.ReadKey();
    }
}