/// <summary>
/// Prints the translated sequence of every requested coding sequence to the console,
/// one FASTA-like record per entry, wrapped at 80 characters per line.
/// </summary>
/// <param name="cdsCollection">Collection that is searched for the requested entries.</param>
/// <param name="cdsArray">Requested entries, passed on unchanged to the feature fetcher.</param>
public void FetchCDSs(CodingSequenceCollection cdsCollection, string[] cdsArray)
{
    const int lineWidth = 80;

    GbkFeatureFetcher featureFetcher = new GbkFeatureFetcher();
    CodingSequenceCollection matches = featureFetcher.FetchCodingSequenceData(cdsCollection, cdsArray);

    foreach (CodingSequence entry in matches.collection)
    {
        // The header uses the gene id when one is present and the locus tag otherwise.
        string identifier = entry.GeneID != "" ? entry.GeneID : entry.LocusTag;
        string output = ">CDS " + identifier + " sequence" + newLine;

        string protein = entry.TranslatedSequence;
        int total = protein.Length;
        int offset = 0;
        while (offset < total)
        {
            int remaining = total - offset;
            if (remaining > lineWidth)
            {
                // A full line that is followed by more sequence.
                output += protein.Substring(offset, lineWidth) + newLine;
            }
            else
            {
                // Last chunk: no line break here, Console.WriteLine below ends the line.
                output += protein.Substring(offset, remaining);
            }
            offset += lineWidth;
        }

        Console.WriteLine(output);
    }

    Console.WriteLine(newLine);
}
/// <summary>
/// Prints every gene/CDS feature that lies inside one of the requested coordinate ranges
/// as semicolon-separated rows below a header row.
/// </summary>
/// <param name="geneCollection">Genes that are searched.</param>
/// <param name="cdsCollection">Coding sequences that are searched.</param>
/// <param name="featureArray">Requested ranges, passed on unchanged to the feature fetcher.</param>
public void FetchFeatures(GeneCollection geneCollection, CodingSequenceCollection cdsCollection, string[] featureArray)
{
    GbkFeatureFetcher fetcher = new GbkFeatureFetcher();
    SiteFeatureCollection siteFeatureCollection = fetcher.FetchSiteFeatures(geneCollection, cdsCollection, featureArray);

    // The header is written after the fetch and with WriteLine: written up front with
    // Console.Write, the first data row ended up on the header line whenever the fetcher
    // printed nothing in between.
    Console.WriteLine("FEATURE;TYPE;START;STOP;ORIENTATION");
    foreach (SiteFeature siteFeature in siteFeatureCollection.collection)
    {
        Console.WriteLine(siteFeature.GeneID + ";" + siteFeature.Type + ";" + siteFeature.StartCoordinate + ";" + siteFeature.EndCoordinate + ";" + siteFeature.Orientation);
    }

    Console.WriteLine(newLine);
}
/// <summary>
/// Collects every coding sequence whose gene product equals one of the requested names.
/// Requested names without any match are reported on the console.
/// </summary>
/// <param name="codingSequenceCollection">Collection that is searched.</param>
/// <param name="cdsArray">Gene product names to look for.</param>
/// <returns>A new, sorted collection holding the matching coding sequences (possibly empty).</returns>
public CodingSequenceCollection FetchCodingSequenceData(CodingSequenceCollection codingSequenceCollection, string[] cdsArray)
{
    List<string> nonMatchedCds = new List<string>();
    CodingSequenceCollection updatedCodingSequenceCollection = new CodingSequenceCollection();

    foreach (string givenCds in cdsArray)
    {
        bool isMatch = false;
        foreach (CodingSequence codingSequence in codingSequenceCollection.collection)
        {
            if (codingSequence.GeneProduct.Equals(givenCds))
            {
                updatedCodingSequenceCollection.AddCodingSequence(codingSequence);
                isMatch = true;
            }
        }

        if (!isMatch)
        {
            nonMatchedCds.Add(givenCds);
        }
    }

    if (!updatedCodingSequenceCollection.collection.Any())
    {
        // Nothing matched at all.
        Console.WriteLine("None of the provided entries could be found.");
    }
    else if (nonMatchedCds.Any())
    {
        // Only some entries matched: list the ones that did not. The previous check was
        // inverted (it ran only when the list was empty), so misses were never reported.
        string message = "Some of the provided CDS could not be found. Here is the list of CDS; "
            + string.Join(", ", nonMatchedCds.ToArray());
        Console.WriteLine("\n" + message);
    }

    updatedCodingSequenceCollection.Sort();
    return updatedCodingSequenceCollection;
}
/// <summary>
/// Builds a collection of site features for every gene that lies completely inside one of
/// the requested coordinate ranges. Each matching gene yields a "gene" feature and, when a
/// coding sequence exists at the same index, a "CDS" feature with the same coordinates.
/// Ranges that are malformed or contain no gene are reported on the console.
/// </summary>
/// <param name="geneCollection">Genes that are tested against the ranges.</param>
/// <param name="cdsCollection">Coding sequences, paired with the genes by index.</param>
/// <param name="featureCoordinateArray">Requested ranges in the form <c>start..end</c>.</param>
/// <returns>A sorted collection of the site features that were found (possibly empty).</returns>
public SiteFeatureCollection FetchSiteFeatures(GeneCollection geneCollection, CodingSequenceCollection cdsCollection, string[] featureCoordinateArray)
{
    SiteFeatureCollection siteFeatureCollection = new SiteFeatureCollection();
    List<string> nonMatchedFeatures = new List<string>();

    foreach (string coordinate in featureCoordinateArray)
    {
        bool isMatch = false;
        int startCoordinate = 0;
        int endCoordinate = 0;
        bool isValidRange = false;

        if (Regex.IsMatch(coordinate, "\\d*\\.\\.\\d*"))
        {
            // The pattern also accepts input such as ".." or "1..x", so the numbers are
            // parsed with TryParse instead of Parse: bad input is reported, not thrown.
            string[] split = Regex.Split(coordinate, "\\.\\.");
            isValidRange = Int32.TryParse(split[0], out startCoordinate)
                && Int32.TryParse(split[1], out endCoordinate);
        }

        if (isValidRange)
        {
            for (int i = 0; i < geneCollection.collection.Count; i++)
            {
                Gene gene = geneCollection.collection[i];
                if (gene.StartCoordinate < startCoordinate || gene.EndCoordinate > endCoordinate)
                {
                    continue;
                }

                string geneId = gene.ID != "" ? gene.ID : gene.LocusTag;
                string orientation = gene.IsReverse ? "R" : "F";
                int geneStartCoordinate = gene.StartCoordinate;
                int geneStopCoordinate = gene.EndCoordinate;

                siteFeatureCollection.AddSiteFeature(
                    new SiteFeature(geneId, "gene", geneStartCoordinate, geneStopCoordinate, orientation));

                // The CDS entry reuses the gene's coordinates and orientation; only the type and
                // the identifier (gene product) differ.
                // NOTE(review): this assumes gene i and CDS i describe the same locus - confirm.
                // The bounds check keeps a file with more genes than CDS entries from throwing.
                if (i < cdsCollection.collection.Count)
                {
                    CodingSequence cds = cdsCollection.collection[i];
                    siteFeatureCollection.AddSiteFeature(
                        new SiteFeature(cds.GeneProduct, "CDS", geneStartCoordinate, geneStopCoordinate, orientation));
                }

                isMatch = true;
            }
        }

        if (!isMatch)
        {
            nonMatchedFeatures.Add(coordinate);
        }
    }

    if (nonMatchedFeatures.Any())
    {
        // Display list of site entries that did not match.
        string message = "Some of the provided enrties could not be found. Here is the list of sites; "
            + string.Join(", ", nonMatchedFeatures.ToArray());
        Console.WriteLine("\n" + message);
    }

    siteFeatureCollection.Sort();
    return siteFeatureCollection;
}
/// <summary>
/// Bundles the results of reading one GenBank file.
/// </summary>
/// <param name="collectedGenes">Stored as <c>geneCollection</c>.</param>
/// <param name="cdsCollection">Stored as <c>codingSequenceCollection</c>.</param>
/// <param name="sum">Stored as <c>summary</c>.</param>
public CollectedGeneBankData(GeneCollection collectedGenes, CodingSequenceCollection cdsCollection, Summary sum)
{
    this.geneCollection = collectedGenes;
    this.codingSequenceCollection = cdsCollection;
    this.summary = sum;
}
/// <summary>
/// Reads a GenBank file line by line and collects its gene entries, CDS entries, the
/// header values (sequence length, organism, accession) and the nucleotide sequence that
/// follows the ORIGIN line.
/// </summary>
/// <param name="inputFile">Path of the GenBank file to read.</param>
/// <returns>The parsed genes, coding sequences and a summary of the file.</returns>
public CollectedGeneBankData ReadGenebankFile(string inputFile)
{
    GeneCollection geneCollection = new GeneCollection();
    CodingSequenceCollection codingSequenceCollection = new CodingSequenceCollection();

    // Only the file name (without directories) goes into the summary.
    string fileName = Path.GetFileName(inputFile);
    string organism = "";
    string accession = "";
    string length = "";

    // The sequence is appended once per line; a StringBuilder avoids re-copying the whole
    // sequence on every append.
    System.Text.StringBuilder originSequence = new System.Text.StringBuilder();

    // State flags of the line-based parser.
    bool isFirst = true;              // true until the first gene/CDS line has been seen
    bool isOrigin = false;            // true once the ORIGIN line has been seen
    bool currentEntryIsCDS = false;   // currentEntry is collecting a CDS entry
    bool currentEntryIsGene = false;  // currentEntry is collecting a gene entry

    // Both patterns accept entries with and without "complement(...)".
    string genePattern = " *gene *(complement)?\\(?\\d*\\.\\.\\d*\\)?";
    string cdsPattern = " *CDS *(complement)?\\(?\\d*\\.\\.\\d*\\)?";
    string currentEntry = "";

    // 'using' makes sure the file handle is released, also when parsing throws.
    using (StreamReader reader = new StreamReader(inputFile))
    {
        string gbkLine;
        while ((gbkLine = reader.ReadLine()) != null)
        {
            if (isOrigin)
            {
                // "//" terminates the record; it is not sequence data, and only the
                // first record of a file is parsed.
                if (gbkLine.StartsWith("//", StringComparison.Ordinal))
                {
                    break;
                }

                // Every line after ORIGIN holds nucleotides plus position numbers and spaces.
                originSequence.Append(Regex.Replace(gbkLine, "(\\d| )", ""));
            }

            // Header values are only looked for until the first gene/CDS entry starts.
            if (isFirst)
            {
                if (gbkLine.StartsWith("LOCUS", StringComparison.Ordinal))
                {
                    length = GetSequenceLength(gbkLine);
                }
                if (gbkLine.Contains(" ORGANISM"))
                {
                    organism = GetOrganism(gbkLine);
                }
                if (gbkLine.Contains("ACCESSION"))
                {
                    accession = GetAccessionId(gbkLine);
                }
            }

            if (currentEntryIsCDS)
            {
                if (Regex.IsMatch(gbkLine, genePattern))
                {
                    // A gene line closes the CDS entry collected so far and opens a gene entry.
                    currentEntryIsGene = true;
                    currentEntryIsCDS = false;
                    CodingSequence codingSequence = CreateCodingSequenceEntry(currentEntry);
                    codingSequenceCollection.AddCodingSequence(codingSequence);
                    currentEntry = gbkLine + "\n";
                }
                else
                {
                    currentEntry += gbkLine + "\n";
                }
            }
            else if (currentEntryIsGene)
            {
                if (Regex.IsMatch(gbkLine, cdsPattern))
                {
                    // A CDS line closes the gene entry collected so far and opens a CDS entry.
                    currentEntryIsGene = false;
                    currentEntryIsCDS = true;
                    Gene gene = CreateGeneEntry(currentEntry);
                    geneCollection.AddGene(gene);
                    currentEntry = gbkLine + "\n";
                }
                else
                {
                    currentEntry += gbkLine + "\n";
                }
            }
            else if (isFirst && Regex.IsMatch(gbkLine, genePattern))
            {
                currentEntryIsGene = true;
                isFirst = false;
                currentEntry += gbkLine + "\n";
            }
            else if (isFirst && Regex.IsMatch(gbkLine, cdsPattern))
            {
                currentEntryIsCDS = true;
                isFirst = false;
                currentEntry += gbkLine + "\n";
            }

            if (gbkLine.StartsWith("ORIGIN", StringComparison.Ordinal))
            {
                // From the next line on, the sequence branch at the top of the loop takes over.
                isOrigin = true;
                string line = gbkLine.Replace("ORIGIN", "");
                originSequence.Append(Regex.Replace(line, "(\\d| )", ""));

                // ORIGIN ends the feature section: store the entry that is still open.
                if (currentEntryIsCDS)
                {
                    currentEntryIsCDS = false;
                    CodingSequence codingSequence = CreateCodingSequenceEntry(currentEntry);
                    codingSequenceCollection.AddCodingSequence(codingSequence);
                }
                else if (currentEntryIsGene)
                {
                    currentEntryIsGene = false;
                    Gene gene = CreateGeneEntry(currentEntry);
                    geneCollection.AddGene(gene);
                }
            }
        }
    }

    // A file without an ORIGIN line never reaches the flush above; store the entry that
    // is still open so the last feature is not lost.
    if (currentEntryIsCDS)
    {
        codingSequenceCollection.AddCodingSequence(CreateCodingSequenceEntry(currentEntry));
    }
    else if (currentEntryIsGene)
    {
        geneCollection.AddGene(CreateGeneEntry(currentEntry));
    }

    int geneCount = geneCollection.collection.Count;            // Size of gene collection
    int cdsCount = codingSequenceCollection.collection.Count;   // Size of coding sequence collection

    double forwardGeneCounter = 0.0;
    foreach (Gene geneEntry in geneCollection.collection)
    {
        if (!geneEntry.IsReverse)
        {
            forwardGeneCounter++;
        }
    }

    // Share of forward genes among all genes, rounded to one decimal. Without any gene the
    // division would be 0/0 (NaN), so 0.0 is reported instead.
    double forwardReverseBalance = geneCount == 0
        ? 0.0
        : Math.Round(forwardGeneCounter / geneCount, 1);

    Summary summary = new Summary(fileName, organism, accession, length, geneCount, forwardReverseBalance, cdsCount, originSequence.ToString());
    CollectedGeneBankData geneBankeData = new CollectedGeneBankData(geneCollection, codingSequenceCollection, summary);
    return geneBankeData;
}
/// <summary>
/// Program entry point: parses the command line, reads the given GenBank file and prints
/// whichever of summary, gene sequences, CDS sequences, features and sites were requested.
/// Every path waits for a key press before returning.
/// </summary>
/// <param name="args">Command line arguments.</param>
static void Main(string[] args)
{
    var options = new Options();
    if (args.Length != 0)
    {
        Parser parser = new Parser();
        FileReader reader = new FileReader();
        GeneBankDisplayFetcher displayFetcher = new GeneBankDisplayFetcher();
        try
        {
            if (parser.ParseArguments(args, options))
            {
                string gbkFile = options.InputFile;

                // The file must exist AND carry a GenBank extension. Without the parentheses
                // '&&' bound tighter than '||', so any path ending in ".gbk" was accepted even
                // when the file did not exist, and a missing path raised a NullReferenceException.
                bool hasGenBankExtension = gbkFile != null
                    && (gbkFile.EndsWith(".gb", StringComparison.Ordinal) || gbkFile.EndsWith(".gbk", StringComparison.Ordinal));

                if (hasGenBankExtension && File.Exists(gbkFile))
                {
                    CollectedGeneBankData geneBankData = reader.ReadGenebankFile(gbkFile);
                    GeneCollection geneCollection = geneBankData.geneCollection;
                    Summary summary = geneBankData.summary;
                    CodingSequenceCollection cdsCollection = geneBankData.codingSequenceCollection;

                    if (options.Summary)
                    {
                        displayFetcher.FetchSummary(geneBankData.summary);
                    }

                    string[] geneArray = options.FetchGenes;
                    if (geneArray != null && geneArray.Length != 0)
                    {
                        Console.WriteLine("Fetching gene sequences...");
                        displayFetcher.FetchGeneDisplay(geneCollection, geneArray, summary.OriginSequence);
                    }

                    if (options.FetchCDS != null && options.FetchCDS.Length != 0)
                    {
                        Console.WriteLine("Fetching cds product sequences...");
                        string[] cdsArray = options.FetchCDS;
                        displayFetcher.FetchCDSs(cdsCollection, cdsArray);
                    }

                    if (options.FetchFeatures != null && options.FetchFeatures.Length != 0)
                    {
                        Console.WriteLine("Fetching gene location features...");
                        string[] featureArray = options.FetchFeatures;
                        displayFetcher.FetchFeatures(geneCollection, cdsCollection, featureArray);
                    }

                    if (options.FetchSites != null && options.FetchSites.Length != 0)
                    {
                        Console.WriteLine("Fetching given nucleotide sites...");
                        string[] siteArray = options.FetchSites;
                        displayFetcher.FetchSites(geneCollection, geneBankData.summary.OriginSequence, siteArray);
                    }

                    Console.ReadKey();
                }
                else
                {
                    Console.WriteLine("Given file " + gbkFile + " seems to not exist on your computer. Please check your input.");
                    Console.WriteLine("Press any key to close the console.");
                    Console.ReadKey();
                }
            }
            else
            {
                Console.WriteLine("The commandline parser could not find any arguments. Please use the --help function for options.");
                Console.WriteLine("Press any key to close the console.");

                // Diagnostic output: parse result, parsed input file and the raw arguments.
                Console.WriteLine(parser.ParseArguments(args, options));
                Console.WriteLine(options.InputFile);
                // Join instead of args[0] + " " + args[1]: a single argument must not end up
                // in the exception handler below because of an index error.
                Console.WriteLine(string.Join(" ", args));
                Console.ReadKey();
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("Encounted an exception while parsing." + e + "\nPlease check your cmd arguments and try again, or use the --help function.");
            Console.WriteLine("Press any key to close the console.");
            Console.ReadKey();
        }
    }
    else
    {
        Console.WriteLine("No arguments we're provided.");
        options.GetHelp();
        Console.WriteLine("Press any key to close the console.");
        Console.ReadKey();
    }
}