コード例 #1
0
        /**
         * Prints the requested genes to the console in a FASTA-like layout.
         * Each gene is written as a ">" header line (gene ID, or the locus tag when the ID is an
         * empty string) followed by its sequence wrapped at 80 characters per line.
         * @param geneCollection genes that are searched for the requested entries.
         * @param geneArray      requested gene identifiers, handed to GbkFeatureFetcher.FetchGeneData.
         * @param originSequence sequence handed to GbkFeatureFetcher.FetchGeneData.
         */
        internal void FetchGeneDisplay(GeneCollection geneCollection, string[] geneArray, string originSequence)
        {
            const int lineWidth = 80;
            GeneCollection matchedGenes = new GbkFeatureFetcher().FetchGeneData(geneCollection, geneArray, originSequence);

            foreach (Gene gene in matchedGenes.collection)
            {
                //Header line: prefer the gene ID, fall back to the locus tag.
                string header    = gene.ID != "" ? gene.ID : gene.LocusTag;
                string fastaText = ">" + header + newLine;

                //Emit the sequence in chunks of lineWidth; only chunks that are followed by more
                //sequence get a line break, so the final chunk ends without one.
                string sequence = gene.Sequence;
                int    total    = sequence.Length;
                int    offset   = 0;
                while (offset < total)
                {
                    int remaining = total - offset;
                    fastaText += remaining > lineWidth
                                 ? sequence.Substring(offset, lineWidth) + newLine
                                 : sequence.Substring(offset, remaining);
                    offset += lineWidth;
                }
                Console.WriteLine(fastaText);
            }
            Console.WriteLine(newLine);
        }
コード例 #2
0
        /**
         * Prints the gene and CDS features found for the requested coordinate ranges as
         * semicolon separated rows, preceded by a header row.
         * @param geneCollection genes handed to GbkFeatureFetcher.FetchSiteFeatures.
         * @param cdsCollection  coding sequences handed to GbkFeatureFetcher.FetchSiteFeatures.
         * @param featureArray   requested coordinate ranges.
         */
        public void FetchFeatures(GeneCollection geneCollection, CodingSequenceCollection cdsCollection, string[] featureArray)
        {
            //The header is terminated here so the first data row can never end up on the header
            //line, independent of what the fetcher prints while collecting the features.
            Console.WriteLine("FEATURE;TYPE;START;STOP;ORIENTATION");
            GbkFeatureFetcher     fetcher = new GbkFeatureFetcher();
            SiteFeatureCollection siteFeatureCollection = fetcher.FetchSiteFeatures(geneCollection, cdsCollection, featureArray);

            foreach (SiteFeature siteFeature in siteFeatureCollection.collection)
            {
                Console.WriteLine(siteFeature.GeneID + ";" + siteFeature.Type + ";" + siteFeature.StartCoordinate + ";" + siteFeature.EndCoordinate + ";" + siteFeature.Orientation);
            }
            Console.WriteLine(newLine);
        }
コード例 #3
0
        /**
         * Collects the genes that match the requested identifiers and fills in their sequence.
         * A requested entry matches the first gene whose ID or locus tag contains it. The sequence
         * of a matched gene is set to the part of originSequence that starts at the gene's
         * StartCoordinate and is (EndCoordinate - StartCoordinate) characters long.
         * Entries without any match are listed in a message written to the console.
         * @param geneCollection genes to search in.
         * @param geneArray      requested gene identifiers.
         * @param originSequence sequence the gene sequences are cut from.
         * @returns a new GeneCollection with the matched genes, sorted through SortOnID.
         */
        public GeneCollection FetchGeneData(GeneCollection geneCollection, string[] geneArray, string originSequence)
        {
            GeneCollection matchedGenes   = new GeneCollection();
            List <string>  missingEntries = new List <string>();

            foreach (string requested in geneArray)
            {
                //Locate the first gene whose ID or locus tag contains the requested text.
                Gene hit = null;
                foreach (Gene candidate in geneCollection.collection)
                {
                    bool idContains = candidate.ID.Contains(requested);
                    if (idContains || candidate.LocusTag.Contains(requested))
                    {
                        hit = candidate;
                        break;
                    }
                }
                if (hit == null)
                {
                    missingEntries.Add(requested);
                    continue;
                }
                int begin = hit.StartCoordinate;
                int span  = hit.EndCoordinate - begin;
                hit.Sequence = originSequence.Substring(begin, span);
                matchedGenes.AddGene(hit);
            }

            //Report every requested entry that did not match any gene.
            if (missingEntries.Count > 0)
            {
                string report = "Some of the provided enrties could not be found. Here is the list of genes; "
                                + string.Join(", ", missingEntries.ToArray());
                Console.WriteLine("\n" + report);
            }
            matchedGenes.SortOnID();
            return matchedGenes;
        }
コード例 #4
0
        /**
         * Prints every occurrence found for the requested nucleotide sites as semicolon separated
         * rows (position, sequence, gene), preceded by a header row.
         * @param geneCollection genes handed to GbkFeatureFetcher.FetchSearchSiteData.
         * @param originSequence sequence handed to GbkFeatureFetcher.FetchSearchSiteData.
         * @param siteArray      requested nucleotide sites.
         */
        public void FetchSites(GeneCollection geneCollection, string originSequence, string[] siteArray)
        {
            //Terminate the header line: the fetcher only prints when a site was not matched, so
            //without the line break the first data row would be appended to the header.
            Console.WriteLine("POSITION;SEQUENCE;GENE");
            GbkFeatureFetcher           fetcher = new GbkFeatureFetcher();
            List <SearchSiteCollection> list    = fetcher.FetchSearchSiteData(geneCollection, originSequence, siteArray);

            foreach (SearchSiteCollection searchSiteCollection in list)
            {
                foreach (SearchSite searchSite in searchSiteCollection.collection)
                {
                    Console.WriteLine(searchSite.StartPosition + ";" + searchSite.Site + ";" + searchSite.GeneName);
                }
            }
            Console.WriteLine(newLine);
        }
コード例 #5
0
        /**
         * Searches the origin sequence for the given nucleotide sites.
         * For every character of a site that is a key of the IUPAC table, one search variant is
         * generated per nucleotide listed for that code (the code is replaced everywhere in the
         * site). Each distinct variant is located in the upper-cased origin sequence and every hit
         * is stored as a SearchSite holding the 1-based position, the matched variant and the name
         * of the gene covering the hit ("INTERGENIC" when no gene covers it).
         * Sites that were not matched are listed in a message written to the console.
         * NOTE(review): a site character that is not a key of the IUPAC table yields no variant, so
         * whether plain A/C/G/T sites are searched depends on CreateIupacTable - confirm.
         * NOTE(review): a variant only expands one ambiguity code; other codes in the same site stay
         * literal in that variant - confirm this is intended.
         * NOTE(review): a site only counts as matched when at least one hit lies inside a gene, so a
         * site with intergenic hits only is still reported as not found - confirm this is intended.
         * @param geneCollection genes used to name the location of a hit.
         * @param originSequence sequence that is searched.
         * @param siteArray      requested nucleotide sites.
         * @returns one SearchSiteCollection per entry of siteArray, in input order.
         */
        public List <SearchSiteCollection> FetchSearchSiteData(GeneCollection geneCollection, string originSequence, string[] siteArray)
        {
            Dictionary <string, string> iupacTable    = CreateIupacTable();
            List <SearchSiteCollection> finalSiteList = new List <SearchSiteCollection>();
            List <string> nonMatchedSites             = new List <string>();
            string        upperOriginSequence         = originSequence.ToUpperInvariant();

            foreach (string site in siteArray)
            {
                SearchSiteCollection searchSiteCollection = new SearchSiteCollection();
                List <string>        targetSequences      = new List <string>();
                bool isMatch = false;

                foreach (char nucleotideSite in site)
                {
                    string nucSite = nucleotideSite.ToString();
                    string newNucleotides;
                    if (!iupacTable.TryGetValue(nucSite, out newNucleotides))
                    {
                        continue;
                    }
                    String regex = "[" + newNucleotides + "]";
                    foreach (char newNuc in newNucleotides)
                    {
                        string targetSequence = site.Replace(nucSite, newNuc.ToString());
                        //A code that occurs more than once in the site generates the same variants
                        //again; keeping each variant once avoids duplicate result rows.
                        if (!targetSequences.Contains(targetSequence))
                        {
                            targetSequences.Add(targetSequence);
                        }
                    }
                    searchSiteCollection.searchSiteRegexMessage = "site search: " + nucSite + " (regex: " + regex + ")";
                }

                foreach (String targetSequence in targetSequences)
                {
                    //Start at offset 0 so a hit on the very first nucleotide is not skipped.
                    int index = upperOriginSequence.IndexOf(targetSequence, StringComparison.Ordinal);
                    while (index >= 0)
                    {
                        String geneName = "INTERGENIC";
                        foreach (Gene gene in geneCollection.collection)
                        {
                            if (index >= gene.StartCoordinate && index <= gene.EndCoordinate)
                            {
                                //No break: when several genes cover the position the last one wins.
                                geneName = gene.ID != "" ? gene.ID : gene.LocusTag;
                                isMatch  = true;
                            }
                        }
                        //Index + 1 because coding index starts at zero and sequence index starts at 1.
                        String strIndex = "" + (index + 1);
                        //Variants are distinct and equally long, so a position can be hit by one
                        //variant only; no lookup for an existing entry at this position is needed.
                        searchSiteCollection.AddSearchSite(new SearchSite(strIndex, targetSequence, geneName));

                        //Continue one position further so overlapping occurrences are found too.
                        int nextStart = index + 1;
                        index = nextStart < upperOriginSequence.Length
                                ? upperOriginSequence.IndexOf(targetSequence, nextStart, StringComparison.Ordinal)
                                : -1;
                    }
                }
                finalSiteList.Add(searchSiteCollection);
                if (!isMatch)
                {
                    nonMatchedSites.Add(site);
                }
            }
            //If some or all site entries did not match:
            if (nonMatchedSites.Any())
            {
                //Display list of nucleotide site entries that did not match.
                String message = "Some of the provided entries could not be found. Here is the list of nucleotide sites; "
                                 + string.Join(", ", nonMatchedSites.ToArray());
                Console.WriteLine("\n" + message);
            }
            return(finalSiteList);
        }
コード例 #6
0
        /**
         * Fetches data from GeneCollection and CodingSequenceCollection.
         * Every entry of featureCoordinateArray is expected to look like "start..stop". For each
         * gene that lies completely inside such a range a "gene" feature is added, followed by a
         * "CDS" feature that carries the product of the coding sequence stored at the same index
         * and otherwise the same coordinates and orientation as the gene.
         * NOTE(review): genes and coding sequences are paired by list position - confirm both
         * collections are always filled in the same order.
         * Entries that are malformed or contain no gene are listed in a console message.
         * @param geneCollection         genes to search in.
         * @param cdsCollection          coding sequences, paired with the genes by index.
         * @param featureCoordinateArray requested coordinate ranges.
         * @returns siteFeatureCollection containing SiteFeature objects.
         */
        public SiteFeatureCollection FetchSiteFeatures(GeneCollection geneCollection, CodingSequenceCollection cdsCollection, string[] featureCoordinateArray)
        {
            SiteFeatureCollection siteFeatureCollection = new SiteFeatureCollection();
            List <string>         nonMatchedFeatures    = new List <string>();

            foreach (string coordinate in featureCoordinateArray)
            {
                //Parse "start..stop" without throwing: an entry that is not a usable range is
                //reported together with the entries that did not contain any gene.
                int  startCoordinate = 0;
                int  endCoordinate   = 0;
                bool isRange         = false;
                if (coordinate != null && Regex.IsMatch(coordinate, "\\d*\\.\\.\\d*"))
                {
                    string[] split = Regex.Split(coordinate, "\\.\\.");
                    isRange = Int32.TryParse(split[0], out startCoordinate) &&
                              Int32.TryParse(split[1], out endCoordinate);
                }
                if (!isRange)
                {
                    nonMatchedFeatures.Add(coordinate);
                    continue;
                }

                bool isMatch = false;
                for (int i = 0; i < geneCollection.collection.Count; i++)
                {
                    Gene gene = geneCollection.collection[i];
                    if (gene.StartCoordinate < startCoordinate || gene.EndCoordinate > endCoordinate)
                    {
                        continue;
                    }
                    string geneId              = gene.ID != "" ? gene.ID : gene.LocusTag;
                    string orientation         = gene.IsReverse ? "R" : "F";
                    int    geneStartCoordinate = gene.StartCoordinate;
                    int    geneStopCoordinate  = gene.EndCoordinate;

                    SiteFeature feature = new SiteFeature(geneId, "gene", geneStartCoordinate, geneStopCoordinate, orientation);
                    siteFeatureCollection.AddSiteFeature(feature);

                    //The CDS entry uses the gene product as name; other values are similar to the
                    //gene values. The coding sequence is only read for genes inside the range and
                    //only when one exists at this index, so a shorter CDS list cannot throw.
                    if (i < cdsCollection.collection.Count)
                    {
                        CodingSequence cds = cdsCollection.collection[i];
                        feature = new SiteFeature(cds.GeneProduct, "CDS", geneStartCoordinate, geneStopCoordinate, orientation);
                        siteFeatureCollection.AddSiteFeature(feature);
                    }
                    isMatch = true;
                }
                if (!isMatch)
                {
                    nonMatchedFeatures.Add(coordinate);
                }
            }

            //Display list of site entries that did not match.
            String message = "";
            if (nonMatchedFeatures.Any())
            {
                message = "Some of the provided entries could not be found. Here is the list of sites; "
                          + string.Join(", ", nonMatchedFeatures.ToArray());
            }
            //The line is written even when the message is empty, which keeps the blank separator
            //line this method has always produced when every entry matched.
            Console.WriteLine("\n" + message);

            siteFeatureCollection.Sort();
            return(siteFeatureCollection);
        }
コード例 #7
0
 /**
  * Stores the supplied gene collection, coding sequence collection and summary on the new instance.
  * @param collectedGenes genes to store as geneCollection.
  * @param cdsCollection  coding sequences to store as codingSequenceCollection.
  * @param sum            summary to store as summary.
  */
 public CollectedGeneBankData(GeneCollection collectedGenes, CodingSequenceCollection cdsCollection, Summary sum)
 {
     this.summary                  = sum;
     this.codingSequenceCollection = cdsCollection;
     this.geneCollection           = collectedGenes;
 }
コード例 #8
0
        /**
         * Reads a GenBank file line by line and collects its genes, coding sequences, origin
         * sequence and summary values.
         * Until the first gene or CDS line is seen, the LOCUS, ORGANISM and ACCESSION lines are
         * handed to GetSequenceLength, GetOrganism and GetAccessionId. From then on the lines are
         * gathered into alternating gene / CDS entries: a line matching the gene pattern closes a
         * running CDS entry and a line matching the CDS pattern closes a running gene entry. The
         * ORIGIN line closes the entry that is still open and starts the sequence section, which
         * runs until a line starting with "//".
         * NOTE(review): an entry that is still open when the file ends without an ORIGIN line is
         * never added to a collection - confirm such files do not need to be supported.
         * @param inputFile path of the GenBank file to read.
         * @returns the collected genes, coding sequences and summary.
         */
        public CollectedGeneBankData ReadGenebankFile(string inputFile)
        {
            GeneCollection           geneCollection           = new GeneCollection();
            CodingSequenceCollection codingSequenceCollection = new CodingSequenceCollection();
            //Split path to get file name.
            string fileName  = Path.GetFileName(inputFile);
            string organism  = "";
            string accession = "";
            string length    = "";
            //The sequence can be very long; a builder avoids copying it again for every line.
            System.Text.StringBuilder originBuilder = new System.Text.StringBuilder();

            //Booleans that track the section of the file that is being read.
            bool isFirst            = true;
            bool isOrigin           = false;
            bool currentEntryIsCDS  = false;
            bool currentEntryIsGene = false;
            //Both patterns accept entries with and without "complement".
            string genePattern = " *gene *(complement)?\\(?\\d*\\.\\.\\d*\\)?";
            string cdsPattern  = " *CDS *(complement)?\\(?\\d*\\.\\.\\d*\\)?";

            string currentEntry = "";

            //The using statement closes the file, also when one of the helpers throws.
            using (StreamReader reader = new StreamReader(inputFile))
            {
                string gbkLine;
                while ((gbkLine = reader.ReadLine()) != null)
                {
                    if (isOrigin)
                    {
                        if (gbkLine.StartsWith("//", StringComparison.Ordinal))
                        {
                            //"//" ends the record; it is not part of the nucleotide data.
                            isOrigin = false;
                        }
                        else
                        {
                            //Lines of the sequence section: strip the position numbers and spaces.
                            originBuilder.Append(Regex.Replace(gbkLine, "(\\d| )", ""));
                        }
                    }
                    //Only evaluated until the first gene or CDS line was found.
                    if (isFirst)
                    {
                        if (gbkLine.StartsWith("LOCUS"))
                        {
                            length = GetSequenceLength(gbkLine);
                        }
                        if (gbkLine.Contains("  ORGANISM"))
                        {
                            organism = GetOrganism(gbkLine);
                        }
                        if (gbkLine.Contains("ACCESSION"))
                        {
                            accession = GetAccessionId(gbkLine);
                        }
                    }

                    if (currentEntryIsCDS)
                    {
                        if (Regex.IsMatch(gbkLine, genePattern))
                        {
                            //A gene line closes the running CDS entry and opens a gene entry.
                            currentEntryIsGene = true;
                            currentEntryIsCDS  = false;
                            CodingSequence codingSequence = CreateCodingSequenceEntry(currentEntry);
                            codingSequenceCollection.AddCodingSequence(codingSequence);
                            currentEntry = gbkLine + "\n";
                        }
                        else
                        {
                            currentEntry += gbkLine + "\n";
                        }
                    }
                    else if (currentEntryIsGene)
                    {
                        if (Regex.IsMatch(gbkLine, cdsPattern))
                        {
                            //A CDS line closes the running gene entry and opens a CDS entry.
                            currentEntryIsGene = false;
                            currentEntryIsCDS  = true;
                            Gene gene = CreateGeneEntry(currentEntry);
                            geneCollection.AddGene(gene);
                            currentEntry = gbkLine + "\n";
                        }
                        else
                        {
                            currentEntry += gbkLine + "\n";
                        }
                    }
                    else if (isFirst)
                    {
                        //First gene or CDS line of the file: start collecting entries.
                        if (Regex.IsMatch(gbkLine, genePattern))
                        {
                            currentEntryIsGene = true;
                            isFirst            = false;
                            currentEntry      += gbkLine + "\n";
                        }
                        else if (Regex.IsMatch(gbkLine, cdsPattern))
                        {
                            currentEntryIsCDS = true;
                            isFirst           = false;
                            currentEntry     += gbkLine + "\n";
                        }
                    }

                    if (gbkLine.StartsWith("ORIGIN"))
                    {
                        //From the next line on the sequence section is read.
                        isOrigin = true;
                        string line = gbkLine.Replace("ORIGIN", "");
                        originBuilder.Append(Regex.Replace(line, "(\\d| )", ""));
                        //Close the entry that was still being collected.
                        if (currentEntryIsCDS)
                        {
                            currentEntryIsCDS = false;
                            CodingSequence codingSequence = CreateCodingSequenceEntry(currentEntry);
                            codingSequenceCollection.AddCodingSequence(codingSequence);
                        }
                        else if (currentEntryIsGene)
                        {
                            currentEntryIsGene = false;
                            Gene gene = CreateGeneEntry(currentEntry);
                            geneCollection.AddGene(gene);
                        }
                    }
                }
            }

            string originSequence = originBuilder.ToString();
            int    geneCount      = geneCollection.collection.Count;           //Size of gene collection
            int    cdsCount       = codingSequenceCollection.collection.Count; //Size of coding sequence collection

            //Count all genes and the genes that are not reverse.
            double totalGeneCounter   = 0.0;
            double forwardGeneCounter = 0.0;
            foreach (Gene geneEntry in geneCollection.collection)
            {
                totalGeneCounter++;
                if (!geneEntry.IsReverse)
                {
                    forwardGeneCounter++;
                }
            }
            //Forward/Reverse (FR) ratio calculation: share of forward genes, rounded to one decimal.
            //Without any gene this is 0.0 / 0.0, which yields NaN.
            double value = (forwardGeneCounter / totalGeneCounter);
            double forwardReverseBalance = Math.Round(value, 1);

            Summary summary = new Summary(fileName, organism, accession, length, geneCount, forwardReverseBalance, cdsCount, originSequence);
            CollectedGeneBankData geneBankeData = new CollectedGeneBankData(geneCollection, codingSequenceCollection, summary);

            return(geneBankeData);
        }
コード例 #9
0
        /**
         * Entry point: parses the command line, reads the given GenBank file and prints the
         * requested summary, gene sequences, CDS products, location features and nucleotide sites.
         * @param args command line arguments.
         */
        static void Main(string[] args)
        {
            var options = new Options();

            if (args.Length != 0)
            {
                Parser                 parser         = new Parser();
                FileReader             reader         = new FileReader();
                GeneBankDisplayFetcher displayFetcher = new GeneBankDisplayFetcher();
                try
                {
                    if (parser.ParseArguments(args, options))
                    {
                        string gbkFile = options.InputFile;
                        //The file must have a .gb or .gbk extension AND exist. The extension test
                        //is grouped explicitly: without the parentheses "&&" binds tighter than
                        //"||", which let a missing .gbk file through to the reader.
                        bool hasGenbankExtension = gbkFile != null &&
                                                   (gbkFile.EndsWith(".gb", StringComparison.Ordinal) ||
                                                    gbkFile.EndsWith(".gbk", StringComparison.Ordinal));
                        if (hasGenbankExtension && File.Exists(gbkFile))
                        {
                            CollectedGeneBankData geneBankData   = reader.ReadGenebankFile(gbkFile);
                            GeneCollection        geneCollection = geneBankData.geneCollection;
                            Summary summary = geneBankData.summary;
                            CodingSequenceCollection cdsCollection = geneBankData.codingSequenceCollection;
                            if (options.Summary)
                            {
                                displayFetcher.FetchSummary(geneBankData.summary);
                            }

                            string[] geneArray = options.FetchGenes;
                            if (geneArray != null && geneArray.Length != 0)
                            {
                                Console.WriteLine("Fetching gene sequences...");
                                displayFetcher.FetchGeneDisplay(geneCollection, geneArray, summary.OriginSequence);
                            }
                            if (options.FetchCDS != null && options.FetchCDS.Length != 0)
                            {
                                Console.WriteLine("Fetching cds product sequences...");
                                string[] cdsArray = options.FetchCDS;
                                displayFetcher.FetchCDSs(cdsCollection, cdsArray);
                            }
                            if (options.FetchFeatures != null && options.FetchFeatures.Length != 0)
                            {
                                Console.WriteLine("Fetching gene location features...");
                                string[] featureArray = options.FetchFeatures;
                                displayFetcher.FetchFeatures(geneCollection, cdsCollection, featureArray);
                            }
                            if (options.FetchSites != null && options.FetchSites.Length != 0)
                            {
                                Console.WriteLine("Fetching given nucleotide sites...");
                                string[] siteArray = options.FetchSites;
                                displayFetcher.FetchSites(geneCollection, geneBankData.summary.OriginSequence, siteArray);
                            }
                            Console.ReadKey();
                        }
                        else
                        {
                            Console.WriteLine("Given file " + gbkFile + " seems to not exist on your computer. Please check your input.");
                            Console.WriteLine("Press any key to close the console.");
                            Console.ReadKey();
                        }
                    }
                    else
                    {
                        Console.WriteLine("The commandline parser could not find any arguments. Please use the --help function for options.");
                        Console.WriteLine("Press any key to close the console.");
                        Console.WriteLine(parser.ParseArguments(args, options));
                        Console.WriteLine(options.InputFile);
                        //Echo the arguments that were received; joining them works for any number
                        //of arguments, where indexing args[1] failed when only one was supplied.
                        Console.WriteLine(string.Join(" ", args));
                        Console.ReadKey();
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine("Encounted an exception while parsing." + e + "\nPlease check your cmd arguments and try again, or use the --help function.");
                    Console.WriteLine("Press any key to close the console.");
                    Console.ReadKey();
                }
            }
            else
            {
                Console.WriteLine("No arguments we're provided.");
                options.GetHelp();
                Console.WriteLine("Press any key to close the console.");
                Console.ReadKey();
            }
        }