Пример #1
0
        /// <summary>
        /// Asks a <see cref="WikipediaReader"/> for the scientific name that belongs to
        /// <paramref name="name"/> and hands the answer back as JSON.
        /// </summary>
        /// <param name="name">The name forwarded to <c>WikipediaReader.GetScientificName</c>.</param>
        /// <returns>The value returned by the reader, wrapped by <c>Json(...)</c>.</returns>
        public JsonResult GetScientificName(string name)
        {
            var reader = new WikipediaReader();
            string result = reader.GetScientificName(name);

            return Json(result);
        }
Пример #2
0
        /// <summary>
        /// Reads the XML dump returned by <c>GetXmlDump()</c> with a block size of 1 and
        /// verifies that two blocks come back, each holding exactly one document whose
        /// title, data and metadata entry match the expected page.
        /// </summary>
        public void WikipediaReaderTest()
        {
            // Arrange
            using var dumpReader = new WikiDumpXmlReader(GetXmlDump());
            var reader = new WikipediaReader(dumpReader, WikipediaReader.DefaultFilter, blockSize: 1);

            // Act
            var blocks = reader.Read().ToArray();

            // Assert
            Assert.Equal(2, blocks.Length);

            // Expected pages, in the order the blocks are checked.
            var expectedPages = new[]
            {
                (Title: "Simple page", Data: "=Simple Page=\nSome text"),
                (Title: "Another page", Data: "Hello world"),
            };

            for (var i = 0; i < expectedPages.Length; i++)
            {
                var block = blocks[i];
                var expected = expectedPages[i];

                Assert.Equal(1, block.Documents.Count);
                Assert.Equal(1, block.Metadata.Count);
                var document = block.Documents[0];

                Assert.Equal(expected.Title, document.Metadata.Title);
                Assert.Equal(expected.Data, document.Data);

                // The block-level metadata must be keyed by the document id and mirror the document.
                Assert.Equal(expected.Title, block.Metadata[document.Metadata.Id].Title);
                Assert.Equal(document.Metadata.Id, block.Metadata[document.Metadata.Id].Id);
            }
        }
Пример #3
0
        /// <summary>
        /// Ensures the taxonomic parents of <paramref name="scientificName"/> exist and
        /// returns the direct parent node.
        /// </summary>
        /// <remarks>
        /// The name is split on spaces (empty parts are ignored) and classified by part count:
        /// <list type="bullet">
        /// <item><description>1 part ("Lactuca"): the genus is looked up or created; <c>null</c> is returned.</description></item>
        /// <item><description>2 parts ("Lactuca sativa"): the genus is returned.</description></item>
        /// <item><description>3 parts without "var." ("Lactuca sativa Larissa"): the species "Lactuca sativa" is returned.</description></item>
        /// <item><description>4 or 5 parts containing "var." ("Lactuca sativa var. capitata [Larissa]"): the species is returned.</description></item>
        /// <item><description>More than 5 parts: the species is returned.</description></item>
        /// <item><description>Any other combination: <c>null</c>.</description></item>
        /// </list>
        /// The genus is looked up or created for every non-blank name, even when <c>null</c> is returned.
        /// </remarks>
        /// <param name="scientificName">Space-separated scientific name.</param>
        /// <param name="type">Concrete species type, forwarded to <c>GetOrCreateSpecies</c>.</param>
        /// <param name="subjectManager">Manager used to look up and create the parent subjects.</param>
        /// <returns>
        /// The parent node, or <c>null</c> when the name is null/blank or does not match
        /// one of the supported forms.
        /// </returns>
        public static Node CreateOrSetParents(string scientificName, Type type, SubjectManager subjectManager)
        {
            // A null or blank name has no parents; bail out before creating an empty genus.
            if (string.IsNullOrWhiteSpace(scientificName))
            {
                return null;
            }

            // Dropping empty entries keeps repeated or trailing spaces from skewing the part count.
            string[] nameParts = scientificName.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            Taxon genus = GetOrCreateGenus(nameParts[0], subjectManager);

            // "Lactuca sativa": the parent of a species is its genus.
            if (nameParts.Length == 2)
            {
                return genus;
            }

            bool hasVarietyMarker = nameParts.Contains("var.");

            bool speciesIsParent =
                (nameParts.Length == 3 && !hasVarietyMarker) ||
                ((nameParts.Length == 4 || nameParts.Length == 5) && hasVarietyMarker) ||
                nameParts.Length > 5;

            if (!speciesIsParent)
            {
                return null;
            }

            // Everything below species level hangs off the "<genus> <epithet>" species.
            string speciesName = nameParts[0] + " " + nameParts[1];

            return GetOrCreateSpecies(speciesName, type, subjectManager, genus);
        }
Пример #4
0
        /// <summary>
        /// Reads the wiki dump at <c>wikiDumpFilePath</c> through a <c>WikipediaReader</c>
        /// and writes everything it yields with a <c>CorpusZipWriter</c> targeting <c>wikiPath</c>.
        /// </summary>
        static void TransformWikiDump()
        {
            string outputDirectory = wikiPath;
            PrepareOutputDirectory(outputDirectory);

            // The XML reader is disposed when this method exits.
            using var dumpReader = new WikiDumpXmlReader(wikiDumpFilePath);

            ICorpusReader<string> corpusReader = new WikipediaReader(
                dumpReader,
                WikipediaReader.DefaultFilter,
                (ushort)BlockSize,
                CorpusSize);

            ICorpusWriter<string> corpusWriter =
                new CorpusZipWriter<string>(outputDirectory, stringDataSerializer);

            var blocks = corpusReader.Read();
            corpusWriter.Write(blocks);
        }
Пример #5
0
        /// <summary>
        /// Returns the species named <paramref name="speciesName"/>, creating it as a
        /// <c>Plant</c> or <c>Animal</c> under <paramref name="genus"/> when no species
        /// with that scientific name exists yet.
        /// </summary>
        /// <param name="speciesName">Scientific name used for the lookup and for a new record.</param>
        /// <param name="type">
        /// <c>typeof(Plant)</c> or <c>typeof(Animal)</c>. For any other type a blank,
        /// unsaved <c>Species</c> is returned.
        /// </param>
        /// <param name="subjectManager">Manager used to query and create species.</param>
        /// <param name="genus">Parent assigned to a newly created species.</param>
        /// <returns>The existing or newly created species.</returns>
        private static Species GetOrCreateSpecies(string speciesName, Type type, SubjectManager subjectManager, Taxon genus)
        {
            bool isPlant  = type.Equals(typeof(Plant));
            bool isAnimal = !isPlant && type.Equals(typeof(Animal));

            // Unsupported types: hand back an empty, unpersisted species without touching the store.
            if (!isPlant && !isAnimal)
            {
                return new Species();
            }

            if (subjectManager.GetAll<Species>().Any(s => s.ScientificName.Equals(speciesName)))
            {
                Species existing;

                if (isPlant)
                {
                    existing = subjectManager.GetAll<Plant>().FirstOrDefault(s => s.ScientificName.Equals(speciesName));
                }
                else
                {
                    existing = subjectManager.GetAll<Animal>().FirstOrDefault(s => s.ScientificName.Equals(speciesName));
                }

                // The name exists but not as the requested concrete type: return the
                // record that matched above instead of handing callers a null.
                if (existing == null)
                {
                    existing = subjectManager.GetAll<Species>().FirstOrDefault(s => s.ScientificName.Equals(speciesName));
                }

                return existing;
            }

            // Nothing stored under this name yet: build and persist a new record.
            Species species;

            if (isPlant)
            {
                species = new Plant();
            }
            else
            {
                species = new Animal();
            }

            // The reader is only needed to fill in the name of a new record.
            WikipediaReader wReader = new WikipediaReader();

            species.ScientificName = speciesName;
            species.Name           = wReader.GetName(speciesName);
            species.Parent         = genus;
            species.Rank           = TaxonRank.Species;
            subjectManager.Create(species);

            return species;
        }
Пример #6
0
        /// <summary>
        /// Returns the taxon whose scientific name equals <paramref name="genusName"/>,
        /// creating and storing a new genus when none exists.
        /// </summary>
        /// <remarks>
        /// For an existing taxon, a missing <c>Name</c> or <c>ScientificName</c> is filled in
        /// from <c>WikipediaReader</c> on the returned object. This method does not pass that
        /// change back to <paramref name="subjectManager"/>.
        /// </remarks>
        /// <param name="genusName">Scientific name of the genus.</param>
        /// <param name="subjectManager">Manager used to query and create taxa.</param>
        /// <returns>The existing or newly created genus.</returns>
        private static Taxon GetOrCreateGenus(string genusName, SubjectManager subjectManager)
        {
            WikipediaReader wReader = new WikipediaReader();

            // One lookup replaces the former Any + FirstOrDefault pair over the same predicate.
            Taxon genus = subjectManager.GetAll<Taxon>().FirstOrDefault(s => s.ScientificName.Equals(genusName));

            if (genus != null)
            {
                if (string.IsNullOrEmpty(genus.Name) && !string.IsNullOrEmpty(genus.ScientificName))
                {
                    genus.Name = wReader.GetName(genus.ScientificName);
                }

                if (string.IsNullOrEmpty(genus.ScientificName) && !string.IsNullOrEmpty(genus.Name))
                {
                    genus.ScientificName = wReader.GetScientificName(genus.Name);
                }

                return genus;
            }

            genus = new Taxon();
            genus.ScientificName = genusName;
            genus.Name           = wReader.GetName(genusName);

            // Fall back to the scientific name when the reader yields no name.
            if (string.IsNullOrEmpty(genus.Name))
            {
                genus.Name = genusName;
            }

            genus.Rank = TaxonRank.Genus;

            subjectManager.Create(genus);

            return genus;
        }