/// <summary>
/// Looks up the scientific name for <paramref name="name"/> through a
/// <see cref="WikipediaReader"/> and returns it as a JSON result.
/// </summary>
/// <param name="name">The name passed on to <c>WikipediaReader.GetScientificName</c>.</param>
/// <returns>The looked-up value wrapped by <c>Json(...)</c>.</returns>
public JsonResult GetScientificName(string name)
{
    var reader = new WikipediaReader();

    return Json(reader.GetScientificName(name));
}
/// <summary>
/// Reads the XML dump through a <see cref="WikipediaReader"/> configured with a
/// block size of 1 and verifies that two blocks come back, each holding exactly
/// one document whose title, data and metadata entry match the expected values.
/// </summary>
public void WikipediaReaderTest()
{
    var dump = GetXmlDump();
    using var dumpReader = new WikiDumpXmlReader(dump);
    var reader = new WikipediaReader(dumpReader, WikipediaReader.DefaultFilter, blockSize: 1);

    var blocks = reader.Read().ToArray();

    Assert.Equal(2, blocks.Length);

    // First block: a single document titled "Simple page".
    var firstBlock = blocks[0];
    Assert.Equal(1, firstBlock.Documents.Count);
    Assert.Equal(1, firstBlock.Metadata.Count);

    var firstDoc = firstBlock.Documents[0];
    Assert.Equal("Simple page", firstDoc.Metadata.Title);
    Assert.Equal("=Simple Page=\nSome text", firstDoc.Data);

    // The block-level metadata must be indexed by the document id.
    var firstEntry = firstBlock.Metadata[firstDoc.Metadata.Id];
    Assert.Equal("Simple page", firstEntry.Title);
    Assert.Equal(firstDoc.Metadata.Id, firstBlock.Metadata[firstDoc.Metadata.Id].Id);

    // Second block: a single document titled "Another page".
    var secondBlock = blocks[1];
    Assert.Equal(1, secondBlock.Documents.Count);
    Assert.Equal(1, secondBlock.Metadata.Count);

    var secondDoc = secondBlock.Documents[0];
    Assert.Equal("Another page", secondDoc.Metadata.Title);
    Assert.Equal("Hello world", secondDoc.Data);

    var secondEntry = secondBlock.Metadata[secondDoc.Metadata.Id];
    Assert.Equal("Another page", secondEntry.Title);
    Assert.Equal(secondDoc.Metadata.Id, secondBlock.Metadata[secondDoc.Metadata.Id].Id);
}
/// <summary>
/// Determines the parent node for <paramref name="scientificName"/>, getting or
/// creating the genus (and, where the name is below species level, the species)
/// along the way.
/// </summary>
/// <remarks>
/// The name is split on spaces and the parent is chosen from the number of parts:
/// <list type="bullet">
/// <item><description>1 part ("Lactuca"): the genus is ensured, <c>null</c> is returned.</description></item>
/// <item><description>2 parts ("Lactuca sativa"): the genus.</description></item>
/// <item><description>3 parts without "var." ("Lactuca sativa Larissa"): the species.</description></item>
/// <item><description>4 or 5 parts containing "var." ("Lactuca sativa var. capitata [Larissa]"): the species.</description></item>
/// <item><description>More than 5 parts: the species.</description></item>
/// <item><description>Any other shape: <c>null</c>.</description></item>
/// </list>
/// The species name is always built from the first two parts.
/// </remarks>
/// <param name="scientificName">Space-separated scientific name.</param>
/// <param name="type">Forwarded to <c>GetOrCreateSpecies</c> to select the species kind.</param>
/// <param name="subjectManager">Manager used to look up and create taxa.</param>
/// <returns>
/// The genus or species acting as parent, or <c>null</c> when the name is null,
/// blank, a bare genus, or does not match one of the shapes above.
/// </returns>
public static Node CreateOrSetParents(string scientificName, Type type, SubjectManager subjectManager)
{
    // A missing name has no parent; bailing out here also avoids creating a
    // genus with an empty scientific name.
    if (string.IsNullOrWhiteSpace(scientificName))
    {
        return null;
    }

    // Drop empty entries so leading, trailing or repeated spaces neither shift
    // the part count nor leak into the genus/species names.
    string[] nameParts = scientificName.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // The genus is ensured for every non-blank name, including a bare genus.
    Taxon genus = GetOrCreateGenus(nameParts[0], subjectManager);

    bool hasVarietyMarker = nameParts.Contains("var.");
    bool parentIsSpecies;

    switch (nameParts.Length)
    {
        case 1:
            // Bare genus: nothing above it is resolved here.
            return null;

        case 2:
            // Species: its parent is the genus.
            return genus;

        case 3:
            // "Genus species Cultivar" -> species; a 3-part name with "var." is not handled.
            parentIsSpecies = !hasVarietyMarker;
            break;

        case 4:
        case 5:
            // "Genus species var. variety [Cultivar]" -> species; without "var." not handled.
            parentIsSpecies = hasVarietyMarker;
            break;

        default:
            // Longer names always fall back to the species.
            parentIsSpecies = true;
            break;
    }

    if (!parentIsSpecies)
    {
        return null;
    }

    string speciesName = nameParts[0] + " " + nameParts[1];
    return GetOrCreateSpecies(speciesName, type, subjectManager, genus);
}
/// <summary>
/// Reads the wiki dump at <c>wikiDumpFilePath</c> through a
/// <see cref="WikipediaReader"/> and writes the resulting corpus to
/// <c>wikiPath</c> using a <c>CorpusZipWriter</c>.
/// </summary>
static void TransformWikiDump()
{
    string outputPath = wikiPath;

    // Called before anything is written; presumably creates or clears the
    // target directory (verify against PrepareOutputDirectory).
    PrepareOutputDirectory(outputPath);

    using var dumpReader = new WikiDumpXmlReader(wikiDumpFilePath);

    ICorpusReader<string> corpusReader = new WikipediaReader(
        dumpReader,
        WikipediaReader.DefaultFilter,
        (ushort)BlockSize,
        CorpusSize);

    ICorpusWriter<string> corpusWriter = new CorpusZipWriter<string>(outputPath, stringDataSerializer);

    var corpus = corpusReader.Read();
    corpusWriter.Write(corpus);
}
/// <summary>
/// Returns the species with the given scientific name, creating it as a
/// <c>Plant</c> or <c>Animal</c> (depending on <paramref name="type"/>) when no
/// species with that name exists yet.
/// </summary>
/// <param name="speciesName">Scientific name to look up or assign.</param>
/// <param name="type"><c>typeof(Plant)</c> or <c>typeof(Animal)</c>; any other type yields an empty <c>Species</c>.</param>
/// <param name="subjectManager">Manager used for the lookup and for <c>Create</c>.</param>
/// <param name="genus">Assigned as <c>Parent</c> of a newly created species.</param>
/// <returns>
/// The existing or newly created species. For a type other than <c>Plant</c> or
/// <c>Animal</c>, a fresh <c>Species</c> that was never passed to <c>Create</c>.
/// </returns>
private static Species GetOrCreateSpecies(string speciesName, Type type, SubjectManager subjectManager, Taxon genus)
{
    WikipediaReader wikipedia = new WikipediaReader();
    Species result = new Species();

    bool wantsPlant = type.Equals(typeof(Plant));
    if (!wantsPlant && !type.Equals(typeof(Animal)))
    {
        // Unsupported kind: hand back the untouched placeholder.
        return result;
    }

    if (wantsPlant)
    {
        result = new Plant();
    }
    else
    {
        result = new Animal();
    }

    bool alreadyKnown = subjectManager.GetAll<Species>().Any(s => s.ScientificName.Equals(speciesName));
    if (alreadyKnown)
    {
        // NOTE(review): existence is checked across all Species, but the lookup is
        // restricted to the requested subtype, so this can presumably yield null
        // when the match is of the other kind - confirm intended behavior.
        if (wantsPlant)
        {
            return subjectManager.GetAll<Plant>().FirstOrDefault(s => s.ScientificName.Equals(speciesName));
        }

        return subjectManager.GetAll<Animal>().FirstOrDefault(s => s.ScientificName.Equals(speciesName));
    }

    // Not known yet: populate the new instance and hand it to the manager.
    result.ScientificName = speciesName;
    result.Name = wikipedia.GetName(speciesName);
    result.Parent = genus;
    result.Rank = TaxonRank.Species;
    subjectManager.Create(result);

    return result;
}
/// <summary>
/// Returns the taxon whose scientific name equals <paramref name="genusName"/>,
/// creating it with rank <c>TaxonRank.Genus</c> when none exists.
/// </summary>
/// <remarks>
/// For an existing taxon, a missing <c>Name</c> or <c>ScientificName</c> is filled
/// in from the <see cref="WikipediaReader"/>. Those values are only set on the
/// returned object; this method makes no manager call for them.
/// </remarks>
/// <param name="genusName">Scientific name of the genus.</param>
/// <param name="subjectManager">Manager used for the lookup and for <c>Create</c>.</param>
/// <returns>The existing or newly created genus.</returns>
private static Taxon GetOrCreateGenus(string genusName, SubjectManager subjectManager)
{
    WikipediaReader wikipedia = new WikipediaReader();
    Taxon result = new Taxon();

    bool exists = subjectManager.GetAll<Taxon>().Any(t => t.ScientificName.Equals(genusName));
    if (!exists)
    {
        result.ScientificName = genusName;
        result.Name = wikipedia.GetName(genusName);

        // Fall back to the scientific name when no name was returned.
        if (String.IsNullOrEmpty(result.Name))
        {
            result.Name = genusName;
        }

        result.Rank = TaxonRank.Genus;
        subjectManager.Create(result);

        return result;
    }

    result = subjectManager.GetAll<Taxon>().FirstOrDefault(t => t.ScientificName.Equals(genusName));

    // Back-fill whichever of the two names is missing from the one that is present.
    if (string.IsNullOrEmpty(result.Name) && !string.IsNullOrEmpty(result.ScientificName))
    {
        result.Name = wikipedia.GetName(result.ScientificName);
    }

    if (string.IsNullOrEmpty(result.ScientificName) && !string.IsNullOrEmpty(result.Name))
    {
        result.ScientificName = wikipedia.GetScientificName(result.Name);
    }

    return result;
}