/// <summary>
/// Loads the person, organization and location lists from the existing
/// output files and runs a <c>Finder</c> over them. When
/// <c>NamedEntityReader.FilesExist</c> reports that the files are not
/// available, the user is asked on the console whether the entities should
/// be loaded from the XML input instead.
/// </summary>
private static void LoadNamedEntitiesFromFiles()
{
    if (!NamedEntityReader.FilesExist(PersonsFileName, OrganizationsFileName, LocationsFileName))
    {
        Console.WriteLine("Files not exists. Load from XML? (Y/N): ");

        // Console.ReadLine() returns null when standard input is closed or a
        // redirected stream is exhausted. The static string.Equals tolerates
        // null, so that case is treated as "no" instead of throwing.
        // The ordinal, case-insensitive comparison accepts "y" and "Y" only.
        string answer = Console.ReadLine();
        if (string.Equals(answer, "y", StringComparison.OrdinalIgnoreCase))
        {
            LoadNamedEntitiesFromXML();
        }

        return;
    }

    Persons = NamedEntityReader.ReadData(PersonsFileName);
    Organizations = NamedEntityReader.ReadData(OrganizationsFileName);
    Locations = NamedEntityReader.ReadData(LocationsFileName);

    var finder = new Finder(Persons, Organizations, Locations);
    finder.Find();
}
/// <summary>
/// Builds the named-entity lists from the XML input: reads the pages from
/// the file returned by <c>GetDataPath()</c>, resets the working and output
/// collections, calls <c>CategorizeProperties</c> on the pages, normalizes
/// the three entity lists, writes them to their output files and finally
/// runs a <c>Finder</c> over them.
/// </summary>
private static void LoadNamedEntitiesFromXML()
{
    // An explicit file name used to be passed here:
    // GetDataPath(@"input_skwiki-latest-pages-articles.xml")
    string inputPath = GetDataPath();

    var inputReader = new InputDataReader();
    inputReader.SetPagesFromInputFile(inputPath);
    var loadedPages = inputReader.Pages;

    // Start from empty working collections.
    Infoboxes = new List<Infobox>();
    Geoboxes = new List<Geobox>();
    Citations = new List<Citation>();

    // Start from empty output collections.
    Persons = new List<string>();
    Organizations = new List<string>();
    Locations = new List<string>();

    // NOTE(review): presumably this fills the collections initialized above - confirm.
    CategorizeProperties(loadedPages);

    // Each list is cleaned with its own TrimNonLetterCharacters call shape.
    Persons = NormalizeEntityNames(Persons, name => WordUtils.TrimNonLetterCharacters(name, true));
    Organizations = NormalizeEntityNames(Organizations, name => WordUtils.TrimNonLetterCharacters(name));
    Locations = NormalizeEntityNames(Locations, name => WordUtils.TrimNonLetterCharacters(name, false));

    // Persist the results to the output files.
    NamedEntityWriter.WriteData(Persons, PersonsFileName);
    NamedEntityWriter.WriteData(Organizations, OrganizationsFileName);
    NamedEntityWriter.WriteData(Locations, LocationsFileName);

    // Run the finder on the freshly built lists.
    var entityFinder = new Finder(Persons, Organizations, Locations);
    entityFinder.Find();
}

/// <summary>
/// Applies <paramref name="trim"/> to every name, sorts the results with the
/// default string ordering and then drops duplicates, comparing
/// case-insensitively under the current culture.
/// </summary>
/// <param name="rawNames">Entity names to normalize.</param>
/// <param name="trim">Cleaning function applied to each name.</param>
/// <returns>A new list with the cleaned, sorted, de-duplicated names.</returns>
private static List<string> NormalizeEntityNames(IEnumerable<string> rawNames, Func<string, string> trim)
{
    // Sorting happens before de-duplication, so the operation order is kept as-is.
    IEnumerable<string> cleaned = rawNames.Select(trim);
    IEnumerable<string> sorted = cleaned.OrderBy(name => name);
    IEnumerable<string> unique = sorted.Distinct(StringComparer.CurrentCultureIgnoreCase);
    return unique.ToList();
}