/// <summary>
/// Reads the pages of the input XML file, categorizes them into persons,
/// organizations and locations, writes each cleaned-up list to its output
/// file and finally runs the <c>Finder</c> over the three lists.
/// </summary>
private static void LoadNamedEntitiesFromXML()
{
    //string filePath = GetDataPath(@"input_skwiki-latest-pages-articles.xml")
    string inputPath = GetDataPath();

    var inputReader = new InputDataReader();
    inputReader.SetPagesFromInputFile(inputPath);
    var loadedPages = inputReader.Pages;

    // Fresh containers for the intermediate parsing results.
    Infoboxes = new List<Infobox>();
    Geoboxes = new List<Geobox>();
    Citations = new List<Citation>();

    // Fresh containers for the entity names that end up in the output files.
    Persons = new List<string>();
    Organizations = new List<string>();
    Locations = new List<string>();

    // NOTE(review): presumably this call fills the lists initialized above - confirm.
    CategorizeProperties(loadedPages);

    // Each category uses its own TrimNonLetterCharacters overload/argument;
    // the sorting and de-duplication steps are shared.
    Persons = NormalizeEntityNames(Persons, name => WordUtils.TrimNonLetterCharacters(name, true));
    Organizations = NormalizeEntityNames(Organizations, name => WordUtils.TrimNonLetterCharacters(name));
    Locations = NormalizeEntityNames(Locations, name => WordUtils.TrimNonLetterCharacters(name, false));

    // Persist every category to its own output file.
    NamedEntityWriter.WriteData(Persons, PersonsFileName);
    NamedEntityWriter.WriteData(Organizations, OrganizationsFileName);
    NamedEntityWriter.WriteData(Locations, LocationsFileName);

    // Run the finder over the final entity lists.
    new Finder(Persons, Organizations, Locations).Find();
}

/// <summary>
/// Applies <paramref name="trim"/> to every name, sorts the results with the
/// default string comparer and drops duplicates that differ only by case
/// (current culture).
/// </summary>
/// <param name="names">Raw entity names.</param>
/// <param name="trim">Clean-up function applied to each name before sorting.</param>
/// <returns>A new list holding the cleaned, sorted and de-duplicated names.</returns>
private static List<string> NormalizeEntityNames(IEnumerable<string> names, Func<string, string> trim)
{
    // The sort deliberately runs before Distinct, so the first entry of each
    // case-insensitive group in sorted order is the one that is kept.
    var sortedNames = names.Select(trim).OrderBy(name => name);
    return sortedNames.Distinct(StringComparer.CurrentCultureIgnoreCase).ToList();
}