Exemplo n.º 1
0
        /// <summary>
        /// Reads the pages of the input XML file, lets <c>CategorizeProperties</c> process them,
        /// then normalizes the collected person, organization and location names
        /// (trimmed, sorted, de-duplicated ignoring case), writes each list to its
        /// output file and finally runs a <c>Finder</c> over the three lists.
        /// </summary>
        private static void LoadNamedEntitiesFromXML()
        {
            // An earlier revision passed an explicit file name
            // (input_skwiki-latest-pages-articles.xml) to GetDataPath here.
            string inputPath  = GetDataPath();
            var    dataReader = new InputDataReader();

            dataReader.SetPagesFromInputFile(inputPath);
            var loadedPages = dataReader.Pages;

            // Reset the intermediate collections before the pages are categorized.
            Infoboxes = new List <Infobox>();
            Geoboxes  = new List <Geobox>();
            Citations = new List <Citation>();

            // Reset the output collections as well.
            Persons       = new List <string>();
            Organizations = new List <string>();
            Locations     = new List <string>();

            // NOTE(review): presumably this is what fills the collections above - confirm.
            CategorizeProperties(loadedPages);

            // Shared tail of the three pipelines below: sort with the default string
            // ordering, then drop duplicates that differ only by case (current culture).
            Func <IEnumerable <string>, List <string> > sortAndDeduplicate =
                names => names
                    .OrderBy(name => name)
                    .Distinct(StringComparer.CurrentCultureIgnoreCase)
                    .ToList();

            // Each entity kind calls TrimNonLetterCharacters with its own arguments.
            Persons       = sortAndDeduplicate(Persons.Select(person => WordUtils.TrimNonLetterCharacters(person, true)));
            Organizations = sortAndDeduplicate(Organizations.Select(organization => WordUtils.TrimNonLetterCharacters(organization)));
            Locations     = sortAndDeduplicate(Locations.Select(location => WordUtils.TrimNonLetterCharacters(location, false)));

            // Persist the normalized lists to their output files.
            NamedEntityWriter.WriteData(Persons, PersonsFileName);
            NamedEntityWriter.WriteData(Organizations, OrganizationsFileName);
            NamedEntityWriter.WriteData(Locations, LocationsFileName);

            // Hand the final lists to the finder and run it.
            var entityFinder = new Finder(Persons, Organizations, Locations);

            entityFinder.Find();
        }