예제 #1
0
파일: Program.cs 프로젝트: irfiit/wikipedia
		/// <summary>
		/// Loads the person, organization and location lists from the output files and runs the finder on them.
		/// If <c>NamedEntityReader.FilesExist</c> reports that the files are not available, the user is asked
		/// on the console whether the entities should be loaded from the input XML instead.
		/// </summary>
		private static void LoadNamedEntitiesFromFiles()
		{
			if (NamedEntityReader.FilesExist(PersonsFileName, OrganizationsFileName, LocationsFileName))
			{
				Persons = NamedEntityReader.ReadData(PersonsFileName);
				Organizations = NamedEntityReader.ReadData(OrganizationsFileName);
				Locations = NamedEntityReader.ReadData(LocationsFileName);

				var finder = new Finder(Persons, Organizations, Locations);
				finder.Find();
				return;
			}

			Console.WriteLine("Files not exists. Load from XML? (Y/N): ");

			// Console.ReadLine() returns null when no more input is available (closed or
			// exhausted redirected stdin). The static string.Equals accepts null, so that
			// case is treated as "no" instead of throwing a NullReferenceException.
			string answer = Console.ReadLine();

			// Ordinal, case-insensitive match: accepts exactly "y" or "Y" regardless of the current culture.
			if (string.Equals(answer, "y", StringComparison.OrdinalIgnoreCase))
			{
				LoadNamedEntitiesFromXML();
			}
		}
예제 #2
0
파일: Program.cs 프로젝트: irfiit/wikipedia
		/// <summary>
		/// Loads named entities from the input XML: reads the pages, categorizes their properties,
		/// cleans, sorts and de-duplicates the resulting person, organization and location names,
		/// writes them to the output files and finally runs the finder on them.
		/// </summary>
		private static void LoadNamedEntitiesFromXML()
		{
			// NOTE: an explicit file name (input_skwiki-latest-pages-articles.xml) was passed here
			// in an earlier, commented-out version of this call.
			string inputPath = GetDataPath();

			var inputReader = new InputDataReader();
			inputReader.SetPagesFromInputFile(inputPath);
			var parsedPages = inputReader.Pages;

			// Start from empty collections for the intermediate page data...
			Infoboxes = new List<Infobox>();
			Geoboxes = new List<Geobox>();
			Citations = new List<Citation>();

			// ...and for the entity name lists that are written out below.
			Persons = new List<string>();
			Organizations = new List<string>();
			Locations = new List<string>();

			// Presumably fills the collections above from the parsed pages - confirm in CategorizeProperties.
			CategorizeProperties(parsedPages);

			// Each list is cleaned, sorted, then de-duplicated ignoring case (current culture).
			// Sorting happens before Distinct, exactly as before, so the same spelling of a
			// case-insensitive duplicate is the one that is encountered first.
			var ignoreCase = StringComparer.CurrentCultureIgnoreCase;

			var cleanedPersons = Persons.Select(name => WordUtils.TrimNonLetterCharacters(name, true));
			Persons = cleanedPersons
				.OrderBy(name => name)
				.Distinct(ignoreCase)
				.ToList();

			var cleanedOrganizations = Organizations.Select(name => WordUtils.TrimNonLetterCharacters(name));
			Organizations = cleanedOrganizations
				.OrderBy(name => name)
				.Distinct(ignoreCase)
				.ToList();

			var cleanedLocations = Locations.Select(name => WordUtils.TrimNonLetterCharacters(name, false));
			Locations = cleanedLocations
				.OrderBy(name => name)
				.Distinct(ignoreCase)
				.ToList();

			// Persist the three lists to their output files.
			NamedEntityWriter.WriteData(Persons, PersonsFileName);
			NamedEntityWriter.WriteData(Organizations, OrganizationsFileName);
			NamedEntityWriter.WriteData(Locations, LocationsFileName);

			// Run the finder over the freshly built lists.
			var entityFinder = new Finder(Persons, Organizations, Locations);
			entityFinder.Find();
		}