/// <summary>
/// Passes every descendant element of <paramref name="xmlText"/> whose name equals
/// <c>UnitTextType.ENTITY</c> to <c>FilterGeographicEntity</c>.
/// </summary>
/// <remarks>
/// The shared <c>_geoNames</c> searcher is created on the first call. When its construction
/// throws a <c>DiskSearchEngineException</c> the error is logged, <c>_geoNames</c> stays
/// unset (so a later call attempts the load again) and processing still continues.
/// Any other exception type is not handled here.
/// </remarks>
/// <param name="xmlText">Root element to scan; nothing is scanned when it is <c>null</c>.</param>
public static void FilterGeographic(ref XElement xmlText)
{
    var dictionaryNotLoaded = _geoNames == null;
    if (dictionaryNotLoaded)
    {
        try
        {
            var dictionaryPath = Utils.GlobalVariables.UserResourcesPath + _path2GeoDict;
            _geoNames = new HashtableSearcher(dictionaryPath, HashtableEngineHelper.NormlizeText);
        }
        catch (DiskSearchEngineException loadError)
        {
            // Message text (Russian): "Error while loading the dictionary".
            Logger.WriteError("Ошибка при загрузке словаря");
            Logger.Write(loadError);
        }
    }

    if (xmlText == null)
    {
        return;
    }

    foreach (XElement candidate in xmlText.Descendants())
    {
        // Skip everything that is not an entity element.
        if (candidate.Name != UnitTextType.ENTITY.ToString())
        {
            continue;
        }

        FilterGeographicEntity(candidate);
    }
}
/// <summary>
/// Manual test routine for a hard-coded data file: prepares the file in the chosen encoding,
/// builds its index through <c>HashtableIndexer.BuildIndex</c>, then probes the index with one
/// fixed name followed by every entry of <c>geo_names</c> via <c>IsExists1</c>.
/// </summary>
static void test_2()
{
    var dataPath = @"E:\GeoDB\DiskSearchEngine\data\fusking.rotter.data";

    // Alternatives left disabled here: Encoding.Unicode, Encoding.UTF8.
    var dataEncoding = Encoding.UTF7;

    create_datafile_in_need_encoding(dataPath, dataEncoding);

    // Disabled step: var indexHeader = HashtableIndexer.GetIndexHeaderByDataFile(dataPath);

    // The record count, rounded by NearestPrimeNumber(), is what BuildIndex receives as its third argument.
    var recordCount = HashtableEngineHelper.CalulateAllRecordCountInDataFile(dataPath, dataEncoding);
    var nearestPrime = recordCount.NearestPrimeNumber();
    HashtableIndexer.BuildIndex(dataPath, dataEncoding, nearestPrime, HashtableEngineHelper.NormlizeText);

    using (var index = new HashtableSearcher(dataPath, HashtableEngineHelper.NormlizeText))
    {
        // Width of the longest name; handed to IsExists1 together with each probed name.
        var columnWidth = geo_names.Max(name => name.Length);

        index.IsExists1("кызыл-орда", columnWidth);
        foreach (var name in geo_names)
        {
            index.IsExists1(name, columnWidth);
        }
    }
}
/// <summary>
/// Opens a searcher over <paramref name="dataFileName"/> and probes it with one fixed name
/// followed by every entry of <c>geo_names</c>, using <c>IsExists1</c> for each lookup.
/// </summary>
/// <param name="dataFileName">Path of the data file handed to the <c>HashtableSearcher</c> constructor.</param>
static void test_Search_small(string dataFileName)
{
    using (var index = new HashtableSearcher(dataFileName, HashtableEngineHelper.NormlizeText))
    {
        // Length of the longest name; handed to IsExists1 together with each probed name.
        var longest = geo_names.Select(name => name.Length).Max();

        index.IsExists1("кызыл-орда", longest);

        foreach (var candidate in geo_names)
        {
            index.IsExists1(candidate, longest);
        }
    }
}
/// <summary>
/// Looks up <paramref name="geoName"/> in <paramref name="searcher"/> and prints the outcome as a
/// single console line, padding the quoted name with spaces up to <paramref name="max_len"/>.
/// </summary>
/// <param name="searcher">Searcher to query.</param>
/// <param name="geoName">Name to look up; a <c>null</c> value is printed as an empty name.</param>
/// <param name="max_len">Minimum width of the name column; longer names are printed unpadded.</param>
/// <remarks>
/// A found name is printed in white, a missing one in yellow. Any exception raised inside the
/// <c>try</c> block is caught and printed in red instead of being propagated.
/// </remarks>
public static void IsExists1(this HashtableSearcher searcher, string geoName, int max_len)
{
    // Build the padded label once, before the lookup, and treat null as empty. The catch
    // handler must not dereference geoName, otherwise a null name would throw a
    // NullReferenceException out of the very handler that is meant to report failures.
    var shownName = geoName ?? string.Empty;
    var padding = new string(' ', Math.Max(max_len, shownName.Length) - shownName.Length);
    var label = '\'' + shownName + '\'' + padding;

    try
    {
        var exists = searcher.IsExists(geoName);
        var color = exists ? ConsoleColor.White : ConsoleColor.Yellow;
        Write2Console(label + " => HashtableSearcher.IsExists: " + exists, color);
    }
    catch (Exception e)
    {
        Write2Console(label + " => " + e.ToString(), ConsoleColor.Red);
    }
}
/// <summary>
/// Looks up <paramref name="geoName"/> in <paramref name="searcher"/> and writes a yellow console
/// line only when the name is not found; a successful lookup produces no output.
/// </summary>
/// <param name="searcher">Searcher to query.</param>
/// <param name="geoName">Name to look up.</param>
/// <remarks>There is no exception handling here: anything thrown by the lookup reaches the caller.</remarks>
public static void IsExists2(this HashtableSearcher searcher, string geoName)
{
    var found = searcher.IsExists(geoName);
    if (found)
    {
        return;
    }

    var report = '\'' + geoName + "' => searcher.IsExists: " + found;
    Write2Console(report, ConsoleColor.Yellow);
}
/// <summary>
/// Loads every non-empty line of <paramref name="dataFileName"/> into memory, then looks each one
/// up through a <c>HashtableSearcher</c> opened over the same file, printing the lines that are
/// not found and the elapsed time of the whole search pass.
/// </summary>
/// <param name="dataFileName">Path of the data file; used both as the list of test inputs and as the searcher's source.</param>
/// <param name="dataFileEncoding">Encoding used to read the test inputs from the file.</param>
static void test_Search_big(string dataFileName, Encoding dataFileEncoding)
{
    Console.WriteLine("Begin fetch test data for search...");

    // Pre-sized for four million entries.
    var queries = new List<string>(4000000);
    using (var reader = new StreamReader(dataFileName, dataFileEncoding))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (line.Length != 0)
            {
                queries.Add(line);
            }
        }
    }

    Console.WriteLine("End fetch test data, all count: " + queries.Count + '.' + Environment.NewLine);
    Console.WriteLine("Begin full search...");

    using (var index = new HashtableSearcher(dataFileName, HashtableEngineHelper.NormlizeText))
    {
        var timer = Stopwatch.StartNew();

        // Row numbers are 1-based positions within the list of non-empty lines.
        for (var rowNumber = 1; rowNumber <= queries.Count; rowNumber++)
        {
            var query = queries[rowNumber - 1];
            var found = index.IsExists(query);
            if (found)
            {
                continue;
            }

            Extensions.Write2Console('\'' + query + "' => searcher.IsExists: " + found + ", row number: " + rowNumber, ConsoleColor.Yellow);
        }

        timer.Stop();
        Console.WriteLine("End full search, elapsed: " + timer.Elapsed + ", all count: " + queries.Count + '.' + Environment.NewLine);
    }
}