/// <summary>
        /// Checks a geographic entity against the <c>_geoNames</c> lookup and re-types it.
        /// </summary>
        /// <remarks>
        /// Only elements typed <c>PlaceName</c>, or typed <c>UnknownProper</c> and carrying the
        /// <c>PLACE</c> attribute, are processed, and only when <c>_geoNames</c> is available.
        /// <list type="bullet">
        /// <item>An empty value, or one rejected by <c>ContainsWrongSymbols</c>, becomes <c>PseudoGeo</c>.</item>
        /// <item>Leading words that do not start with an upper-case letter are skipped before the lookup.</item>
        /// <item>If nothing is left after skipping, the element is left unchanged.</item>
        /// <item>A value missing from the lookup becomes <c>PseudoGeo</c>.</item>
        /// <item>A found value on an "unknown" element promotes it to <c>PlaceName</c>, clears
        /// <c>INFO</c> and stores the shortened value when words were skipped.</item>
        /// </list>
        /// </remarks>
        /// <param name="xml">Entity element whose attributes are inspected and possibly rewritten.</param>
        private static void FilterGeographicEntity(XElement xml)
        {
            var  entityType = xml.Attribute(BlockAttribute.TYPE);
            bool isPlace    = entityType.IsEqual(EntityType.PlaceName);
            bool isUnknown  = entityType.IsEqual(EntityType.UnknownProper) &&
                              xml.IsContainAttribute(BlockAttribute.PLACE);

            if (!(isPlace || isUnknown) || _geoNames == null)
            {
                return;
            }

            string value = xml.Attribute(BlockAttribute.VALUE);
            if (value.IsEmpty() || ContainsWrongSymbols(value))
            {
                xml.SetType(EntityType.PseudoGeo);
                return;
            }

            string valueForTest = SkipLeadingNonCapitalizedWords(value);
            if (valueForTest.IsEmpty())
            {
                // No capitalized word at all: nothing to look up, leave the element as it is.
                return;
            }

            if (!_geoNames.IsExists(valueForTest))
            {
                xml.SetType(EntityType.PseudoGeo);
            }
            else if (isUnknown)
            {
                xml.SetType(EntityType.PlaceName);
                xml.SetAttributeValue(BlockAttribute.INFO, "");
                if (valueForTest != value)
                {
                    xml.SetAttributeValue(BlockAttribute.VALUE, valueForTest);
                }
            }
        }

        /// <summary>
        /// Removes leading space-separated words until the remainder starts with an upper-case character.
        /// </summary>
        /// <param name="value">Non-null entity value.</param>
        /// <returns>
        /// The remainder beginning at the first capitalized word, or an empty string when no word
        /// starts with an upper-case character.
        /// </returns>
        private static string SkipLeadingNonCapitalizedWords(string value)
        {
            // Work on the string itself rather than on a separately split word list: the remainder
            // then always begins with the word being examined, so leading or mixed whitespace can
            // neither cut into a word nor push Substring out of range.
            string rest = value.TrimStart();
            while (rest.Length > 0 && !Char.IsUpper(rest[0]))
            {
                int separator = rest.IndexOf(' ');
                rest = (separator < 0) ? string.Empty : rest.Substring(separator + 1).Trim();
            }
            return rest;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Looks up <paramref name="geoName"/> with <paramref name="searcher"/> and writes the outcome to the console.
        /// </summary>
        /// <remarks>
        /// A hit is written in white, a miss in yellow, and any exception raised during the lookup
        /// or the write is reported in red instead of being propagated.
        /// </remarks>
        /// <param name="searcher">Searcher used for the lookup.</param>
        /// <param name="geoName">Name to look up; a null name is displayed as an empty quoted string.</param>
        /// <param name="max_len">Width the name is padded to (with spaces) so that output columns line up.</param>
        public static void IsExists1(this HashtableSearcher searcher, string geoName, int max_len)
        {
            // Build the quoted, padded label once and null-safely, outside the try block, so the
            // error-reporting path below cannot itself fail on a null name.
            var name  = geoName ?? string.Empty;
            var label = '\'' + name + '\'' + new string(' ', Math.Max(max_len, name.Length) - name.Length);

            try
            {
                var exists = searcher.IsExists(geoName);

                Write2Console(label + " => HashtableSearcher.IsExists: " + exists,
                              exists ? ConsoleColor.White : ConsoleColor.Yellow);
            }
            catch (Exception e)
            {
                Write2Console(label + " => " + e.ToString(), ConsoleColor.Red);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Looks up <paramref name="geoName"/> with <paramref name="searcher"/> and reports only misses.
        /// </summary>
        /// <remarks>
        /// Nothing is written when the name is found; a miss is written in yellow.
        /// Exceptions thrown by the lookup are not caught here.
        /// </remarks>
        /// <param name="searcher">Searcher used for the lookup.</param>
        /// <param name="geoName">Name to look up.</param>
        public static void IsExists2(this HashtableSearcher searcher, string geoName)
        {
            var found = searcher.IsExists(geoName);
            if (found)
            {
                return;
            }

            Write2Console('\'' + geoName + "' => searcher.IsExists: " + found, ConsoleColor.Yellow);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads every non-empty line of a data file into memory, then looks each one up in a
        /// <c>HashtableSearcher</c> built over the same file, timing the search pass.
        /// </summary>
        /// <remarks>
        /// Progress and totals are written to the console. Every line that is not found is
        /// reported in yellow together with its 1-based row number among the non-empty lines.
        /// </remarks>
        /// <param name="dataFileName">Path of the data file used both as test input and as searcher source.</param>
        /// <param name="dataFileEncoding">Encoding used to read the test lines.</param>
        static void test_Search_big(string dataFileName, Encoding dataFileEncoding)
        {
            Console.WriteLine("Begin fetch test data for search...");
            var lines = new List<string>(4000000);

            using (var reader = new StreamReader(dataFileName, dataFileEncoding))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }
                    lines.Add(line);
                }
            }
            Console.WriteLine("End fetch test data, all count: " + lines.Count + '.' + Environment.NewLine);

            Console.WriteLine("Begin full search...");
            using (var searcher = new HashtableSearcher(dataFileName, HashtableEngineHelper.NormlizeText))
            {
                // The stopwatch covers only the lookups, not the searcher construction.
                var timer = Stopwatch.StartNew();
                for (var rowNumber = 1; rowNumber <= lines.Count; rowNumber++)
                {
                    var text  = lines[rowNumber - 1];
                    var found = searcher.IsExists(text);
                    if (!found)
                    {
                        Extensions.Write2Console('\'' + text + "' => searcher.IsExists: " + found + ", row number: " + rowNumber, ConsoleColor.Yellow);
                    }
                }
                timer.Stop();
                Console.WriteLine("End full search, elapsed: " + timer.Elapsed + ", all count: " + lines.Count + '.' + Environment.NewLine);
            }
        }