예제 #1
0
        /// <summary>
        /// Feeds two different inputs through one shared <see cref="CzechAnalyzer"/>
        /// instance using AssertAnalyzesToReuse and checks the tokens emitted for each.
        /// </summary>
        public void TestReusableTokenStream()
        {
            Analyzer czech = new CzechAnalyzer(Version.LUCENE_CURRENT);

            // "Pokud" and "o" are absent from the expected output; presumably they are
            // filtered as default Czech stop words (the stop list is not visible here).
            string[] expectedSentenceTokens = { "mluvime", "volnem" };
            AssertAnalyzesToReuse(czech, "Pokud mluvime o volnem", expectedSentenceTokens);

            // Both words survive, in lower case.
            string[] expectedNameTokens = { "česká", "republika" };
            AssertAnalyzesToReuse(czech, "Česká Republika", expectedNameTokens);
        }
예제 #2
0
        /// <summary>
        /// Loads stop words from an <c>UnreliableInputStream</c> and then checks that
        /// every word of the input is still emitted as a (lower-cased) token.
        /// </summary>
        public void TestInvalidStopWordFile()
        {
            CzechAnalyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
            analyzer.LoadStopWords(new UnreliableInputStream(), Encoding.UTF8);

            // All four words are expected back, including "pokud" and "o".
            // NOTE(review): presumably the failed load leaves the analyzer with an
            // empty stop set; confirm against CzechAnalyzer.LoadStopWords.
            string[] allTokens = { "pokud", "mluvime", "o", "volnem" };
            AssertAnalyzesTo(analyzer, "Pokud mluvime o volnem", allTokens);
        }
예제 #3
0
        /// <summary>
        /// Checks that stop words loaded after an analyzer has already been used take
        /// effect on later analysis with the same instance: "republika" is emitted
        /// before <c>customStopFile</c> is loaded and is no longer emitted afterwards.
        /// </summary>
        public void TestStopWordFileReuse()
        {
            CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);

            // Baseline: before the custom stop file is loaded both tokens are produced.
            AssertAnalyzesToReuse(cz, "Česká Republika",
                                  new string[] { "česká", "republika" });

            // Dispose the file handle deterministically; the original left the
            // FileStream open for the garbage collector to clean up.
            // NOTE(review): relies on LoadStopWords consuming the stream before it
            // returns; confirm against CzechAnalyzer.LoadStopWords.
            using (Stream stopwords = new FileStream(customStopFile, FileMode.Open, FileAccess.Read))
            {
                cz.LoadStopWords(stopwords, Encoding.UTF8);
            }

            // After loading, only "česká" is expected from the same input.
            AssertAnalyzesToReuse(cz, "Česká Republika", new string[] { "česká" });
        }
예제 #4
0
        /// <summary>
        /// Picks an analyzer based on the first two characters of the file name in
        /// <paramref name="filePath"/>, compared case-insensitively against a fixed
        /// list of two-letter language prefixes.
        /// </summary>
        /// <param name="filePath">Path whose file-name component starts with the language prefix.</param>
        /// <returns>
        /// The analyzer mapped to the prefix, or a <c>StandardAnalyzer</c> when the
        /// prefix is not recognised or the file name is shorter than two characters.
        /// </returns>
        private Analyzer GuessAnalyzer(string filePath)
        {
            Analyzer ret = null;

            // Substring(0, 2) throws ArgumentOutOfRangeException for names shorter than
            // two characters (and GetFileName returns null for a null path), so such
            // inputs are routed to the default branch instead of crashing.
            string fileName = Path.GetFileName(filePath);
            string languagePrefix = (fileName != null && fileName.Length >= 2)
                ? fileName.Substring(0, 2).ToLowerInvariant()
                : string.Empty;

            switch (languagePrefix)
            {
            case "zh":
                ret = new ChineseAnalyzer();
                break;

            case "cs":
                ret = new CzechAnalyzer();
                break;

            case "da":
                ret = new SnowballAnalyzer("Danish");
                break;

            case "nl":
                ret = new SnowballAnalyzer("Dutch");
                break;

            case "en":
                ret = new SnowballAnalyzer("English");
                break;

            case "fi":
                ret = new SnowballAnalyzer("Finnish");
                break;

            case "fr":
                ret = new SnowballAnalyzer("French");
                break;

            case "de":
                ret = new SnowballAnalyzer("German");
                break;

            case "it":
                ret = new SnowballAnalyzer("Italian");
                break;

            // Japanese and Korean share the same analyzer.
            case "ja":
            case "ko":
                ret = new CJKAnalyzer();
                break;

            case "no":
                ret = new SnowballAnalyzer("Norwegian");
                break;

            case "pt":
                ret = new SnowballAnalyzer("Portuguese");
                break;

            case "ru":
                ret = new SnowballAnalyzer("Russian");
                break;

            case "es":
                ret = new SnowballAnalyzer("Spanish");
                break;

            // NOTE(review): the ISO 639-1 code for Swedish is "sv"; "se" is kept
            // because existing file names may depend on it. Confirm before changing.
            case "se":
                ret = new SnowballAnalyzer("Swedish");
                break;

            default:
                ret = new StandardAnalyzer();
                break;
            }

            return ret;
        }
예제 #5
0
        /// <summary>
        /// Picks an analyzer based on the first two characters of the file name in
        /// <paramref name="filePath"/>, compared case-insensitively against a fixed
        /// list of two-letter language prefixes, and reports whether the language is
        /// written right-to-left.
        /// </summary>
        /// <param name="filePath">Path whose file-name component starts with the language prefix.</param>
        /// <param name="isRTL">Set to <c>true</c> for the "ar" and "he" prefixes, <c>false</c> otherwise.</param>
        /// <returns>
        /// The analyzer mapped to the prefix, or a <c>StandardAnalyzer</c> when the
        /// prefix is not recognised or the file name is shorter than two characters.
        /// </returns>
        private Analyzer GuessAnalyzer(string filePath, out bool isRTL)
        {
            Analyzer ret = null;

            isRTL = false;

            // Substring(0, 2) throws ArgumentOutOfRangeException for names shorter than
            // two characters (and GetFileName returns null for a null path), so such
            // inputs are routed to the default branch instead of crashing.
            string fileName = Path.GetFileName(filePath);
            string languagePrefix = (fileName != null && fileName.Length >= 2)
                ? fileName.Substring(0, 2).ToLowerInvariant()
                : string.Empty;

            switch (languagePrefix)
            {
            case "zh":
                ret = new ChineseAnalyzer();
                break;

            case "cs":
                ret = new CzechAnalyzer();
                break;

            case "da":
                ret = new SnowballAnalyzer("Danish");
                break;

            case "nl":
                ret = new SnowballAnalyzer("Dutch");
                break;

            case "en":
                ret = new SnowballAnalyzer("English");
                break;

            case "fi":
                ret = new SnowballAnalyzer("Finnish");
                break;

            case "fr":
                ret = new SnowballAnalyzer("French");
                break;

            case "de":
                ret = new SnowballAnalyzer("German");
                break;

            case "it":
                ret = new SnowballAnalyzer("Italian");
                break;

            // Japanese and Korean share the same analyzer.
            case "ja":
            case "ko":
                ret = new CJKAnalyzer();
                break;

            case "no":
                ret = new SnowballAnalyzer("Norwegian");
                break;

            case "pt":
                ret = new SnowballAnalyzer("Portuguese");
                break;

            case "ru":
                ret = new SnowballAnalyzer("Russian");
                break;

            case "es":
                ret = new SnowballAnalyzer("Spanish");
                break;

            // NOTE(review): the ISO 639-1 code for Swedish is "sv"; "se" is kept
            // because existing file names may depend on it. Confirm before changing.
            case "se":
                ret = new SnowballAnalyzer("Swedish");
                break;

            case "ar":
                isRTL = true;
                // TODO: Lucene 2.9 has a light stemmer for Arabic providing good search results
                ret = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
                break;

            case "he":
            {
                isRTL = true;

                // Use the morphological analyzer only when the configured hspell
                // directory exists; otherwise use the simple analyzer.
                string hspellPath = System.Configuration.ConfigurationManager.AppSettings["hspellPath"];
                if (!string.IsNullOrEmpty(hspellPath) && Directory.Exists(hspellPath))
                {
                    try
                    {
                        ret = new Lucene.Net.Analysis.Hebrew.MorphAnalyzer(hspellPath);
                    }
                    catch
                    {
                        // Deliberate best-effort: any failure while constructing the
                        // morphological analyzer falls back to the simple analyzer.
                        ret = null;
                    }
                }

                if (ret == null)
                {
                    ret = new Lucene.Net.Analysis.Hebrew.SimpleAnalyzer();
                }
                break;
            }

            default:
                ret = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
                break;
            }

            return ret;
        }