An Analyzer that tokenizes text with ChineseTokenizer and then filters the resulting tokens with ChineseFilter.
상속: Lucene.Net.Analysis.Analyzer
예제 #1
0
        /// <summary>
        /// Runs two different inputs through a single ChineseAnalyzer instance and
        /// expects each CJK character to come back as its own term.
        /// </summary>
        public virtual void TestReusableTokenStream()
        {
            Analyzer analyzer = new ChineseAnalyzer();

            // First input: seven characters, seven expected terms.
            // The two int arrays are presumably start/end offsets - confirm against AssertAnalyzesTo.
            string[] countryTerms = { "中", "华", "人", "民", "共", "和", "国" };
            int[] countryStarts = { 0, 1, 2, 3, 4, 5, 6 };
            int[] countryEnds = { 1, 2, 3, 4, 5, 6, 7 };
            AssertAnalyzesTo(analyzer, "中华人民共和国", countryTerms, countryStarts, countryEnds);

            // Second input goes through the same analyzer instance.
            string[] cityTerms = { "北", "京", "市" };
            int[] cityStarts = { 0, 1, 2 };
            int[] cityEnds = { 1, 2, 3 };
            AssertAnalyzesTo(analyzer, "北京市", cityTerms, cityStarts, cityEnds);
        }
예제 #2
0
        /// <summary>
        /// Compares how a numeric run is treated by the tokenizer-only analyzer
        /// and by the full ChineseAnalyzer.
        /// </summary>
        public virtual void TestNumerics()
        {
            const string input = "中1234";

            // Tokenizer only: the digits are expected to survive as one term.
            Analyzer tokenizerOnly = new JustChineseTokenizerAnalyzer();
            string[] withNumber = { "中", "1234" };
            AssertAnalyzesTo(tokenizerOnly, input, withNumber);

            // Full analyzer (which applies ChineseFilter): the numeric token is expected to be removed.
            Analyzer fullAnalyzer = new ChineseAnalyzer();
            string[] withoutNumber = { "中" };
            AssertAnalyzesTo(fullAnalyzer, input, withoutNumber);
        }
    /// <summary>
    /// Page load handler: reads the search keywords from the session, lists the tokens
    /// ChineseAnalyzer produces for them, then queries the index under "/indexFile/"
    /// on the "ItemName" field and renders every hit into <c>lbMsg</c>.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    /// <remarks>
    /// All values that originate from the session or from the index are HTML-encoded
    /// before they are written into the label, and the searcher/directory are closed
    /// even when the search throws.
    /// </remarks>
    protected void Page_Load(object sender, EventArgs e)
    {
        // The session entry may be absent (e.g. direct navigation to this page);
        // without keywords there is nothing to tokenize or search for.
        object rawKeywords = Session["KeyWords"];
        string text = rawKeywords == null ? null : rawKeywords.ToString();
        if (text == null || text.Trim().Length == 0)
        {
            return;
        }

        ChineseAnalyzer analyzer = new ChineseAnalyzer();
        TokenStream ts = analyzer.TokenStream("ItemName", new System.IO.StringReader(text));

        // Build the markup in a buffer instead of repeatedly re-allocating the label text.
        System.Text.StringBuilder html = new System.Text.StringBuilder(this.lbMsg.Text);
        try
        {
            Lucene.Net.Analysis.Token token;
            int n = 0;
            while ((token = ts.Next()) != null)
            {
                html.Append(n++).Append("->")
                    .Append(Server.HtmlEncode(token.TermText())).Append(" ")
                    .Append(token.StartOffset()).Append(" ")
                    .Append(token.EndOffset()).Append(" ")
                    .Append(Server.HtmlEncode(token.Type())).Append("<br>");
            }
        }
        catch
        {
            // Best-effort diagnostics: a tokenization failure replaces the token
            // listing with a marker and the search below still runs.
            html.Length = 0;
            html.Append("wrong");
        }

        // Publish the token listing now so it stays visible even if the search fails.
        this.lbMsg.Text = html.ToString();

        Directory directory = FSDirectory.GetDirectory(Server.MapPath("/indexFile/"), false);
        try
        {
            IndexSearcher isearcher = new IndexSearcher(directory);
            try
            {
                Query query = QueryParser.Parse(text, "ItemName", analyzer);
                Hits hits = isearcher.Search(query);
                int hitCount = hits.Length();

                html.Append("<font color=red>共找到").Append(hitCount).Append("条记录</font><br>");

                for (int i = 0; i < hitCount; i++)
                {
                    Document hitDoc = hits.Doc(i);

                    // Get() yields null for a missing field; HtmlEncode and Append both
                    // tolerate null, so an incomplete document no longer throws.
                    html.Append("编号:").Append(Server.HtmlEncode(hitDoc.Get("ItemID"))).Append("<br>")
                        .Append("分类:").Append(Server.HtmlEncode(hitDoc.Get("CategoryName"))).Append("<br>")
                        .Append("专题:").Append(Server.HtmlEncode(hitDoc.Get("ProductName"))).Append("<br>")
                        // The attribute is quoted so an encoded URL cannot break out of it.
                        .Append("标题:<a href=\"").Append(Server.HtmlEncode(hitDoc.Get("visiturl"))).Append("\">")
                        .Append(Server.HtmlEncode(hitDoc.Get("ItemName"))).Append("</a><br>");
                }

                this.lbMsg.Text = html.ToString();
            }
            finally
            {
                isearcher.Close();
            }
        }
        finally
        {
            directory.Close();
        }
    }
예제 #4
0
 /// <summary>
 /// Feeds two inputs to the same ChineseAnalyzer through AssertAnalyzesToReuse and
 /// expects one term per CJK character each time.
 /// </summary>
 public void TestReusableTokenStream()
 {
     Analyzer analyzer = new ChineseAnalyzer();

     // The two int arrays are presumably start/end offsets - confirm against AssertAnalyzesToReuse.
     String[] countryTerms = { "中", "华", "人", "民", "共", "和", "国" };
     int[] countryStarts = { 0, 1, 2, 3, 4, 5, 6 };
     int[] countryEnds = { 1, 2, 3, 4, 5, 6, 7 };
     AssertAnalyzesToReuse(analyzer, "中华人民共和国", countryTerms, countryStarts, countryEnds);

     String[] cityTerms = { "北", "京", "市" };
     int[] cityStarts = { 0, 1, 2 };
     int[] cityEnds = { 1, 2, 3 };
     AssertAnalyzesToReuse(analyzer, "北京市", cityTerms, cityStarts, cityEnds);
 }
예제 #5
0
        /// <summary>
        /// Runs the same English sentence through the full analyzer, the tokenizer-only
        /// analyzer and the filter-only analyzer, and checks the terms each one yields.
        /// </summary>
        public virtual void TestEnglish()
        {
            const string sentence = "This is a Test. b c d";

            // Full analyzer: only "test" is expected to remain.
            Analyzer fullAnalyzer = new ChineseAnalyzer();
            string[] fullTerms = { "test" };
            AssertAnalyzesTo(fullAnalyzer, sentence, fullTerms);

            // Tokenizer alone: every word is expected, lower-cased.
            Analyzer tokenizerOnly = new JustChineseTokenizerAnalyzer();
            string[] tokenizerTerms = { "this", "is", "a", "test", "b", "c", "d" };
            AssertAnalyzesTo(tokenizerOnly, sentence, tokenizerTerms);

            // Filter alone: original casing and punctuation are expected to be kept.
            Analyzer filterOnly = new JustChineseFilterAnalyzer();
            string[] filterTerms = { "This", "Test." };
            AssertAnalyzesTo(filterOnly, sentence, filterTerms);
        }
예제 #6
0
        /// <summary>
        /// Chooses an analyzer from the first two characters of the file name,
        /// compared case-insensitively as a language code.
        /// </summary>
        /// <param name="filePath">Path whose file name starts with a two-letter language code.</param>
        /// <returns>
        /// A language-specific analyzer for a recognised code; otherwise a StandardAnalyzer.
        /// A null path or a file name shorter than two characters also yields a StandardAnalyzer
        /// instead of throwing.
        /// </returns>
        private Analyzer GuessAnalyzer(string filePath)
        {
            string fileName = Path.GetFileName(filePath);

            // Substring(0, 2) would throw on names shorter than two characters
            // (and GetFileName returns null for a null path), so fall back early.
            if (fileName == null || fileName.Length < 2)
            {
                return new StandardAnalyzer();
            }

            switch (fileName.Substring(0, 2).ToLowerInvariant())
            {
                case "zh":
                    return new ChineseAnalyzer();
                case "cs":
                    return new CzechAnalyzer();
                case "da":
                    return new SnowballAnalyzer("Danish");
                case "nl":
                    return new SnowballAnalyzer("Dutch");
                case "en":
                    return new SnowballAnalyzer("English");
                case "fi":
                    return new SnowballAnalyzer("Finnish");
                case "fr":
                    return new SnowballAnalyzer("French");
                case "de":
                    return new SnowballAnalyzer("German");
                case "it":
                    return new SnowballAnalyzer("Italian");
                case "ja":
                case "ko":
                    return new CJKAnalyzer();
                case "no":
                    return new SnowballAnalyzer("Norwegian");
                case "pt":
                    return new SnowballAnalyzer("Portuguese");
                case "ru":
                    return new SnowballAnalyzer("Russian");
                case "es":
                    return new SnowballAnalyzer("Spanish");
                // "sv" is the ISO 639-1 code for Swedish; "se" is kept for existing file names.
                case "sv":
                case "se":
                    return new SnowballAnalyzer("Swedish");
                default:
                    return new StandardAnalyzer();
            }
        }
        /// <summary>
        /// Runs one English sentence through the full analyzer, the tokenizer-only
        /// analyzer and the filter-only analyzer, and checks the terms each one yields.
        /// </summary>
        public virtual void TestEnglish()
        {
            const string sentence = "This is a Test. b c d";

            // Full analyzer: only "test" is expected to remain.
            Analyzer fullAnalyzer = new ChineseAnalyzer();
            string[] fullTerms = { "test" };
            AssertAnalyzesTo(fullAnalyzer, sentence, fullTerms);

            // Tokenizer alone: every word is expected, lower-cased.
            Analyzer tokenizerOnly = new JustChineseTokenizerAnalyzer(this);
            string[] tokenizerTerms = { "this", "is", "a", "test", "b", "c", "d" };
            AssertAnalyzesTo(tokenizerOnly, sentence, tokenizerTerms);

            // Filter alone: original casing and punctuation are expected to be kept.
            Analyzer filterOnly = new JustChineseFilterAnalyzer(this);
            string[] filterTerms = { "This", "Test." };
            AssertAnalyzesTo(filterOnly, sentence, filterTerms);
        }
        /// <summary>
        /// Compares how a numeric run is treated by the tokenizer-only analyzer
        /// and by the full ChineseAnalyzer.
        /// </summary>
        public virtual void TestNumerics()
        {
            const string input = "中1234";

            // Tokenizer only: the digits are expected to survive as one term.
            Analyzer tokenizerOnly = new JustChineseTokenizerAnalyzer(this);
            string[] withNumber = { "中", "1234" };
            AssertAnalyzesTo(tokenizerOnly, input, withNumber);

            // Full analyzer (which applies ChineseFilter): the numeric token is expected to be removed.
            Analyzer fullAnalyzer = new ChineseAnalyzer();
            string[] withoutNumber = { "中" };
            AssertAnalyzesTo(fullAnalyzer, input, withoutNumber);
        }
예제 #9
0
        /// <summary>
        /// Chooses an analyzer from the first two characters of the file name,
        /// compared case-insensitively as a language code, and reports whether
        /// the language is written right-to-left.
        /// </summary>
        /// <param name="filePath">Path whose file name starts with a two-letter language code.</param>
        /// <param name="isRTL">Set to true for the "ar" and "he" codes, false otherwise.</param>
        /// <returns>
        /// A language-specific analyzer for a recognised code; otherwise a StandardAnalyzer
        /// (LUCENE_29). A null path or a file name shorter than two characters also yields
        /// the StandardAnalyzer instead of throwing.
        /// </returns>
        private Analyzer GuessAnalyzer(string filePath,out bool isRTL)
        {
            isRTL = false;

            string fileName = Path.GetFileName(filePath);

            // Substring(0, 2) would throw on names shorter than two characters
            // (and GetFileName returns null for a null path), so fall back early.
            if (fileName == null || fileName.Length < 2)
            {
                return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            }

            switch (fileName.Substring(0, 2).ToLowerInvariant())
            {
                case "zh":
                    return new ChineseAnalyzer();
                case "cs":
                    return new CzechAnalyzer();
                case "da":
                    return new SnowballAnalyzer("Danish");
                case "nl":
                    return new SnowballAnalyzer("Dutch");
                case "en":
                    return new SnowballAnalyzer("English");
                case "fi":
                    return new SnowballAnalyzer("Finnish");
                case "fr":
                    return new SnowballAnalyzer("French");
                case "de":
                    return new SnowballAnalyzer("German");
                case "it":
                    return new SnowballAnalyzer("Italian");
                case "ja":
                case "ko":
                    return new CJKAnalyzer();
                case "no":
                    return new SnowballAnalyzer("Norwegian");
                case "pt":
                    return new SnowballAnalyzer("Portuguese");
                case "ru":
                    return new SnowballAnalyzer("Russian");
                case "es":
                    return new SnowballAnalyzer("Spanish");
                // "sv" is the ISO 639-1 code for Swedish; "se" is kept for existing file names.
                case "sv":
                case "se":
                    return new SnowballAnalyzer("Swedish");
                case "ar":
                    isRTL = true;
                    // TODO: Lucene 2.9 has a light stemmer for Arabic providing good search results
                    return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
                case "he":
                    {
                        isRTL = true;
                        string hspellPath = System.Configuration.ConfigurationManager.AppSettings["hspellPath"];
                        if (!string.IsNullOrEmpty(hspellPath) && Directory.Exists(hspellPath))
                        {
                            try
                            {
                                return new Lucene.Net.Analysis.Hebrew.MorphAnalyzer(hspellPath);
                            }
                            catch (System.Exception ex)
                            {
                                // Deliberate best-effort: fall through to the simple analyzer,
                                // but leave a trace so the failure can be diagnosed.
                                System.Diagnostics.Trace.TraceWarning(
                                    "Hebrew MorphAnalyzer could not be created from '" + hspellPath
                                    + "', falling back to SimpleAnalyzer: " + ex.Message);
                            }
                        }
                        return new Lucene.Net.Analysis.Hebrew.SimpleAnalyzer();
                    }
                default:
                    return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            }
        }