/// <summary>
/// Verifies that one ChineseAnalyzer instance can analyze several inputs in a row:
/// each CJK character becomes its own token, with single-character start/end offsets.
/// </summary>
public virtual void TestReusableTokenStream()
{
    Analyzer analyzer = new ChineseAnalyzer();

    // First pass: seven characters -> seven one-character tokens.
    AssertAnalyzesTo(
        analyzer,
        "中华人民共和国",
        new string[] { "中", "华", "人", "民", "共", "和", "国" },
        new int[] { 0, 1, 2, 3, 4, 5, 6 },
        new int[] { 1, 2, 3, 4, 5, 6, 7 });

    // Second pass with the SAME analyzer instance exercises reuse.
    AssertAnalyzesTo(
        analyzer,
        "北京市",
        new string[] { "北", "京", "市" },
        new int[] { 0, 1, 2 },
        new int[] { 1, 2, 3 });
}
/// <summary>
/// Checks how numeric runs are tokenized: the bare ChineseTokenizer keeps the
/// digit run as one token, while the full ChineseAnalyzer (which adds
/// ChineseFilter) drops it.
/// </summary>
public virtual void TestNumerics()
{
    Analyzer justTokenizer = new JustChineseTokenizerAnalyzer();
    AssertAnalyzesTo(justTokenizer, "中1234", new string[] { "中", "1234" });

    // In this case the ChineseAnalyzer (which applies ChineseFilter)
    // will remove the numeric token.
    // (In the collapsed one-line original, this comment dead-coded the
    // two statements below; they are restored here.)
    Analyzer a = new ChineseAnalyzer();
    AssertAnalyzesTo(a, "中1234", new string[] { "中" });
}
/// <summary>
/// Page load handler: tokenizes the session's "KeyWords" value with
/// ChineseAnalyzer and dumps each token to lbMsg for diagnostics, then runs a
/// Lucene search over the "ItemName" field of the on-disk index and renders
/// every hit's fields into lbMsg.
/// </summary>
/// <param name="sender">Standard ASP.NET event source (unused).</param>
/// <param name="e">Standard ASP.NET event args (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Guard against a missing session value. The original redirect was
    // commented out, which made the ToString() below throw a
    // NullReferenceException whenever the page was hit directly.
    if (Session["KeyWords"] == null)
    {
        Response.Redirect("Search.aspx");
        return;
    }

    String text = Session["KeyWords"].ToString();
    ChineseAnalyzer analyzer = new ChineseAnalyzer();

    // Diagnostic dump: term text, offsets and type of every token produced.
    TokenStream ts = analyzer.TokenStream("ItemName", new System.IO.StringReader(text));
    Lucene.Net.Analysis.Token token;
    try
    {
        int n = 0;
        while ((token = ts.Next()) != null)
        {
            this.lbMsg.Text += (n++) + "->" + token.TermText() + " "
                + token.StartOffset() + " " + token.EndOffset() + " "
                + token.Type() + "<br>";
        }
    }
    catch
    {
        // Best-effort display: show a marker instead of failing the page.
        // NOTE(review): the exception is discarded — consider logging it.
        this.lbMsg.Text = "wrong";
    }

    Directory directory = FSDirectory.GetDirectory(Server.MapPath("/indexFile/"), false);
    IndexSearcher isearcher = new IndexSearcher(directory);

    Query query = QueryParser.Parse(text, "ItemName", analyzer);
    Hits hits = isearcher.Search(query);

    this.lbMsg.Text += "<font color=red>共找到" + hits.Length() + "条记录</font><br>";

    for (int i = 0; i < hits.Length(); i++)
    {
        Document hitDoc = hits.Doc(i);
        this.lbMsg.Text += "编号:" + hitDoc.Get("ItemID").ToString() + "<br>"
            + "分类:" + hitDoc.Get("CategoryName").ToString() + "<br>"
            + "专题:" + hitDoc.Get("ProductName").ToString() + "<br>"
            + "标题:<a href=" + hitDoc.Get("visiturl").ToString() + ">"
            + hitDoc.Get("ItemName").ToString() + "</a><br>";
    }

    isearcher.Close();
    directory.Close();
}
/// <summary>
/// Confirms reuse of a ChineseAnalyzer via AssertAnalyzesToReuse: every CJK
/// character maps to one token with one-character offsets, across two inputs
/// analyzed by the same instance.
/// </summary>
public void TestReusableTokenStream()
{
    Analyzer analyzer = new ChineseAnalyzer();

    String[] firstTerms = { "中", "华", "人", "民", "共", "和", "国" };
    int[] firstStarts = { 0, 1, 2, 3, 4, 5, 6 };
    int[] firstEnds = { 1, 2, 3, 4, 5, 6, 7 };
    AssertAnalyzesToReuse(analyzer, "中华人民共和国", firstTerms, firstStarts, firstEnds);

    String[] secondTerms = { "北", "京", "市" };
    int[] secondStarts = { 0, 1, 2 };
    int[] secondEnds = { 1, 2, 3 };
    AssertAnalyzesToReuse(analyzer, "北京市", secondTerms, secondStarts, secondEnds);
}
/// <summary>
/// Shows how English text is handled by each stage: the full ChineseAnalyzer
/// keeps only "test"; the bare tokenizer lowercases and keeps every word; the
/// bare filter keeps only the longer original-cased words.
/// </summary>
public virtual void TestEnglish()
{
    const string input = "This is a Test. b c d";

    // Tokenizer + filter together: everything but "test" is filtered away.
    Analyzer chinese = new ChineseAnalyzer();
    AssertAnalyzesTo(chinese, input, new string[] { "test" });

    // Tokenizer alone: lowercased words, nothing removed.
    Analyzer justTokenizer = new JustChineseTokenizerAnalyzer();
    AssertAnalyzesTo(
        justTokenizer,
        input,
        new string[] { "this", "is", "a", "test", "b", "c", "d" });

    // Filter alone: original casing preserved, short/stop words removed.
    Analyzer justFilter = new JustChineseFilterAnalyzer();
    AssertAnalyzesTo(justFilter, input, new string[] { "This", "Test." });
}
/// <summary>
/// Picks an Analyzer from the first two characters of the file name,
/// interpreted as an ISO 639-1 language code. Unknown or too-short names fall
/// back to StandardAnalyzer.
/// </summary>
/// <param name="filePath">Path whose file name begins with a language code.</param>
/// <returns>An analyzer suited to the detected language; never null.</returns>
private Analyzer GuessAnalyzer(string filePath)
{
    // Guard: Substring(0, 2) throws ArgumentOutOfRangeException when the
    // file name is shorter than two characters; treat that as "unknown".
    string fileName = Path.GetFileName(filePath);
    if (fileName == null || fileName.Length < 2)
    {
        return new StandardAnalyzer();
    }

    switch (fileName.Substring(0, 2).ToLowerInvariant())
    {
        case "zh":
            return new ChineseAnalyzer();
        case "cs":
            return new CzechAnalyzer();
        case "da":
            return new SnowballAnalyzer("Danish");
        case "nl":
            return new SnowballAnalyzer("Dutch");
        case "en":
            return new SnowballAnalyzer("English");
        case "fi":
            return new SnowballAnalyzer("Finnish");
        case "fr":
            return new SnowballAnalyzer("French");
        case "de":
            return new SnowballAnalyzer("German");
        case "it":
            return new SnowballAnalyzer("Italian");
        case "ja":
        case "ko":
            // Japanese and Korean share the CJK bigram analyzer.
            return new CJKAnalyzer();
        case "no":
            return new SnowballAnalyzer("Norwegian");
        case "pt":
            return new SnowballAnalyzer("Portuguese");
        case "ru":
            return new SnowballAnalyzer("Russian");
        case "es":
            return new SnowballAnalyzer("Spanish");
        case "se":
            // NOTE(review): ISO 639-1 for Swedish is "sv"; "se" is kept for
            // backward compatibility with existing file naming.
            return new SnowballAnalyzer("Swedish");
        default:
            return new StandardAnalyzer();
    }
}
/// <summary>
/// Exercises each analysis stage on English input: the composed
/// ChineseAnalyzer yields only "test"; the tokenizer-only analyzer yields
/// every lowercased word; the filter-only analyzer keeps the original-cased
/// longer words.
/// </summary>
public virtual void TestEnglish()
{
    const string sample = "This is a Test. b c d";

    Analyzer chinese = new ChineseAnalyzer();
    AssertAnalyzesTo(chinese, sample, new string[] { "test" });

    Analyzer tokenizerOnly = new JustChineseTokenizerAnalyzer(this);
    AssertAnalyzesTo(
        tokenizerOnly,
        sample,
        new string[] { "this", "is", "a", "test", "b", "c", "d" });

    Analyzer filterOnly = new JustChineseFilterAnalyzer(this);
    AssertAnalyzesTo(filterOnly, sample, new string[] { "This", "Test." });
}
/// <summary>
/// Checks numeric-token handling: the bare ChineseTokenizer keeps the digit
/// run as a single token, while the composed ChineseAnalyzer (which applies
/// ChineseFilter) removes it.
/// </summary>
public virtual void TestNumerics()
{
    Analyzer justTokenizer = new JustChineseTokenizerAnalyzer(this);
    AssertAnalyzesTo(justTokenizer, "中1234", new string[] { "中", "1234" });

    // In this case the ChineseAnalyzer (which applies ChineseFilter)
    // will remove the numeric token.
    // (In the collapsed one-line original, this comment dead-coded the
    // two statements below; they are restored here.)
    Analyzer a = new ChineseAnalyzer();
    AssertAnalyzesTo(a, "中1234", new string[] { "中" });
}
/// <summary>
/// Picks an Analyzer from the first two characters of the file name,
/// interpreted as an ISO 639-1 language code, and reports whether the language
/// is right-to-left. Unknown or too-short names fall back to StandardAnalyzer.
/// </summary>
/// <param name="filePath">Path whose file name begins with a language code.</param>
/// <param name="isRTL">Set to true for right-to-left languages (Arabic, Hebrew).</param>
/// <returns>An analyzer suited to the detected language; never null.</returns>
private Analyzer GuessAnalyzer(string filePath, out bool isRTL)
{
    isRTL = false;

    // Guard: Substring(0, 2) throws ArgumentOutOfRangeException when the
    // file name is shorter than two characters; treat that as "unknown".
    string fileName = Path.GetFileName(filePath);
    if (fileName == null || fileName.Length < 2)
    {
        return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    }

    switch (fileName.Substring(0, 2).ToLowerInvariant())
    {
        case "zh":
            return new ChineseAnalyzer();
        case "cs":
            return new CzechAnalyzer();
        case "da":
            return new SnowballAnalyzer("Danish");
        case "nl":
            return new SnowballAnalyzer("Dutch");
        case "en":
            return new SnowballAnalyzer("English");
        case "fi":
            return new SnowballAnalyzer("Finnish");
        case "fr":
            return new SnowballAnalyzer("French");
        case "de":
            return new SnowballAnalyzer("German");
        case "it":
            return new SnowballAnalyzer("Italian");
        case "ja":
        case "ko":
            // Japanese and Korean share the CJK bigram analyzer.
            return new CJKAnalyzer();
        case "no":
            return new SnowballAnalyzer("Norwegian");
        case "pt":
            return new SnowballAnalyzer("Portuguese");
        case "ru":
            return new SnowballAnalyzer("Russian");
        case "es":
            return new SnowballAnalyzer("Spanish");
        case "se":
            // NOTE(review): ISO 639-1 for Swedish is "sv"; "se" is kept for
            // backward compatibility with existing file naming.
            return new SnowballAnalyzer("Swedish");
        case "ar":
            isRTL = true;
            // TODO: Lucene 2.9 has a light stemmer for Arabic providing good search results
            return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        case "he":
        {
            isRTL = true;
            // Prefer the dictionary-backed morphological analyzer when an
            // hspell dictionary path is configured and present on disk.
            string hspellPath = System.Configuration.ConfigurationManager.AppSettings["hspellPath"];
            if (!string.IsNullOrEmpty(hspellPath) && Directory.Exists(hspellPath))
            {
                try
                {
                    return new Lucene.Net.Analysis.Hebrew.MorphAnalyzer(hspellPath);
                }
                catch
                {
                    // Deliberate best-effort: if the dictionary fails to
                    // load, fall through to the simple analyzer below.
                }
            }
            return new Lucene.Net.Analysis.Hebrew.SimpleAnalyzer();
        }
        default:
            return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    }
}