/// <summary>
/// Verifies that a request and a catalog book whose titles differ are reported
/// with a distance greater than 0.4.
/// </summary>
public void GetDistance_Different()
{
    // Arrange: the request asks for one title, the book carries a different one.
    string requestXml = "<query><RecordSource ComparisonType=\"one of\">ELibrary</RecordSource><Title ComparisonType=\"FreeTextSearchWithStemming\">Защита информации</Title></query>";
    SearchRequest searchRequest = new SearchRequest(requestXml, "s1");
    ExternalCatalogBook catalogBook = new ExternalCatalogBook("1", "Методы верификации");

    // Act
    var distance = NGramAnalyzer.GetDistance(searchRequest, catalogBook);

    // Assert
    Assert.IsTrue(distance > 0.4);
}
/// <summary>
/// Verifies that a request and a catalog book carrying the same title are
/// reported with a distance of zero.
/// </summary>
public void GetDistance_Identical()
{
    // Arrange: the book title is exactly the title used in the request.
    string query = "<query><RecordSource ComparisonType=\"one of\">ELibrary</RecordSource><Title ComparisonType=\"FreeTextSearchWithStemming\">Защита информации</Title></query>";
    SearchRequest request = new SearchRequest(query, "s1");
    ExternalCatalogBook book = new ExternalCatalogBook("1", "Защита информации");

    // Assert: the expected value comes first so that a failure message labels
    // "expected" and "actual" correctly.
    Assert.AreEqual(0, NGramAnalyzer.GetDistance(request, book));
}
/// <summary>
/// Verifies that two requests built from the same query text (with different
/// second constructor arguments) are considered similar.
/// </summary>
public void IsSimilar_Identical()
{
    // Arrange: one query text shared by both requests.
    string sharedQueryXml = "<query><RecordSource ComparisonType=\"one of\">ELibrary</RecordSource><Title ComparisonType=\"FreeTextSearchWithStemming\">Защита информации</Title></query>";
    SearchRequest firstRequest = new SearchRequest(sharedQueryXml, "s1");
    SearchRequest secondRequest = new SearchRequest(sharedQueryXml, "s2");

    // Act
    var similar = NGramAnalyzer.IsSimilar(firstRequest, secondRequest);

    // Assert
    Assert.IsTrue(similar);
}
/// <summary>
/// Verifies that two requests whose queries ask for different titles are not
/// considered similar.
/// </summary>
public void IsSimilar_Different()
{
    // Arrange: the two queries differ only in the Title element text.
    string firstQueryXml = "<query><RecordSource ComparisonType=\"one of\">ELibrary</RecordSource><Title ComparisonType=\"FreeTextSearchWithStemming\">Защита информации</Title></query>";
    string secondQueryXml = "<query><RecordSource ComparisonType=\"one of\">ELibrary</RecordSource><Title ComparisonType=\"FreeTextSearchWithStemming\">Методы верификации</Title></query>";
    SearchRequest firstRequest = new SearchRequest(firstQueryXml, "s1");
    SearchRequest secondRequest = new SearchRequest(secondQueryXml, "s2");

    // Act
    var similar = NGramAnalyzer.IsSimilar(firstRequest, secondRequest);

    // Assert
    Assert.IsFalse(similar);
}
/// <summary>
/// Collects auto-complete suggestions for <paramref name="searchtext"/>.
/// Up to 50 documents that satisfy both <paramref name="origQuery"/> and the search text
/// (parsed against the <c>ng_</c>-prefixed form of <paramref name="queryFilter"/>) are fetched
/// from the main index; for each of them up to 100 entries are read from the auto-complete
/// index and their <c>value</c> field is added to the result if not seen before.
/// </summary>
/// <param name="origQuery">Query that every candidate document must match.</param>
/// <param name="queryFilter">
/// Field name to search. <c>ng_</c> is prepended when missing (case-insensitive check).
/// When the resulting name is <c>ng_all</c>, that name is also used as the <c>field</c> term
/// of the auto-complete lookup; otherwise the lower-cased original value is used.
/// </param>
/// <param name="searchtext">Text to complete; it is lower-cased and parsed with the Lucene query parser.</param>
/// <returns>
/// Distinct suggestions in the order they were found, each with <c>Name</c> and <c>Value</c>
/// set to the suggestion text. The collection may hold more than eight items because the
/// size limit is only checked after all entries of a document have been processed.
/// </returns>
public static IEnumerable<TextValue> doTextSearch(Query origQuery, String queryFilter, String searchtext)
{
    // Further documents are skipped once more than this many suggestions were collected.
    const int suggestionThreshold = 7;

    String filter = queryFilter;
    BooleanQuery query = new BooleanQuery();
    query.Add(origQuery, Occur.MUST);

    // Field names are identifiers, so compare them ordinally instead of by current culture.
    if (!filter.StartsWith("ng_", StringComparison.OrdinalIgnoreCase))
    {
        filter = "ng_" + filter;
    }

    if (filter.Equals("ng_all", StringComparison.OrdinalIgnoreCase))
    {
        filter = "ng_all";
        queryFilter = "ng_all";
    }

    // NOTE(review): ToLower() is culture-sensitive; kept unchanged because how the index
    // terms were lower-cased is not visible here - confirm before switching to ToLowerInvariant().
    searchtext = searchtext.ToLower();

    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, filter, new KeywordAnalyzer());
    parser.DefaultOperator = QueryParser.Operator.AND;
    query.Add(parser.Parse(searchtext), Occur.MUST);

    TopDocs tds = searcher.Search(query, 50);

    // These two clauses do not depend on the individual hit, so build them once.
    TermQuery fieldQuery = new TermQuery(new Term("field", queryFilter.ToLower()));
    QueryParser valueParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "value", new KeywordAnalyzer());
    valueParser.DefaultOperator = QueryParser.Operator.AND;
    Query valueQuery = valueParser.Parse(searchtext);

    HashSet<string> uniqueText = new HashSet<string>();
    List<TextValue> autoCompleteTextList = new List<TextValue>();

    foreach (ScoreDoc sd in tds.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        String docId = doc.GetField("doc_id").StringValue;

        // Restrict the auto-complete lookup to entries of this document and field.
        BooleanQuery autoCompleteQuery = new BooleanQuery();
        autoCompleteQuery.Add(new TermQuery(new Term("id", docId.ToLower())), Occur.MUST);
        autoCompleteQuery.Add(valueQuery, Occur.MUST);
        autoCompleteQuery.Add(fieldQuery, Occur.MUST);

        TopDocs tdAutoComp = autoCompleteSearcher.Search(autoCompleteQuery, 100);
        foreach (ScoreDoc sdAutoComp in tdAutoComp.ScoreDocs)
        {
            Document docAutoComp = autoCompleteSearcher.Doc(sdAutoComp.Doc);
            String toAdd = docAutoComp.GetField("value").StringValue;

            // HashSet.Add returns false for duplicates, so one lookup is enough.
            if (uniqueText.Add(toAdd))
            {
                autoCompleteTextList.Add(new TextValue { Name = toAdd, Value = toAdd });
            }
        }

        if (autoCompleteTextList.Count > suggestionThreshold)
        {
            break;
        }
    }

    return autoCompleteTextList;
}
/// <summary>
/// Collects auto-complete suggestions for <paramref name="searchtext"/>.
/// Up to 50 documents that satisfy both <paramref name="origQuery"/> and the search text
/// (parsed against the <c>ng_</c>-prefixed form of <paramref name="queryFilter"/>) are fetched
/// from the main index; for each of them up to 100 entries are read from the auto-complete
/// index and their <c>value</c> field is added to the result if not seen before.
/// </summary>
/// <param name="origQuery">Query that every candidate document must match.</param>
/// <param name="queryFilter">
/// Field name to search. <c>ng_</c> is prepended when missing (case-insensitive check).
/// When the resulting name is <c>ng_all</c>, that name is also used as the <c>field</c> term
/// of the auto-complete lookup; otherwise the lower-cased original value is used.
/// </param>
/// <param name="searchtext">Text to complete; it is lower-cased and parsed with the Lucene query parser.</param>
/// <returns>
/// Distinct suggestions in the order they were found, each with <c>Name</c> and <c>Value</c>
/// set to the suggestion text. The collection may hold more than eight items because the
/// size limit is only checked after all entries of a document have been processed.
/// </returns>
public static IEnumerable<TextValue> doTextSearch(Query origQuery, String queryFilter, String searchtext)
{
    // Further documents are skipped once more than this many suggestions were collected.
    const int suggestionThreshold = 7;

    String filter = queryFilter;
    BooleanQuery query = new BooleanQuery();
    query.Add(origQuery, Occur.MUST);

    // Field names are identifiers, so compare them ordinally instead of by current culture.
    if (!filter.StartsWith("ng_", StringComparison.OrdinalIgnoreCase))
    {
        filter = "ng_" + filter;
    }

    if (filter.Equals("ng_all", StringComparison.OrdinalIgnoreCase))
    {
        filter = "ng_all";
        queryFilter = "ng_all";
    }

    // NOTE(review): ToLower() is culture-sensitive; kept unchanged because how the index
    // terms were lower-cased is not visible here - confirm before switching to ToLowerInvariant().
    searchtext = searchtext.ToLower();

    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, filter, new KeywordAnalyzer());
    parser.DefaultOperator = QueryParser.Operator.AND;
    query.Add(parser.Parse(searchtext), Occur.MUST);

    TopDocs tds = searcher.Search(query, 50);

    // These two clauses do not depend on the individual hit, so build them once.
    TermQuery fieldQuery = new TermQuery(new Term("field", queryFilter.ToLower()));
    QueryParser valueParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "value", new KeywordAnalyzer());
    valueParser.DefaultOperator = QueryParser.Operator.AND;
    Query valueQuery = valueParser.Parse(searchtext);

    HashSet<string> uniqueText = new HashSet<string>();
    List<TextValue> autoCompleteTextList = new List<TextValue>();

    foreach (ScoreDoc sd in tds.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        String docId = doc.GetField("doc_id").StringValue;

        // Restrict the auto-complete lookup to entries of this document and field.
        BooleanQuery autoCompleteQuery = new BooleanQuery();
        autoCompleteQuery.Add(new TermQuery(new Term("id", docId.ToLower())), Occur.MUST);
        autoCompleteQuery.Add(valueQuery, Occur.MUST);
        autoCompleteQuery.Add(fieldQuery, Occur.MUST);

        TopDocs tdAutoComp = autoCompleteSearcher.Search(autoCompleteQuery, 100);
        foreach (ScoreDoc sdAutoComp in tdAutoComp.ScoreDocs)
        {
            Document docAutoComp = autoCompleteSearcher.Doc(sdAutoComp.Doc);
            String toAdd = docAutoComp.GetField("value").StringValue;

            // HashSet.Add returns false for duplicates, so one lookup is enough.
            if (uniqueText.Add(toAdd))
            {
                autoCompleteTextList.Add(new TextValue { Name = toAdd, Value = toAdd });
            }
        }

        if (autoCompleteTextList.Count > suggestionThreshold)
        {
            break;
        }
    }

    return autoCompleteTextList;
}