Ejemplo n.º 1
0
        /// <summary>
        /// Finds stories related to the given story by running a Lucene MoreLikeThis
        /// query against the searcher obtained for the host.
        /// </summary>
        /// <param name="hostId">Host whose searcher is used for the lookup.</param>
        /// <param name="storyId">Story to find related stories for; it is skipped in the results.</param>
        /// <returns>
        /// Whatever <c>SearchQuery.LoadStorySearchResults</c> returns for the collected story ids,
        /// or <c>null</c> when the story id cannot be mapped to a document id.
        /// </returns>
        public StoryCollection Find(int hostId, int storyId)
        {
            int? docId = ConvertStoryIdtoDocId(hostId, storyId);

            // No document id for this story: nothing to compare against.
            if (!docId.HasValue)
                return null;

            IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
            IndexReader reader = searcher.GetIndexReader();

            MoreLikeThis moreLikeThis = new MoreLikeThis(reader);
            moreLikeThis.SetAnalyzer(new DnkAnalyzer());

            // Tuning of the query used to find related/similar stories:
            //  - only the "title" and "tags" fields are considered
            //  - minimum term frequency is 1
            //  - the generated query is capped at 5 terms
            //  - minimum word length is 3 characters
            //  - minimum document frequency is 4
            moreLikeThis.SetFieldNames(new string[] { "title", "tags" });
            moreLikeThis.SetMinTermFreq(1);
            moreLikeThis.SetMaxQueryTerms(5);
            moreLikeThis.SetMinWordLen(3);
            moreLikeThis.SetMinDocFreq(4);
            moreLikeThis.SetStopWords(StopWords());
            moreLikeThis.SetBoost(true);

            Query similarityQuery = moreLikeThis.Like(docId.Value);
            Hits matches = searcher.Search(similarityQuery);

            List<int> relatedStoryIds = new List<int>();

            for (int index = 0; index < matches.Length(); index++)
            {
                Document matchedDocument = matches.Doc(index);
                int matchedStoryId = int.Parse(matchedDocument.GetField("id").StringValue());

                // The story we started from is not "related" to itself.
                if (matchedStoryId == storyId)
                    continue;

                relatedStoryIds.Add(matchedStoryId);

                // Stop as soon as the configured number of related stories is collected.
                if (relatedStoryIds.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN)
                    break;
            }

            return SearchQuery.LoadStorySearchResults(relatedStoryIds);
        }
Ejemplo n.º 2
0
		/// <summary>
		/// Builds a Lucene MoreLikeThis query from an already-indexed document and returns
		/// the top-scoring hits as <c>CorpusDocument</c> entries.
		/// </summary>
		/// <param name="indexName">Name passed to <c>GetSearcher</c> to select the index.</param>
		/// <param name="indexDocumentId">Lucene document number the similarity query is built from.</param>
		/// <param name="maxDocs">Size given to the top-score collector, i.e. the cap on returned hits.</param>
		/// <returns>
		/// A list with the <c>Id</c> and <c>Title</c> stored fields of each hit. Hits are not
		/// filtered, so the source document itself may be among them.
		/// </returns>
		public IList<CorpusDocument> GetMoreLikeThis(string indexName, int indexDocumentId, int maxDocs)
		{
			// See: http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/similar/MoreLikeThis.html

			var searcher = GetSearcher(indexName);

			var moreLikeThis = new MoreLikeThis(searcher.GetIndexReader());
			moreLikeThis.SetAnalyzer(GetAnalyzer(SearchType.Morphologic));
			moreLikeThis.SetFieldNames(new string[] {"Title", "Content"});
			// A minimum word length of 4 avoids most ambiguous Hebrew stop-words.
			moreLikeThis.SetMinWordLen(4);

			var similarityQuery = moreLikeThis.Like(indexDocumentId);

			var collector = TopScoreDocCollector.create(maxDocs, true);
			searcher.Search(similarityQuery, collector);
			var scoreDocs = collector.TopDocs().scoreDocs;

			var documents = new List<CorpusDocument>(maxDocs);

			for (var index = 0; index < scoreDocs.Length; index++)
			{
				var stored = searcher.Doc(scoreDocs[index].doc);

				var corpusDocument = new CorpusDocument();
				corpusDocument.Id = stored.Get("Id");
				corpusDocument.Title = stored.Get("Title");

				documents.Add(corpusDocument);
			}

			return documents;
		}
Ejemplo n.º 3
0
        /// <summary>
        /// Demo entry point: populates the index, builds a MoreLikeThis query from the text of
        /// policy section 35, excludes documents whose "type" is "policy", runs the search and
        /// materializes the scored documents, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Case-insensitive stop-word table in the Hashtable form passed to SetStopWords.
            var stopWords = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
            foreach (var s in StopWords)
            {
                stopWords[s] = s;
            }
            var connectionString = "Data Source=(local);Initial Catalog=AllgressDB;Integrated Security=true";
            Repo = new Repository(connectionString);
            var directory = PopulateIndex();

            //SearchForTerm(directory);
            var policy = Repo.GetPolicySection(35);
            string text = Convert(policy.Text);
            using (var reader = DirectoryReader.Open(directory, true))
            using (var indexSearcher = new IndexSearcher(reader))
            {
                var moreLikeThis = new MoreLikeThis(reader);

                // Configure the analyzer BEFORE calling Like(): the query is built inside Like(),
                // so an analyzer assigned afterwards can no longer affect it.
                moreLikeThis.SetAnalyzer(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
                moreLikeThis.SetStopWords(stopWords);
                moreLikeThis.SetFieldNames(new[] { "description", "procedures", "objectives", "references" });
                moreLikeThis.SetBoost(true);
                moreLikeThis.SetMinDocFreq(2);
                moreLikeThis.SetMinTermFreq(1);

                // Direct cast instead of an unchecked `as`: if Like() ever returns something other
                // than a BooleanQuery this fails with a clear InvalidCastException rather than a
                // NullReferenceException.
                var query = (BooleanQuery)moreLikeThis.Like(text);

                // Exclude documents whose "type" field is "policy" from the matches.
                query.Add(new TermQuery(new Term("type", "policy")), Occur.MUST_NOT);

                var docs = from scoreDoc in indexSearcher.Search(query, null, 1000).ScoreDocs
                           select new ScoredDocument { Score = scoreDoc.Score, Document = indexSearcher.Doc(scoreDoc.Doc) };

                // ToArray() forces the deferred query to run while the searcher is still open.
                var results = docs.ToArray();
            }
            Console.ReadKey();
        }