/// <summary>
        /// Finds stories similar to the given story by running a Lucene
        /// MoreLikeThis query seeded from the story's index document.
        /// </summary>
        /// <param name="hostId">Host id used to resolve the document id and to obtain the searcher.</param>
        /// <param name="storyId">Story to find related stories for; it is excluded from the results.</param>
        /// <returns>
        /// The value of <c>SearchQuery.LoadStorySearchResults</c> for at most
        /// <c>NUMBER_OF_RELATED_STORIES_TO_RETURN</c> story ids, or <c>null</c> when
        /// no document id could be resolved for the story.
        /// </returns>
        public StoryCollection Find(int hostId, int storyId)
        {
            int? docId = ConvertStoryIdtoDocId(hostId, storyId);

            // Without a document id there is nothing to seed the query with.
            if (!docId.HasValue)
            {
                return null;
            }

            IndexSearcher indexSearch = SearchQuery.GetSearcher(hostId);
            IndexReader indexReader = indexSearch.GetIndexReader();

            MoreLikeThis mlt = new MoreLikeThis(indexReader);
            mlt.SetAnalyzer(new DnkAnalyzer());

            // These values control the query used to find related/similar stories:
            // - only the "title" and "tags" fields are used,
            // - a term must appear at least once in the source document,
            // - the generated query contains at most 5 terms,
            // - words shorter than 3 characters are ignored,
            // - a term must appear in at least 4 documents,
            // - stop words are skipped and terms are boosted.
            mlt.SetFieldNames(new string[] { "title", "tags" });
            mlt.SetMinTermFreq(1);
            mlt.SetMaxQueryTerms(5);
            mlt.SetMinWordLen(3);
            mlt.SetMinDocFreq(4);
            mlt.SetStopWords(StopWords());
            mlt.SetBoost(true);
            Query mltQuery = mlt.Like(docId.Value);

            Hits hits = indexSearch.Search(mltQuery);

            List<int> results = new List<int>();

            for (int i = 0; i < hits.Length(); i++)
            {
                Document d = hits.Doc(i);

                // The id is machine-written data, so parse it culture-independently.
                int hitStoryId = int.Parse(
                    d.GetField("id").StringValue(),
                    System.Globalization.CultureInfo.InvariantCulture);

                // The source story matches itself; skip it.
                if (hitStoryId == storyId)
                {
                    continue;
                }

                results.Add(hitStoryId);
                if (results.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN)
                {
                    break;
                }
            }

            return SearchQuery.LoadStorySearchResults(results);
        }
        /// <summary>
        /// Command-line test driver: builds a MoreLikeThis query from a URL or a
        /// file and prints up to 25 matching documents from the index.
        /// </summary>
        /// <param name="a">
        /// Command-line arguments. Recognized options, each followed by a value:
        /// <c>-i</c> (index name), <c>-f</c> (file to parse), <c>-url</c> (URL to parse).
        /// Unrecognized arguments are ignored; a URL takes precedence over a file.
        /// </param>
        public static void  Main(System.String[] a)
        {
            System.String indexName = "localhost_index";
            System.String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
            System.Uri url = null;
            for (int i = 0; i < a.Length; i++)
            {
                System.String option = a[i];
                if (!option.Equals("-i") && !option.Equals("-f") && !option.Equals("-url"))
                {
                    continue;
                }

                // Every recognized option takes a value; report a clear error
                // instead of failing with an IndexOutOfRangeException.
                if (i + 1 >= a.Length)
                {
                    System.Console.Error.WriteLine("Missing value for option " + option);
                    return;
                }

                System.String value = a[++i];
                if (option.Equals("-i"))
                {
                    indexName = value;
                }
                else if (option.Equals("-f"))
                {
                    fn = value;
                }
                else
                {
                    url = new System.Uri(value);
                }
            }

            // Wraps standard output; intentionally not disposed so the console
            // stream is left open.
            System.IO.StreamWriter o = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
            o.AutoFlush = true;

            IndexReader r = IndexReader.Open(indexName);
            try
            {
                o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");

                MoreLikeThis mlt = new MoreLikeThis(r);

                o.WriteLine("Query generation parameters:");
                o.WriteLine(mlt.DescribeParams());
                o.WriteLine();

                Query query = null;
                if (url != null)
                {
                    o.WriteLine("Parsing URL: " + url);
                    query = mlt.Like(url);
                }
                else if (fn != null)
                {
                    o.WriteLine("Parsing file: " + fn);
                    query = mlt.Like(new System.IO.FileInfo(fn));
                }

                o.WriteLine("q: " + query);
                o.WriteLine();

                IndexSearcher searcher = new IndexSearcher(indexName);
                try
                {
                    Hits hits = searcher.Search(query);
                    int len = hits.Length();
                    o.WriteLine("found: " + len + " documents matching");
                    o.WriteLine();
                    for (int i = 0; i < System.Math.Min(25, len); i++)
                    {
                        Document d = hits.Doc(i);
                        System.String summary = d.Get("summary");
                        o.WriteLine("score  : " + hits.Score(i));
                        o.WriteLine("url    : " + d.Get("url"));
                        o.WriteLine("\ttitle  : " + d.Get("title"));
                        if (summary != null)
                        {
                            o.WriteLine("\tsummary: " + summary);
                        }
                        o.WriteLine();
                    }
                }
                finally
                {
                    // Release the searcher even if searching or printing fails.
                    searcher.Close();
                }
            }
            finally
            {
                // Release the reader even if query generation fails.
                r.Close();
            }
        }
Example #3
0
        /// <summary>
        /// Command-line test driver: generates a MoreLikeThis query from a URL
        /// or a file and prints up to 25 hits from the index.
        /// </summary>
        /// <param name="a">
        /// Command-line arguments. <c>-i</c>, <c>-f</c> and <c>-url</c> each take
        /// the following argument as their value (index name, file, URL);
        /// anything else is ignored. When a URL is given it is used instead of the file.
        /// </param>
        public static void  Main(System.String[] a)
        {
            System.String indexName = "localhost_index";
            System.String fn        = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
            System.Uri    url       = null;
            for (int i = 0; i < a.Length; i++)
            {
                bool takesValue = a[i].Equals("-i") || a[i].Equals("-f") || a[i].Equals("-url");

                // Guard the a[++i] reads below: an option given as the last
                // argument has no value to consume.
                if (takesValue && i + 1 >= a.Length)
                {
                    System.Console.Error.WriteLine("Missing value for option " + a[i]);
                    return;
                }

                if (a[i].Equals("-i"))
                {
                    indexName = a[++i];
                }
                else if (a[i].Equals("-f"))
                {
                    fn = a[++i];
                }
                else if (a[i].Equals("-url"))
                {
                    url = new System.Uri(a[++i]);
                }
            }

            // Writer over standard output; left undisposed on purpose so the
            // underlying console stream is not closed.
            System.IO.StreamWriter o = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
            o.AutoFlush = true;

            IndexReader   r        = IndexReader.Open(indexName);
            IndexSearcher searcher = null;
            try
            {
                o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");

                MoreLikeThis mlt = new MoreLikeThis(r);

                o.WriteLine("Query generation parameters:");
                o.WriteLine(mlt.DescribeParams());
                o.WriteLine();

                Query query = null;

                if (url != null)
                {
                    o.WriteLine("Parsing URL: " + url);
                    query = mlt.Like(url);
                }
                else if (fn != null)
                {
                    o.WriteLine("Parsing file: " + fn);
                    query = mlt.Like(new System.IO.FileInfo(fn));
                }

                o.WriteLine("q: " + query);
                o.WriteLine();
                searcher = new IndexSearcher(indexName);

                Hits hits = searcher.Search(query);
                int  len  = hits.Length();

                o.WriteLine("found: " + len + " documents matching");
                o.WriteLine();
                for (int i = 0; i < System.Math.Min(25, len); i++)
                {
                    Document      d       = hits.Doc(i);
                    System.String summary = d.Get("summary");
                    o.WriteLine("score  : " + hits.Score(i));
                    o.WriteLine("url    : " + d.Get("url"));
                    o.WriteLine("\ttitle  : " + d.Get("title"));
                    if (summary != null)
                    {
                        o.WriteLine("\tsummary: " + summary);
                    }
                    o.WriteLine();
                }
            }
            finally
            {
                // Close index resources on every exit path, including failures.
                if (searcher != null)
                {
                    searcher.Close();
                }
                r.Close();
            }
        }
		/// <summary>
		/// Runs a MoreLikeThis query seeded from an indexed document and returns
		/// the top-scoring hits as <c>CorpusDocument</c> items.
		/// </summary>
		/// <param name="indexName">Name passed to <c>GetSearcher</c> to obtain the searcher.</param>
		/// <param name="indexDocumentId">Index document id the similarity query is built from.</param>
		/// <param name="maxDocs">Maximum number of hits collected.</param>
		/// <returns>One item per hit, carrying the stored "Id" and "Title" field values.</returns>
		public IList<CorpusDocument> GetMoreLikeThis(string indexName, int indexDocumentId, int maxDocs)
		{
			// See: http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/similar/MoreLikeThis.html

			var indexSearcher = GetSearcher(indexName);

			var moreLikeThis = new MoreLikeThis(indexSearcher.GetIndexReader());
			moreLikeThis.SetAnalyzer(GetAnalyzer(SearchType.Morphologic));
			moreLikeThis.SetFieldNames(new string[] {"Title", "Content"});
			// Minimum word length of 4 avoids most ambiguous Hebrew stop-words.
			moreLikeThis.SetMinWordLen(4);

			var likeQuery = moreLikeThis.Like(indexDocumentId);

			var collector = TopScoreDocCollector.create(maxDocs, true);
			indexSearcher.Search(likeQuery, collector);
			var scoreDocs = collector.TopDocs().scoreDocs;

			var related = new List<CorpusDocument>(maxDocs);

			for (var i = 0; i < scoreDocs.Length; i++)
			{
				// Load the stored fields of the hit and copy the ones we expose.
				var stored = indexSearcher.Doc(scoreDocs[i].doc);

				var item = new CorpusDocument();
				item.Id = stored.Get("Id");
				item.Title = stored.Get("Title");

				related.Add(item);
			}

			return related;
		}
Example #5
0
        /// <summary>
        /// Demo entry point: populates the index, builds a MoreLikeThis query
        /// from the text of policy section 35 and searches for similar
        /// documents whose "type" field is not "policy".
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Case-insensitive stop-word table in the form SetStopWords expects.
            var stopWords = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
            foreach (var s in StopWords)
            {
                stopWords[s] = s;
            }
            var connectionString = "Data Source=(local);Initial Catalog=AllgressDB;Integrated Security=true";
            Repo = new Repository(connectionString);
            var directory = PopulateIndex();

            //SearchForTerm(directory);
            var policy = Repo.GetPolicySection(35);
            string text = Convert(policy.Text);
            using (var reader = DirectoryReader.Open(directory, true))
            using (var indexSearcher = new IndexSearcher(reader))
            {
                var moreLikeThis = new MoreLikeThis(reader);

                // Configure everything, including the analyzer, BEFORE calling
                // Like(): settings applied afterwards cannot influence the
                // query that has already been generated.
                moreLikeThis.SetAnalyzer(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
                moreLikeThis.SetStopWords(stopWords);
                moreLikeThis.SetFieldNames(new[] { "description", "procedures", "objectives", "references" });
                moreLikeThis.SetBoost(true);
                moreLikeThis.SetMinDocFreq(2);
                moreLikeThis.SetMinTermFreq(1);

                var query = moreLikeThis.Like(text);

                // Exclude documents of type "policy". A direct cast fails with
                // a descriptive InvalidCastException rather than a
                // NullReferenceException if the query is not a BooleanQuery.
                ((BooleanQuery)query).Add(new TermQuery(new Term("type", "policy")), Occur.MUST_NOT);

                var docs = from scoreDoc in indexSearcher.Search(query, null, 1000).ScoreDocs
                           select new ScoredDocument { Score = scoreDoc.Score, Document = indexSearcher.Doc(scoreDoc.Doc) };

                // Materialize while the reader and searcher are still open;
                // the LINQ query above is deferred.
                var results = docs.ToArray();
            }
            Console.ReadKey();
        }