SetSimilarity() public method

Expert: Set the Similarity implementation used by this IndexWriter.
public SetSimilarity ( Lucene.Net.Search.Similarity similarity ) : void
similarity Lucene.Net.Search.Similarity
return void
        /// <summary>
        /// Opens the index directory at <paramref name="indexPath"/>, selects the analyzer
        /// by name and creates the index writer with the custom similarity applied.
        /// </summary>
        /// <param name="indexPath">File-system path handed to FSDirectory.Open.</param>
        /// <param name="name">
        /// Analyzer to use: "WhitespaceAnalyzer", "SimpleAnalyzer", "StandardAnalyzer" or
        /// "StopAnalyzer". Any other value leaves the current analyzer field untouched.
        /// </param>
        public void CreateIndex(string indexPath, string name)
        {
            luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
            IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

            switch (name)
            {
                case "WhitespaceAnalyzer":
                    analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
                    break;
                case "SimpleAnalyzer":
                    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
                    break;
                case "StandardAnalyzer":
                    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
                    break;
                case "StopAnalyzer":
                    analyzer = new Lucene.Net.Analysis.StopAnalyzer(VERSION);
                    break;
            }

            // The writer has to be created for every analyzer choice. It used to live in the
            // else-branch of the "StopAnalyzer" check, so selecting StopAnalyzer skipped the
            // creation and the SetSimilarity call below ran against a null or stale writer.
            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);

            writer.SetSimilarity(customSimilarity);
        }
Beispiel #2
0
 /// <summary>
 /// Opens the index directory at the given path and sets up the index writer
 /// with the configured analyzer and similarity.
 /// </summary>
 /// <param name="indexPath">File-system path handed to FSDirectory.Open.</param>
 protected override void CreateIndex(string indexPath)
 {
     luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);

     // Field length is capped at the IndexWriter default.
     var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

     writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, maxFieldLength);
     writer.SetSimilarity(similarity);
 }
Beispiel #3
0
 /// <summary>
 /// Sets up the index writer over the "IndexStoredPosition" sub-path of the supplied location.
 /// </summary>
 /// <param name="indexPath">Base path; "/IndexStoredPosition" is appended to it.</param>
 public void CreateIndex(string indexPath)
 {
     string storagePath = indexPath + "/IndexStoredPosition";
     luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(storagePath);

     // Field length is capped at the IndexWriter default.
     var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

     writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, maxFieldLength);
     writer.SetSimilarity(mySimilarity);
 }
        /// <summary>
        /// Opens the directory at the supplied path and sets up the index writer,
        /// scoring with a new NewSimilarity instance.
        /// </summary>
        /// <param name="indexPath">File-system path handed to FSDirectory.Open.</param>
        public void CreateIndex(string indexPath)
        {
            luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);

            // Field length is capped at the IndexWriter default.
            var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, maxFieldLength);

            // Replace the default similarity measure.
            var similarityMeasure = new NewSimilarity();
            writer.SetSimilarity(similarityMeasure);
        }
        // Value assigned to IndexWriter.MaxMergeDocs when a writer is created.
        const int MaxMergeDocs = 7999;     //  Except never merge segments that have more docs than this

        /// <summary>
        /// Builds an IndexWriter over <paramref name="directory"/> using a PackageAnalyzer,
        /// unlimited field length, the class-level merge settings and a CustomSimilarity.
        /// </summary>
        /// <param name="directory">Directory the writer operates on.</param>
        /// <param name="create">Forwarded unchanged to the IndexWriter constructor.</param>
        /// <returns>The configured writer; the caller owns it.</returns>
        public static IndexWriter CreateIndexWriter(Lucene.Net.Store.Directory directory, bool create)
        {
            // On the right-hand side the names resolve to this class's own
            // MergeFactor / MaxMergeDocs members, not to the writer's properties.
            var packageWriter = new IndexWriter(directory, new PackageAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED)
            {
                MergeFactor = MergeFactor,
                MaxMergeDocs = MaxMergeDocs
            };

            packageWriter.SetSimilarity(new CustomSimilarity());
            return packageWriter;
        }
        /// <summary>
        /// Opens a writer on <paramref name="dir"/> with create = true, applies the
        /// buffering, merge, similarity and compound-file settings, and closes it again
        /// without adding any documents.
        /// </summary>
        /// <param name="dir">Directory that receives the index.</param>
        private void CreateIndex(Directory dir)
        {
            IndexWriter indexWriter = new IndexWriter(dir, anlzr, true, IndexWriter.MaxFieldLength.LIMITED);

            // Writer configuration.
            indexWriter.SetMaxBufferedDocs(5);
            indexWriter.SetMergeFactor(3);
            indexWriter.SetSimilarity(similarityOne);
            indexWriter.SetUseCompoundFile(true);

            indexWriter.Close();
        }
Beispiel #7
0
        }//contructor which is used to initialize the objects

        /// <summary>
        /// Opens the index directory, instantiates the standard and keyword analyzers,
        /// and creates the writer over a PerFieldAnalyzerWrapper defaulting to the standard analyzer.
        /// </summary>
        /// <param name="indexPath">File-system path handed to FSDirectory.Open.</param>
        public void CreateIndex(string indexPath)
        {
            luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);

            // Analyzers: the keyword analyzer is only instantiated here, not registered on the wrapper.
            analyzerstandard = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
            analyzerkeyword = new Lucene.Net.Analysis.KeywordAnalyzer();

            var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

            analysor = new PerFieldAnalyzerWrapper(analyzerstandard);
            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analysor, true, maxFieldLength);

            // Custom scoring (task 6).
            writer.SetSimilarity(customSimilarity);
        }
Beispiel #8
0
        /// <summary>
        /// Test fixture set-up: writes four documents ("d1".."d4") into an in-memory
        /// index and opens a read-only reader and a searcher over it, both writer and
        /// searcher using the <c>sim</c> similarity.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();

            index = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            indexWriter.SetSimilarity(sim);

            // "hed" is the most important field, "dek" is secondary.

            // d1: an "ok" match for "albino elephant".
            Document okMatch = new Document();
            okMatch.Add(new Field("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            okMatch.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED));
            okMatch.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.AddDocument(okMatch, null);

            // d2: a "good" match for "albino elephant".
            Document goodMatch = new Document();
            goodMatch.Add(new Field("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED));
            goodMatch.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED));
            goodMatch.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));
            goodMatch.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.AddDocument(goodMatch, null);

            // d3: a "better" match for "albino elephant".
            Document betterMatch = new Document();
            betterMatch.Add(new Field("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED));
            betterMatch.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));
            betterMatch.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.AddDocument(betterMatch, null);

            // d4: the "best" match for "albino elephant".
            Document bestMatch = new Document();
            bestMatch.Add(new Field("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED));
            bestMatch.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));
            bestMatch.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED));
            bestMatch.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.AddDocument(bestMatch, null);

            indexWriter.Close();

            // Reader and searcher over the index that was just written.
            r = IndexReader.Open(index, true, null);
            s = new IndexSearcher(r);
            s.Similarity = sim;
        }
Beispiel #9
0
        /// <summary>
        /// Opens the main, spell-check and auto-complete directories below the given path
        /// and sets up the main index writer with the replacement similarity.
        /// </summary>
        /// <param name="indexPath">
        /// Path of the main index; the "\spell" and "\autocomplete" suffixes are appended to it
        /// for the two auxiliary directories.
        /// </param>
        public void CreateIndex(string indexPath)
        {
            string spellPath = indexPath + @"\spell";
            string autoCompletePath = indexPath + @"\autocomplete";

            luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
            spellCheckIndexStorage = Lucene.Net.Store.FSDirectory.Open(spellPath);
            autoCompleteIndexDirectory = Lucene.Net.Store.FSDirectory.Open(autoCompletePath);

            var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, maxFieldLength);

            // Changes how Lucene scores documents.
            writer.SetSimilarity(newSimilarity);
        }
Beispiel #10
0
        /// <summary>
        /// Adds a single document to the directory through a temporary IndexWriter and
        /// reports the segment that the writer considers newest after flushing.
        /// </summary>
        /// <param name="dir">Directory to write into.</param>
        /// <param name="analyzer">Analyzer handed to the writer.</param>
        /// <param name="similarity">Similarity set on the writer before the document is added.</param>
        /// <param name="doc">Document to add.</param>
        /// <returns>The value of <c>NewestSegment()</c> taken after the flush and before the writer is closed.</returns>
        public static SegmentInfo WriteDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
        {
            IndexWriter segmentWriter = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
            segmentWriter.SetSimilarity(similarity);

            segmentWriter.AddDocument(doc);
            segmentWriter.Flush();

            // Capture the segment info while the writer is still open.
            SegmentInfo newestSegment = segmentWriter.NewestSegment();
            segmentWriter.Close();

            return newestSegment;
        }
        /// <summary>
        /// Opens a writer on <paramref name="dir"/> with create = false and adds
        /// <paramref name="ndocs"/> documents produced by NewDoc().
        /// </summary>
        /// <param name="dir">Directory holding the index.</param>
        /// <param name="ndocs">Number of documents to add; values of zero or less add nothing.</param>
        /// <param name="compound">Forwarded to SetUseCompoundFile.</param>
        private void AddDocs(Directory dir, int ndocs, bool compound)
        {
            IndexWriter indexWriter = new IndexWriter(dir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);

            // Writer configuration.
            indexWriter.SetMaxBufferedDocs(5);
            indexWriter.SetMergeFactor(3);
            indexWriter.SetSimilarity(similarityOne);
            indexWriter.SetUseCompoundFile(compound);

            int added = 0;
            while (added < ndocs)
            {
                indexWriter.AddDocument(NewDoc());
                added++;
            }

            indexWriter.Close();
        }
Beispiel #12
0
        /// <summary>
        /// Records the collection and index paths, builds a shingle-wrapped English Snowball
        /// analyzer with the configured stop words, creates the writer and then calls
        /// IndexCollection().
        /// </summary>
        /// <param name="collectionPath">Stored in the <c>collectionPath</c> field.</param>
        /// <param name="indexPath">Stored in the <c>indexPath</c> field and opened as the index directory.</param>
        public void CreateIndex(string collectionPath, string indexPath)
        {
            var stopWords = new HashSet<string>(STOP_WORDS);

            this.indexPath = indexPath;
            this.collectionPath = collectionPath;

            luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(this.indexPath);

            // Snowball analysis first, shingles layered on top of it.
            analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English", stopWords);
            shingleAnalyzer = new Lucene.Net.Analysis.Shingle.ShingleAnalyzerWrapper(analyzer, MAX_SHINGLE_SIZE);

            var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, shingleAnalyzer, true, maxFieldLength);
            writer.SetSimilarity(newSimilarity);

            IndexCollection();
        }
Beispiel #13
0
        /// <summary>
        /// Indexes 30 documents with two fields: "noTf" (term frequencies and positions
        /// omitted) and "tf" (regular). It then runs term queries and a conjunctive boolean
        /// query through counting hit collectors.
        /// </summary>
        /// <remarks>
        /// Even-numbered documents carry the extra token "tf" in the "tf" field and odd-numbered
        /// ones carry "notf" in the "noTf" field, so the final boolean query
        /// (noTf:term AND tf:tf) is expected to match exactly 15 documents.
        /// </remarks>
        public virtual void  TestBasic()
        {
            Directory   dir      = new MockRAMDirectory();
            Analyzer    analyzer = new StandardAnalyzer();
            IndexWriter writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMergeFactor(2);
            writer.SetMaxBufferedDocs(2);
            writer.SetSimilarity(new SimpleSimilarity());

            System.Text.StringBuilder sb   = new System.Text.StringBuilder(265);
            System.String             term = "term";
            for (int i = 0; i < 30; i++)
            {
                Document d = new Document();

                // Document i contains "term" repeated i + 1 times in both fields.
                sb.Append(term).Append(" ");
                System.String content = sb.ToString();
                Field         noTf    = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED);
                noTf.SetOmitTermFreqAndPositions(true);
                d.Add(noTf);

                Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED);
                d.Add(tf);

                writer.AddDocument(d);
            }

            writer.Optimize();
            // flush
            writer.Close();
            _TestUtil.CheckIndex(dir);

            /*
             * Verify the index
             */
            Searcher searcher = new IndexSearcher(dir);

            searcher.SetSimilarity(new SimpleSimilarity());

            Term      a  = new Term("noTf", term);
            Term      b  = new Term("tf", term);
            Term      c  = new Term("noTf", "notf");
            Term      d2 = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d2);

            // The per-query collectors are declared elsewhere in this class; whatever checks
            // they perform happen inside the Search calls.
            searcher.Search(q1, new AnonymousClassCountingHitCollector(this));

            searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));

            searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));

            searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));

            BooleanQuery bq = new BooleanQuery();

            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));

            // AreEqual reports the actual count on failure, unlike IsTrue(15 == ...).
            Assert.AreEqual(15, CountingHitCollector.GetCount());

            searcher.Close();
            dir.Close();
        }
Beispiel #14
0
		/// <summary>
		/// Opens a writer on <paramref name="dir"/> with create = false and adds
		/// <paramref name="ndocs"/> documents produced by NewDoc().
		/// </summary>
		/// <param name="dir">Directory holding the index.</param>
		/// <param name="ndocs">Number of documents to add; values of zero or less add nothing.</param>
		/// <param name="compound">Forwarded to SetUseCompoundFile.</param>
		private void AddDocs(Directory dir, int ndocs, bool compound)
		{
			IndexWriter indexWriter = new IndexWriter(dir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);

			// Writer configuration.
			indexWriter.SetMaxBufferedDocs(5);
			indexWriter.SetMergeFactor(3);
			indexWriter.SetSimilarity(similarityOne);
			indexWriter.SetUseCompoundFile(compound);

			int added = 0;
			while (added < ndocs)
			{
				indexWriter.AddDocument(NewDoc());
				added++;
			}

			indexWriter.Close();
		}
Beispiel #15
0
		/// <summary>
		/// Opens a writer on <paramref name="dir"/> with create = true, applies the
		/// buffering, merge, similarity and compound-file settings, and closes it again
		/// without adding any documents.
		/// </summary>
		/// <param name="dir">Directory that receives the index.</param>
		private void CreateIndex(Directory dir)
		{
			IndexWriter indexWriter = new IndexWriter(dir, anlzr, true, IndexWriter.MaxFieldLength.LIMITED);

			// Writer configuration.
			indexWriter.SetMaxBufferedDocs(5);
			indexWriter.SetMergeFactor(3);
			indexWriter.SetSimilarity(similarityOne);
			indexWriter.SetUseCompoundFile(true);

			indexWriter.Close();
		}
 /// <summary>
 /// Creates the index writer over the already-opened index directory and
 /// assigns the replacement similarity to it.
 /// </summary>
 public void CreateWriter()
 {
     // Field length is capped at the IndexWriter default.
     var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

     writer = new IndexWriter(luceneIndexDirectory, analyzer, true, maxFieldLength);
     writer.SetSimilarity(newSimilarity);
 }
Beispiel #17
0
 /// <summary>
 /// Indexes 30 documents with two fields: "noTf" (term frequencies and positions
 /// omitted) and "tf" (regular). It then runs term queries and a conjunctive boolean
 /// query through counting hit collectors.
 /// </summary>
 /// <remarks>
 /// Even-numbered documents carry the extra token "tf" in the "tf" field and odd-numbered
 /// ones carry "notf" in the "noTf" field, so the final boolean query
 /// (noTf:term AND tf:tf) is expected to match exactly 15 documents.
 /// </remarks>
 public virtual void  TestBasic()
 {
     Directory dir = new MockRAMDirectory();
     Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
     IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
     writer.MergeFactor = 2;
     writer.SetMaxBufferedDocs(2);
     writer.SetSimilarity(new SimpleSimilarity());

     System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
     System.String term = "term";
     for (int i = 0; i < 30; i++)
     {
         Document d = new Document();

         // Document i contains "term" repeated i + 1 times in both fields.
         sb.Append(term).Append(" ");
         System.String content = sb.ToString();
         Field noTf = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED);
         noTf.OmitTermFreqAndPositions = true;
         d.Add(noTf);

         Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED);
         d.Add(tf);

         writer.AddDocument(d);
     }

     writer.Optimize();
     // flush
     writer.Close();
     _TestUtil.CheckIndex(dir);

     /*
     * Verify the index
     */
     Searcher searcher = new IndexSearcher(dir, true);
     searcher.Similarity = new SimpleSimilarity();

     Term a = new Term("noTf", term);
     Term b = new Term("tf", term);
     Term c = new Term("noTf", "notf");
     Term d2 = new Term("tf", "tf");
     TermQuery q1 = new TermQuery(a);
     TermQuery q2 = new TermQuery(b);
     TermQuery q3 = new TermQuery(c);
     TermQuery q4 = new TermQuery(d2);

     // The per-query collectors are declared elsewhere in this class; whatever checks
     // they perform happen inside the Search calls.
     searcher.Search(q1, new AnonymousClassCountingHitCollector(this));

     searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));

     searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));

     searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));

     BooleanQuery bq = new BooleanQuery();
     bq.Add(q1, Occur.MUST);
     bq.Add(q4, Occur.MUST);

     searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));

     // AreEqual reports the actual count on failure, unlike IsTrue(15 == ...).
     Assert.AreEqual(15, CountingHitCollector.GetCount());

     searcher.Close();
     dir.Close();
 }
Beispiel #18
0
		/// <summary>
		/// Adds a single document to the directory through a temporary IndexWriter and
		/// reports the segment that the writer considers newest after flushing.
		/// </summary>
		/// <param name="dir">Directory to write into.</param>
		/// <param name="analyzer">Analyzer handed to the writer.</param>
		/// <param name="similarity">Similarity set on the writer before the document is added.</param>
		/// <param name="doc">Document to add.</param>
		/// <returns>The value of <c>NewestSegment()</c> taken after the flush and before the writer is closed.</returns>
		public static SegmentInfo WriteDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
		{
			IndexWriter segmentWriter = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
			segmentWriter.SetSimilarity(similarity);

			segmentWriter.AddDocument(doc);
			segmentWriter.Flush();

			// Capture the segment info while the writer is still open.
			SegmentInfo newestSegment = segmentWriter.NewestSegment();
			segmentWriter.Close();

			return newestSegment;
		}
        /// <summary>
        /// Builds an IndexWriter over <paramref name="directory"/>, creating a new index when
        /// none exists yet and otherwise releasing any lock held on the existing one first.
        /// </summary>
        /// <param name="directory">Directory the writer operates on.</param>
        /// <returns>
        /// A writer configured with a PackageAnalyzer, unlimited field length, the class-level
        /// merge settings and a CustomSimilarity.
        /// </returns>
        internal static IndexWriter CreateIndexWriter(Lucene.Net.Store.Directory directory)
        {
            bool indexExists = IndexReader.IndexExists(directory);
            bool create = !indexExists;

            directory.EnsureOpen();

            // An existing index that is still locked gets its lock released forcibly.
            if (indexExists && IndexWriter.IsLocked(directory))
            {
                IndexWriter.Unlock(directory);
            }

            IndexWriter packageWriter = new IndexWriter(directory, new PackageAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);

            packageWriter.MergeFactor = MergeFactor;
            packageWriter.MaxMergeDocs = MaxMergeDocs;
            packageWriter.SetSimilarity(new CustomSimilarity());

            return packageWriter;
        }
        /// <summary>
        /// Builds an IndexWriter over <paramref name="directory"/>, creating a new index when
        /// none exists yet and otherwise releasing any lock held on the existing one first.
        /// </summary>
        /// <param name="directory">Directory the writer operates on.</param>
        /// <returns>
        /// A writer configured with a PackageAnalyzer, unlimited field length, the merge policy
        /// applied by NuGetMergePolicyApplyer and a CustomSimilarity.
        /// </returns>
        private static IndexWriter CreateIndexWriter(Lucene.Net.Store.Directory directory)
        {
            bool indexExists = IndexReader.IndexExists(directory);
            bool create = !indexExists;

            directory.EnsureOpen();

            // An existing index that is still locked gets its lock released forcibly.
            if (indexExists && IndexWriter.IsLocked(directory))
            {
                IndexWriter.Unlock(directory);
            }

            IndexWriter packageWriter = new IndexWriter(directory, new PackageAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);

            NuGetMergePolicyApplyer.ApplyTo(packageWriter);
            packageWriter.SetSimilarity(new CustomSimilarity());

            return packageWriter;
        }