コード例 #1
0
ファイル: Index.cs プロジェクト: marcoaoteixeira/InfoFenix
        /// <summary>
        /// Opens the file-system directory for this index (located at
        /// <c>_basePath</c> combined with <c>Name</c>) and stores it in <c>_directory</c>.
        /// </summary>
        private void Initialize()
        {
            var indexLocation = new DirectoryInfo(Path.Combine(_basePath, Name));
            _directory = LuceneFSDirectory.Open(indexLocation);

            // An index writer is created and disposed straight away; this is what
            // creates the index directory.
            using (CreateIndexWriter())
            {
            }
        }
コード例 #2
0
        /// <summary>
        /// Initializes a new instance of the <see cref="Indexer"/> class.
        /// </summary>
        /// <remarks>
        /// The index location is derived from <paramref name="path"/> by swapping its
        /// extension for ".idx". A searcher is opened only when an index is already present.
        /// </remarks>
        /// <param name="path">The path to the .xml.bz2 dump of wikipedia</param>
        public Indexer(string path)
        {
            filePath  = path;
            indexPath = Path.ChangeExtension(path, ".idx");

            Lucene.Net.Store.Directory idxDir = FSDirectory.Open(new DirectoryInfo(indexPath));

            // Both the folder and a readable index inside it must be present.
            if (Directory.Exists(indexPath))
            {
                if (IndexReader.IndexExists(idxDir))
                {
                    indexExists = true;
                }
            }

            // Open a read-only searcher over the existing index.
            if (indexExists)
            {
                searcher = new IndexSearcher(idxDir, true);
            }

            textAnalyzer = GuessAnalyzer(filePath, out _IsRTL);

            // Queries target the "title" field and require every term to match.
            queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "title", textAnalyzer);
            queryParser.SetDefaultOperator(QueryParser.Operator.AND);

            abortIndexing         = false;
            multithreadedIndexing = Environment.ProcessorCount > 1;
        }
コード例 #3
0
ファイル: DeleteFiles.cs プロジェクト: stgwilli/ravendb
        /// <summary>
        /// Deletes every document whose "path" term equals the first command-line
        /// argument from the index stored in the "index" location.
        /// </summary>
        /// <param name="args">Command-line arguments; args[0] is the unique term to delete.</param>
        public static void  Main(System.String[] args)
        {
            System.String usage = typeof(DeleteFiles) + " <unique_term>";
            if (args.Length == 0)
            {
                System.Console.Error.WriteLine("Usage: " + usage);
                System.Environment.Exit(1);
            }
            try
            {
                Directory directory = FSDirectory.Open(new System.IO.FileInfo("index"));
                try
                {
                    // we don't want read-only because we are about to delete
                    IndexReader reader = IndexReader.Open(directory, false);
                    try
                    {
                        Term term    = new Term("path", args[0]);
                        int  deleted = reader.DeleteDocuments(term);

                        System.Console.Out.WriteLine("deleted " + deleted + " documents containing " + term);

                        // one can also delete documents by their internal id:

                        /*
                         * for (int i = 0; i < reader.maxDoc(); i++) {
                         * System.out.println("Deleting document with id " + i);
                         * reader.delete(i);
                         * }*/
                    }
                    finally
                    {
                        // Always release the reader, even when the delete fails.
                        reader.Close();
                    }
                }
                finally
                {
                    // Always release the directory handle.
                    directory.Close();
                }
            }
            catch (System.Exception e)
            {
                System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
コード例 #4
0
        // For every legacy index named in oldNames: unzip it, optimize it with a
        // fresh IndexWriter and run CheckIndex on the result. For names starting
        // with "29." the compressed-field assertion is run before and after optimizing.
        public virtual void  TestOptimizeOldIndex()
        {
            int compressedFieldChecks = 0;

            for (int idx = 0; idx < oldNames.Length; idx++)
            {
                System.String indexName = oldNames[idx];
                bool          is29Index = indexName.StartsWith("29.");

                Unzip(Paths.CombinePath(Paths.ProjectRootDirectory, "test/core/index/index." + indexName), indexName);

                Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(indexName)));

                // Check before optimizing.
                if (is29Index)
                {
                    assertCompressedFields29(dir, true);
                    compressedFieldChecks++;
                }

                IndexWriter optimizer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
                optimizer.Optimize();
                optimizer.Close();

                _TestUtil.CheckIndex(dir);

                // Check again after optimizing.
                if (is29Index)
                {
                    assertCompressedFields29(dir, false);
                    compressedFieldChecks++;
                }

                dir.Close();
                RmDir(indexName);
            }

            Assert.AreEqual(4, compressedFieldChecks, "test for compressed field should have run 4 times");
        }
コード例 #5
0
        /// <summary>
        /// Opens the index at INDEX_DIR, prints the number of indexed documents, then
        /// searches the "content" field for the term "defg" and prints the top hits.
        /// </summary>
        static void Query()
        {
            try
            {
                // The directory, reader and searcher all hold file handles, so each
                // one is disposed deterministically when the query finishes or fails.
                using (var directory = FSDirectory.Open(INDEX_DIR))
                using (IndexReader reader = IndexReader.Open(directory, true))
                using (IndexSearcher searcher = new IndexSearcher(directory))
                {
                    // Read-only reader: counterpart to the IndexWriter that built the index.
                    Console.Out.WriteLine("Number of indexed docs: " + reader.NumDocs());

                    // A Term is a (field name, value) pair; TermQuery matches documents
                    // containing that exact term in the given field.
                    Term      searchTerm = new Term("content", "defg");
                    TermQuery query      = new TermQuery(searchTerm);

                    // Collects the 10 highest-scoring hits.
                    TopScoreDocCollector topDocColl = TopScoreDocCollector.Create(10, true);
                    searcher.Search(query, topDocColl);

                    TopDocs topDocs = topDocColl.TopDocs();
                    Console.WriteLine("Number of hits: " + topDocs.TotalHits);

                    // Print each hit's document number followed by its stored "name" field.
                    foreach (var searchHit in topDocs.ScoreDocs)
                    {
                        Console.WriteLine(searchHit.Doc + ". " + searcher.Doc(searchHit.Doc).GetField("name").StringValue);
                    }
                }
            }
            catch (IOException excep)
            {
                Console.Out.WriteLine(excep.Message);
            }
        }
コード例 #6
0
ファイル: IndexFiles.cs プロジェクト: sainabob/teamlab.v7.5
        /// <summary>
        /// Indexes the file or directory given as the first command-line argument
        /// into a new index at INDEX_DIR and reports the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments; args[0] is the root to index.</param>
        public static void  Main(System.String[] args)
        {
            System.String usage = typeof(IndexFiles) + " <root_directory>";
            if (args.Length == 0)
            {
                System.Console.Error.WriteLine("Usage: " + usage);
                System.Environment.Exit(1);
            }

            // Refuse to overwrite anything (file or directory) already at the index location.
            bool indexDirExists = System.IO.File.Exists(INDEX_DIR.FullName) || System.IO.Directory.Exists(INDEX_DIR.FullName);
            if (indexDirExists)
            {
                System.Console.Out.WriteLine("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
                System.Environment.Exit(1);
            }

            System.IO.FileInfo docDir = new System.IO.FileInfo(args[0]);

            // The root may be either a single file or a directory.
            bool docDirExists = System.IO.File.Exists(docDir.FullName) || System.IO.Directory.Exists(docDir.FullName);
            if (!docDirExists)
            {
                System.Console.Out.WriteLine("Document directory '" + docDir.FullName + "' does not exist or is not readable, please check the path");
                System.Environment.Exit(1);
            }

            System.DateTime start = System.DateTime.UtcNow;
            try
            {
                IndexWriter writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
                try
                {
                    System.Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
                    IndexDocs(writer, docDir);
                    System.Console.Out.WriteLine("Optimizing...");
                    writer.Optimize();
                }
                finally
                {
                    // Close the writer even when indexing fails.
                    writer.Close();
                }

                // Elapsed wall-clock time. DateTime.Millisecond is only the 0-999
                // component of a timestamp, so the two timestamps are subtracted instead.
                long elapsedMilliseconds = (long) (System.DateTime.UtcNow - start).TotalMilliseconds;
                System.Console.Out.WriteLine(elapsedMilliseconds + " total milliseconds");
            }
            catch (System.IO.IOException e)
            {
                System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
コード例 #7
0
        /* Walk directory hierarchy in uid order, while keeping the uid iterator from
         * the existing index in sync.  Mismatches indicate one of: (a) old documents to
         * be deleted; (b) unchanged documents, to be left alone; or (c) new
         * documents, to be indexed.
         *
         * When create is false the existing index is opened for an incremental
         * update; otherwise the files are simply indexed.
         */

        private static void  IndexDocs(System.IO.FileInfo file, System.IO.FileInfo index, bool create)
        {
            if (create)
            {
                // no existing index to reconcile against
                IndexDocs(file);
                return;
            }

            // incrementally update

            reader = IndexReader.Open(FSDirectory.Open(index), false);  // open existing index
            try
            {
                uidIter = reader.Terms(new Term("uid", ""));            // init uid iterator
                try
                {
                    IndexDocs(file);

                    if (deleting)
                    {
                        // delete rest of stale docs
                        // NOTE(review): the field check is a reference comparison; it presumably
                        // relies on field names being interned - confirm before changing it.
                        while (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid")
                        {
                            System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text()));
                            reader.DeleteDocuments(uidIter.Term());
                            uidIter.Next();
                        }
                        deleting = false;
                    }
                }
                finally
                {
                    uidIter.Close();            // close uid iterator, even on failure
                }
            }
            finally
            {
                reader.Close();                 // close existing index, even on failure
            }
        }
コード例 #8
0
        // Runs the threaded-optimize scenario four times (serial and concurrent merge
        // scheduler, each with both values of the boolean flag), first against an
        // in-memory directory and then against an on-disk directory under "tempDir".
        public virtual void  TestThreadedOptimize_Renamed()
        {
            // Pass 1: in-memory directory.
            Directory ramDirectory = new MockRAMDirectory();
            runTest(ramDirectory, false, new SerialMergeScheduler());
            runTest(ramDirectory, true, new SerialMergeScheduler());
            runTest(ramDirectory, false, new ConcurrentMergeScheduler());
            runTest(ramDirectory, true, new ConcurrentMergeScheduler());
            ramDirectory.Close();

            System.String tempRoot = SupportClass.AppSettings.Get("tempDir", "");
            if (tempRoot == null)
            {
                throw new System.IO.IOException("tempDir undefined, cannot run test");
            }

            // Pass 2: the same sequence against the file system.
            System.String diskPath      = tempRoot + "/luceneTestThreadedOptimize";
            Directory     diskDirectory = FSDirectory.Open(new System.IO.FileInfo(diskPath));
            runTest(diskDirectory, false, new SerialMergeScheduler());
            runTest(diskDirectory, true, new SerialMergeScheduler());
            runTest(diskDirectory, false, new ConcurrentMergeScheduler());
            runTest(diskDirectory, true, new ConcurrentMergeScheduler());
            diskDirectory.Close();

            _TestUtil.RmDir(diskPath);
        }
コード例 #9
0
        /* Walk the directory hierarchy in uid order while keeping the uid iterator of
         * the existing index in sync.  A mismatch means one of: (a) an old document to
         * be deleted; (b) an unchanged document, to be left alone; or (c) a new
         * document, to be indexed.
         */

        private static void IndexDocs(IndexWriter writer, DirectoryInfo file, DirectoryInfo index, Operation operation)
        {
            // A complete reindex needs no view of the existing index.
            if (operation == Operation.CompleteReindex)
            {
                IndexDirectory(writer, null, file, operation);
                return;
            }

            // Incremental reindex: walk the files alongside the uid terms of the existing index.
            using (var reader = IndexReader.Open(FSDirectory.Open(index), true))
            using (var uidIter = reader.Terms(new Term("uid", "")))
            {
                IndexDirectory(writer, uidIter, file, operation);

                if (operation != Operation.RemoveStale)
                {
                    return;
                }

                // The call above should have left the iterator positioned after every uid
                // that matched an existing document. Whatever "uid" terms remain belong to
                // documents deleted since they were indexed, so remove them.
                while (true)
                {
                    var staleTerm = uidIter.Term;
                    if (staleTerm == null || staleTerm.Field != "uid")
                    {
                        break;
                    }

                    Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(staleTerm.Text));
                    writer.DeleteDocuments(staleTerm);
                    uidIter.Next();
                }
            }
        }
コード例 #10
0
ファイル: Program.cs プロジェクト: svolchkov/LuceneUpdate
        /// <summary>
        /// Updates the existing index at INDEX_DIR from the documents under the
        /// hard-coded source folder, then calls deleteDocs().
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var docDir = new DirectoryInfo(@"C:\clsdev");

            // Record when the index was last written, before this run modifies it.
            indexUpdateTime = Directory.GetLastWriteTime(indexLocation);

            try
            {
                // create == false: append to / update the existing index.
                using (var writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new CustomAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
                    IndexDirectory(writer, docDir);
                    Console.Out.WriteLine("Optimizing...");
                    writer.Optimize();
                    writer.Commit();
                }
            }
            catch (IOException e)
            {
                Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }

            // Runs regardless of whether indexing succeeded.
            deleteDocs();
        }
コード例 #11
0
        /// <summary>
        /// Rebuilds the index in <c>m_indexDir</c> from every file found directly under
        /// <c>m_xmlRoot</c>. Each XML node read produces one document holding a
        /// "Topic" field (the file name up to its first '.') plus one field per attribute.
        /// </summary>
        void BuildIndex()
        {
            // 'using' releases the index write lock even when parsing or indexing fails.
            using (var writer = new IndexWriter(FSDirectory.Open(m_indexDir), new SnowballAnalyzer(Version.LUCENE_30, "English"), true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (var fileInfo in new DirectoryInfo(m_xmlRoot).EnumerateFiles())
                {
                    // Topic is the file name up to the first dot; a name without a dot is used whole.
                    int    firstDot  = fileInfo.Name.IndexOf('.');
                    string topicName = firstDot < 0 ? fileInfo.Name : fileInfo.Name.Substring(0, firstDot);

                    // FullName avoids depending on m_xmlRoot ending with a path separator.
                    string xmlText = File.ReadAllText(fileInfo.FullName);

                    using (var textReader = new StringReader(xmlText))
                    using (XmlReader reader = XmlReader.Create(textReader))
                    {
                        // Create index data from XML: one document per node read.
                        while (reader.Read())
                        {
                            var doc = new Document();
                            doc.Add(new Field("Topic", topicName, Field.Store.YES, Field.Index.ANALYZED));

                            for (int i = 0; i < reader.AttributeCount; i++)
                            {
                                reader.MoveToAttribute(i);
                                doc.Add(new Field(reader.Name, reader.Value, Field.Store.YES, Field.Index.ANALYZED));
                            }

                            writer.AddDocument(doc);
                        }
                    }
                }

                writer.Optimize();
                writer.Commit();
            }
        }
コード例 #12
0
        /// <summary>
        /// Creates a fresh index at INDEX_DIR containing three sample documents, each
        /// with a stored-only "name" field and an analyzed "content" field.
        /// </summary>
        static void Index()
        {
            // Used to write out an index for later usage. 'using' guarantees the writer
            // (and its write lock) is released whatever exception is thrown.
            using (IndexWriter writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED))
            {
                try
                {
                    // A series of fields, used for indexing and searching.
                    Document doc1 = new Document();

                    // https://lucenenet.apache.org/docs/3.0.3/db/d65/class_lucene_1_1_net_1_1_documents_1_1_document.html#details
                    // https://lucene.apache.org/core/3_0_3/api/core/org/apache/lucene/document/Field.Index.html
                    // A field contains a name and a string value.
                    // If a field is stored, it is returned with the document when
                    // a search lands a hit on it.
                    // A field that is not indexed is not searchable.
                    doc1.Add(new Field("name", "doc 1", Field.Store.YES, Field.Index.NO));

                    // This field is indexed (searchable with a query) and analyzed (run through
                    // the analyzer, which splits it into searchable tokens).
                    // Alternatively a field may be indexed but not analyzed, in which case it is
                    // kept as a single token; useful for single-value fields such as names or ids.
                    doc1.Add(new Field("content", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));

                    // Finally, add the document to the index writer.
                    writer.AddDocument(doc1);

                    Document doc2 = new Document();
                    doc2.Add(new Field("name", "doc 2", Field.Store.YES, Field.Index.NO));
                    doc2.Add(new Field("content", "abc defg defg defg", Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(doc2);

                    Document doc3 = new Document();
                    doc3.Add(new Field("name", "doc 3", Field.Store.YES, Field.Index.NO));
                    doc3.Add(new Field("content", "qwerty defg defg", Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(doc3);

                    Console.Out.WriteLine("Optimizing...");

                    // Optimizes the created index for fast search. Best called once the
                    // index is fully built.
                    writer.Optimize();

                    // Makes all pending changes permanent in the index.
                    writer.Commit();
                }
                catch (IOException excep)
                {
                    Console.WriteLine(excep.Message);
                }
            }
        }
コード例 #13
0
        /* Open a pre-lockless index, do a delete and a setNorm against it,
         * optimize it, and verify searches after each step. */
        public virtual void  ChangeIndexNoAdds(System.String dirName)
        {
            dirName = FullDir(dirName);
            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

            // Step 1: the untouched index must return the expected hits.
            IndexSearcher searcher = new IndexSearcher(dir, true);
            TopDocs       found    = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000);
            ScoreDoc[]    hits     = found.ScoreDocs;
            Assert.AreEqual(34, hits.Length, "wrong number of hits");
            Document firstDoc = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("21", firstDoc.Get("id"), "wrong first document");
            searcher.Close();

            // Step 2: make sure we can do a delete & setNorm against this
            // pre-lockless segment.
            IndexReader reader       = IndexReader.Open(dir, false);
            int         deletedCount = reader.DeleteDocuments(new Term("id", "6"));
            Assert.AreEqual(1, deletedCount, "wrong delete count");
            reader.SetNorm(22, "content", (float)2.0);
            reader.Close();

            // Step 3: make sure the changes "took".
            searcher = new IndexSearcher(dir, true);
            found    = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000);
            hits     = found.ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            firstDoc = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", firstDoc.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.IndexReader);
            searcher.Close();

            // Step 4: optimize the existing index.
            IndexWriter optimizer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
            optimizer.Optimize();
            optimizer.Close();

            // Step 5: the same expectations must hold after optimizing.
            searcher = new IndexSearcher(dir, true);
            found    = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000);
            hits     = found.ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            firstDoc = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", firstDoc.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.IndexReader);
            searcher.Close();

            dir.Close();
        }
コード例 #14
0
ファイル: TestPayloads.cs プロジェクト: ravendb/lucenenet
        // Runs PerformTest twice: once against a RAMDirectory and once against an
        // FSDirectory in a temporary folder, which is removed afterwards.
        public virtual void  TestPayloadsEncoding()
        {
            rnd = NewRandom();

            // In-memory run.
            Directory memoryDir = new RAMDirectory();
            PerformTest(memoryDir);

            // On-disk run of the same test.
            System.IO.DirectoryInfo tempLocation = _TestUtil.GetTempDir("test_payloads");
            Directory               diskDir      = FSDirectory.Open(tempLocation);
            PerformTest(diskDir);

            _TestUtil.RmDir(tempLocation);
        }
コード例 #15
0
        // For every legacy index named in oldNames: unzip it, optimize it with a
        // fresh IndexWriter, run CheckIndex on the result and clean up.
        public virtual void  TestOptimizeOldIndex()
        {
            for (int idx = 0; idx < oldNames.Length; idx++)
            {
                System.String indexName = oldNames[idx];

                Unzip("../../test/core/index/index." + indexName, indexName);

                Directory dir = FSDirectory.Open(new System.IO.FileInfo(FullDir(indexName)));

                IndexWriter optimizer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
                optimizer.Optimize();
                optimizer.Close();

                _TestUtil.CheckIndex(dir);

                dir.Close();
                RmDir(indexName);
            }
        }
コード例 #16
0
        /// <summary>
        /// Reads the XML root from configuration, makes sure the sibling "index"
        /// folder exists, builds the index when that folder holds no files, and
        /// opens a searcher over it.
        /// </summary>
        public IndexController()
        {
            m_xmlRoot = Config.Get().m_pathToXML;

            // The index folder path, relative to the XML root.
            string indexPath = m_xmlRoot + @"..\index";

            bool indexFolderMissing = !Directory.Exists(indexPath);
            if (indexFolderMissing)
            {
                Directory.CreateDirectory(indexPath);
            }

            m_indexDir = new DirectoryInfo(indexPath);

            // A folder without files means no index has been built yet.
            bool indexIsEmpty = m_indexDir.GetFiles().Length == 0;
            if (indexIsEmpty)
            {
                BuildIndex();
            }

            m_searcher = new IndexSearcher(FSDirectory.Open(m_indexDir));
        }
コード例 #17
0
        // Runs two IndexThread workers against one shared on-disk IndexModifier,
        // then checks the resulting document count and that closing twice throws.
        private void  TestIndexInternal(int maxWait)
        {
            // Work on disk (instead of a RAMDirectory) to make sure potential
            // lock problems are tested.
            System.String tempRoot = System.IO.Path.GetTempPath();
            if (tempRoot == null)
            {
                throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
            }

            System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(tempRoot, "lucenetestindex"));
            Directory          diskDir  = FSDirectory.Open(indexDir);

            // Reset the state shared by the worker threads.
            IndexThread.id = 0;
            IndexThread.idStack.Clear();

            // 'true' creates a new index.
            IndexModifier index = new IndexModifier(diskDir, new StandardAnalyzer(), true);

            IndexThread firstWorker = new IndexThread(index, maxWait, 1);
            firstWorker.Start();
            IndexThread secondWorker = new IndexThread(index, maxWait, 2);
            secondWorker.Start();

            // Poll every 100 ms until both workers have finished.
            while (firstWorker.IsAlive || secondWorker.IsAlive)
            {
                System.Threading.Thread.Sleep(new System.TimeSpan(System.TimeSpan.TicksPerMillisecond * 100));
            }

            index.Optimize();

            int totalAdded   = firstWorker.added + secondWorker.added;
            int totalDeleted = firstWorker.deleted + secondWorker.deleted;
            Assert.AreEqual(totalAdded - totalDeleted, index.DocCount());

            index.Close();

            // Closing a second time must throw.
            try
            {
                index.Close();
                Assert.Fail();
            }
            catch (System.SystemException)
            {
                // expected exception
            }

            RmDir(indexDir);
        }
コード例 #18
0
        /// <summary>
        /// Indexes the directory given as the first command-line argument into a
        /// new index at INDEX_DIR and reports the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments; args[0] is the root directory to index.</param>
        public static void Main(String[] args)
        {
            var usage = typeof(IndexFiles) + " <root_directory>";

            if (args.Length == 0)
            {
                Console.Error.WriteLine("Usage: " + usage);
                Environment.Exit(1);
            }

            // Refuse to overwrite anything (file or directory) already at the index location.
            if (File.Exists(INDEX_DIR.FullName) || Directory.Exists(INDEX_DIR.FullName))
            {
                Console.Out.WriteLine("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
                Environment.Exit(1);
            }

            var docDir       = new DirectoryInfo(args[0]);
            var docDirExists = File.Exists(docDir.FullName) || Directory.Exists(docDir.FullName);

            if (!docDirExists)
            {
                Console.Out.WriteLine("Document directory '" + docDir.FullName + "' does not exist or is not readable, please check the path");
                Environment.Exit(1);
            }

            var start = DateTime.UtcNow;

            try
            {
                using (var writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED))
                {
                    Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
                    IndexDirectory(writer, docDir);
                    Console.Out.WriteLine("Optimizing...");
                    writer.Optimize();
                    writer.Commit();
                }

                // Elapsed wall-clock time. DateTime.Millisecond is only the 0-999
                // component of a timestamp, so the two timestamps are subtracted instead.
                var elapsedMilliseconds = (long) (DateTime.UtcNow - start).TotalMilliseconds;
                Console.Out.WriteLine(elapsedMilliseconds + " total milliseconds");
            }
            catch (IOException e)
            {
                Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
コード例 #19
0
        // Prepares the fixture: creates the "TestDoc" work folder under the configured
        // temp dir, a "testIndex" folder inside it, and two small text files to index.
        public override void  SetUp()
        {
            base.SetUp();

            System.String tempRoot = SupportClass.AppSettings.Get("tempDir", "");

            workDir = new System.IO.FileInfo(System.IO.Path.Combine(tempRoot, "TestDoc"));
            System.IO.Directory.CreateDirectory(workDir.FullName);

            indexDir = new System.IO.FileInfo(System.IO.Path.Combine(workDir.FullName, "testIndex"));
            System.IO.Directory.CreateDirectory(indexDir.FullName);

            // Open the index location once and close it again straight away.
            Directory opened = FSDirectory.Open(indexDir);
            opened.Close();

            files = new System.Collections.ArrayList();
            files.Add(CreateOutput("test.txt", "This is the first test file"));
            files.Add(CreateOutput("test2.txt", "This is the second test file"));
        }
コード例 #20
0
        // Runs the snapshot-deletion-policy scenario against an on-disk directory
        // (always removed afterwards) and then against an in-memory directory.
        public virtual void TestSnapshotDeletionPolicy_Renamed()
        {
            System.IO.DirectoryInfo tempLocation = _TestUtil.GetTempDir(INDEX_PATH);
            try
            {
                Directory diskDir = FSDirectory.Open(tempLocation);
                RunTest(diskDir);
                diskDir.Close();
            }
            finally
            {
                // Clean up the temp folder whether or not the run succeeded.
                _TestUtil.RmDir(tempLocation);
            }

            MockRAMDirectory memoryDir = new MockRAMDirectory();
            RunTest(memoryDir);
            memoryDir.Close();
        }
コード例 #21
0
        // Runs the snapshot-deletion-policy scenario against an on-disk directory under
        // the configured "tempDir" (removed before and after), then against an
        // in-memory directory.
        public virtual void  TestSnapshotDeletionPolicy_Renamed()
        {
            System.String      tempRoot     = SupportClass.AppSettings.Get("tempDir", "");
            System.IO.FileInfo diskLocation = new System.IO.FileInfo(System.IO.Path.Combine(tempRoot, INDEX_PATH));
            try
            {
                // A previous run sometimes leaves the folder behind.
                _TestUtil.RmDir(diskLocation);

                Directory diskDir = FSDirectory.Open(diskLocation);
                RunTest(diskDir);
                diskDir.Close();
            }
            finally
            {
                _TestUtil.RmDir(diskLocation);
            }

            MockRAMDirectory memoryDir = new MockRAMDirectory();
            RunTest(memoryDir);
            memoryDir.Close();
        }
コード例 #22
0
ファイル: Program.cs プロジェクト: svolchkov/LuceneUpdate
        /// <summary>
        /// Removes from the index at INDEX_DIR every document whose stored "path"
        /// value no longer refers to an existing file, then optimizes and commits.
        /// </summary>
        public static void deleteDocs()
        {
            // One directory instance is shared by reader and writer; all three are
            // disposed even when an exception is thrown.
            using (var directory = FSDirectory.Open(INDEX_DIR))
            using (IndexReader reader = IndexReader.Open(directory, true))
            using (var writer = new IndexWriter(directory, new CustomAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // Document numbers range over [0, MaxDoc) and may contain deleted slots,
                // so iterate to MaxDoc (not NumDocs) and skip deleted entries.
                int maxDoc = reader.MaxDoc;
                for (int i = 0; i < maxDoc; i++)
                {
                    if (reader.IsDeleted(i))
                    {
                        continue;
                    }

                    Document doc   = reader.Document(i);
                    String   docId = doc.Get("path");

                    // A document without a stored "path" cannot be matched to a file.
                    if (docId == null)
                    {
                        continue;
                    }

                    if (!File.Exists(docId))
                    {
                        var term = new Term("path", docId);
                        writer.DeleteDocuments(term);
                        Console.WriteLine("Deleting " + docId);
                    }
                }

                writer.Optimize();
                writer.Commit();
            }
        }
コード例 #23
0
ファイル: Indexer.cs プロジェクト: sikayYang/Idiot-Talker
        /// <summary>
        /// Builds an index of all files under <paramref name="dataDir"/> and stores it
        /// under <paramref name="indexDir"/>.
        /// </summary>
        /// <param name="indexDir">Directory the index is written to; the process exits if it already exists.</param>
        /// <param name="dataDir">Directory whose files are indexed.</param>
        public static void Index(string indexDir, string dataDir)
        {
            bool indexAlreadyThere = Directory.Exists(indexDir);
            if (indexAlreadyThere)
            {
                Console.Out.WriteLine("Cannot save index to '" + indexDir + "' directory, please delete it first");
                Environment.Exit(1);
            }

            try
            {
                var target   = FSDirectory.Open(new DirectoryInfo(indexDir));
                var analyzer = new StandardAnalyzer(Version.LUCENE_30);

                // 'true' creates a new index.
                using (var writer = new IndexWriter(target, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    Console.Out.WriteLine("Indexing to directory '" + indexDir + "'...");

                    var source = new DirectoryInfo(dataDir);
                    IndexDirectory(writer, source);

                    writer.Commit();
                }
            }
            catch (IOException e)
            {
                Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
コード例 #24
0
        /// <summary>
        /// Deletes every document whose "path" term equals the first command-line
        /// argument from the index stored in the "index" directory.
        /// </summary>
        /// <param name="args">Command-line arguments; args[0] is the unique term to delete.</param>
        public static void Main(System.String[] args)
        {
            if (args.Length == 0)
            {
                Console.Error.WriteLine("Usage: " + typeof(DeleteFiles) + " <unique_term>");
                Environment.Exit(1);
            }

            try
            {
                // The reader must be writable (readOnly == false) because documents are deleted through it.
                using (var directory = FSDirectory.Open("index"))
                using (var reader = IndexReader.Open(directory, false))
                {
                    var pathTerm     = new Term("path", args[0]);
                    var removedCount = reader.DeleteDocuments(pathTerm);

                    Console.Out.WriteLine("deleted " + removedCount + " documents containing " + pathTerm);

                    // Documents can also be deleted by their internal id:

                    /*
                     * for (int i = 0; i < reader.MaxDoc; i++) {
                     *  Console.Out.WriteLine("Deleting document with id " + i);
                     *  reader.DeleteDocument(i);
                     * }
                     */

                    reader.Commit();
                }
            }
            catch (Exception e)
            {
                Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
コード例 #25
0
        // Builds a small index under dirName: 35 documents, one extra document added by
        // AddNoProxDoc through a second writer, one deleted document and one changed norm.
        // doCFS is applied to both writers as their UseCompoundFile setting.
        public virtual void  CreateIndex(System.String dirName, bool doCFS)
        {
            // Start from a clean slate.
            RmDir(dirName);
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

            // First writer: creates the index and adds the 35 regular documents.
            IndexWriter firstWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            firstWriter.UseCompoundFile = doCFS;
            firstWriter.SetMaxBufferedDocs(10);

            int docId = 0;
            while (docId < 35)
            {
                AddDoc(firstWriter, docId);
                docId++;
            }

            Assert.AreEqual(35, firstWriter.MaxDoc(), "wrong doc count");
            firstWriter.Close();

            // open fresh writer so we get no prx file in the added segment
            IndexWriter secondWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            secondWriter.UseCompoundFile = doCFS;
            secondWriter.SetMaxBufferedDocs(10);
            AddNoProxDoc(secondWriter);
            secondWriter.Close();

            // Delete one doc so we get a .del file:
            IndexReader reader       = IndexReader.Open(dir, false);
            int         deletedCount = reader.DeleteDocuments(new Term("id", "7"));
            Assert.AreEqual(1, deletedCount, "didn't delete the right number of documents");

            // Set one norm so we get a .s0 file:
            reader.SetNorm(21, "content", (float)1.5);
            reader.Close();
        }
コード例 #26
0
        /// <summary>
        /// Command-line entry point of the search demo. Parses the options
        /// listed in the usage string, opens the index read-only, then reads
        /// queries line by line (from the <c>-queries</c> file, or from standard
        /// input when none is given) and runs each one either paged or streamed.
        /// Reading stops at end of input or at the first blank line.
        /// </summary>
        /// <param name="args">
        /// Command-line options: <c>-index dir</c>, <c>-field f</c>, <c>-repeat n</c>,
        /// <c>-queries file</c>, <c>-raw</c>, <c>-norms field</c>, <c>-paging hitsPerPage</c>.
        /// </param>
        public static void Main(String[] args)
        {
            String usage = "Usage:\t" + typeof(SearchFiles) + "[-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";

            usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
            if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
            {
                Console.Out.WriteLine(usage);
                Environment.Exit(0);
            }

            String index       = "index";
            String field       = "contents";
            String queries     = null;
            int    repeat      = 0;
            bool   raw         = false;
            String normsField  = null;
            bool   paging      = true;
            int    hitsPerPage = 10;

            for (int i = 0; i < args.Length; i++)
            {
                String option = args[i];

                // Every option except -raw consumes the following argument; fail
                // with the usage text instead of an IndexOutOfRangeException when
                // that argument is missing.
                bool takesValue = "-index".Equals(option) || "-field".Equals(option) || "-queries".Equals(option) ||
                                  "-repeat".Equals(option) || "-norms".Equals(option) || "-paging".Equals(option);
                if (takesValue && i + 1 >= args.Length)
                {
                    Console.Error.WriteLine("Missing value for option " + option);
                    Console.Error.WriteLine(usage);
                    Environment.Exit(1);
                }

                if ("-index".Equals(option))
                {
                    index = args[i + 1];
                    i++;
                }
                else if ("-field".Equals(option))
                {
                    field = args[i + 1];
                    i++;
                }
                else if ("-queries".Equals(option))
                {
                    queries = args[i + 1];
                    i++;
                }
                else if ("-repeat".Equals(option))
                {
                    repeat = Int32.Parse(args[i + 1]);
                    i++;
                }
                else if ("-raw".Equals(option))
                {
                    raw = true;
                }
                else if ("-norms".Equals(option))
                {
                    normsField = args[i + 1];
                    i++;
                }
                else if ("-paging".Equals(option))
                {
                    if (args[i + 1].Equals("false"))
                    {
                        paging = false;
                    }
                    else
                    {
                        hitsPerPage = Int32.Parse(args[i + 1]);
                        if (hitsPerPage == 0)
                        {
                            // A page size of zero also selects streaming search.
                            paging = false;
                        }
                    }
                    i++;
                }
            }

            IndexReader  indexReader = null;
            StreamReader queryReader = null;

            try
            {
                // only searching, so read-only=true
                indexReader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(index)), true);

                if (normsField != null)
                {
                    indexReader = new OneNormsReader(indexReader, normsField);
                }

                Searcher searcher = new IndexSearcher(indexReader);
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

                // Open the query source exactly once. The previous code built two
                // throw-away readers per source just to read BaseStream and
                // CurrentEncoding, which opened the file twice and leaked a handle.
                if (queries != null)
                {
                    queryReader = new StreamReader(queries, Encoding.Default);
                }
                else
                {
                    queryReader = new StreamReader(Console.OpenStandardInput(), Encoding.UTF8);
                }

                var parser = new QueryParser(Version.LUCENE_30, field, analyzer);
                while (true)
                {
                    if (queries == null)
                    {
                        // prompt the user
                        Console.Out.WriteLine("Enter query: ");
                    }

                    String line = queryReader.ReadLine();

                    // null means end of input
                    if (line == null)
                    {
                        break;
                    }

                    line = line.Trim();
                    if (line.Length == 0)
                    {
                        break;
                    }

                    Query query = parser.Parse(line);
                    Console.Out.WriteLine("Searching for: " + query.ToString(field));

                    if (repeat > 0)
                    {
                        // repeat & time as benchmark
                        DateTime start = DateTime.UtcNow;
                        for (int i = 0; i < repeat; i++)
                        {
                            searcher.Search(query, null, 100);
                        }
                        DateTime end = DateTime.UtcNow;

                        // Subtract the timestamps themselves: DateTime.Millisecond is
                        // only the 0-999 component of the clock, not an elapsed time.
                        long elapsedMs = (long)(end - start).TotalMilliseconds;
                        Console.Out.WriteLine("Time: " + elapsedMs + "ms");
                    }

                    if (paging)
                    {
                        DoPagingSearch(queryReader, searcher, query, hitsPerPage, raw, queries == null);
                    }
                    else
                    {
                        DoStreamingSearch(searcher, query);
                    }
                }
            }
            finally
            {
                // Release the query source and the index even when parsing or
                // searching throws.
                if (queryReader != null)
                {
                    queryReader.Close();
                }
                if (indexReader != null)
                {
                    indexReader.Dispose();
                }
            }
        }
コード例 #27
0
        /// <summary>
        /// Builds and checks two separate indexes in the temp directory, merges
        /// them into a third one with <c>AddIndexes</c>, and verifies the merged
        /// index both before and after <c>Optimize</c>. The <c>norms</c>,
        /// <c>modifiedNorms</c> and <c>numDocNorms</c> members are reset between the
        /// first two indexes and combined before the merged index is checked.
        /// </summary>
        public virtual void  TestNorms()
        {
            // All three indexes live below the temp directory.
            System.String tmpRoot = System.IO.Path.GetTempPath();
            if (tmpRoot == null)
            {
                throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
            }

            // ---- first stand-alone index ----
            Directory firstDir = FSDirectory.Open(
                new System.IO.FileInfo(System.IO.Path.Combine(tmpRoot, "lucenetestindex1")));

            IndexWriter.Unlock(firstDir);

            norms         = new System.Collections.ArrayList();
            modifiedNorms = new System.Collections.ArrayList();

            CreateIndex(firstDir);
            DoTestNorms(firstDir);

            // Keep the first index's bookkeeping aside, then start over for the second one.
            System.Collections.ArrayList firstNorms         = norms;
            System.Collections.ArrayList firstModifiedNorms = modifiedNorms;
            int firstNumDocNorms = numDocNorms;

            norms         = new System.Collections.ArrayList();
            modifiedNorms = new System.Collections.ArrayList();
            numDocNorms   = 0;

            // ---- second stand-alone index ----
            Directory secondDir = FSDirectory.Open(
                new System.IO.FileInfo(System.IO.Path.Combine(tmpRoot, "lucenetestindex2")));

            CreateIndex(secondDir);
            DoTestNorms(secondDir);

            // ---- third index: receives both of the above ----
            Directory mergedDir = FSDirectory.Open(
                new System.IO.FileInfo(System.IO.Path.Combine(tmpRoot, "lucenetestindex3")));

            CreateIndex(mergedDir);

            IndexWriter mergeWriter = new IndexWriter(mergedDir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
            mergeWriter.SetMaxBufferedDocs(5);
            mergeWriter.SetMergeFactor(3);
            mergeWriter.AddIndexes(new Directory[] { firstDir, secondDir });
            mergeWriter.Close();

            // Expected state of the merged index: first index's entries followed by the second's.
            firstNorms.AddRange(norms);
            norms = firstNorms;
            firstModifiedNorms.AddRange(modifiedNorms);
            modifiedNorms = firstModifiedNorms;
            numDocNorms   = numDocNorms + firstNumDocNorms;

            // Check the merged index as-is ...
            VerifyIndex(mergedDir);
            DoTestNorms(mergedDir);

            // ... and once more after optimizing it.
            IndexWriter optimizeWriter = new IndexWriter(mergedDir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
            optimizeWriter.SetMaxBufferedDocs(5);
            optimizeWriter.SetMergeFactor(3);
            optimizeWriter.Optimize();
            optimizeWriter.Close();
            VerifyIndex(mergedDir);

            firstDir.Close();
            secondDir.Close();
            mergedDir.Close();
        }
コード例 #28
0
        /// <summary>
        /// Creates a 35-document index, deletes the document with id "7", sets
        /// one norm on the "content" field, and then asserts that the directory
        /// contains exactly the expected set of file names. The index directory
        /// is removed again when the test finishes, whether it passes or fails.
        /// </summary>
        public virtual void TestExactFileNames()
        {
            System.String outputDir = "lucene.backwardscompat0.index";
            RmDir(outputDir);

            try
            {
                Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(outputDir)));
                try
                {
                    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                                         IndexWriter.MaxFieldLength.UNLIMITED);
                    writer.SetRAMBufferSizeMB(16.0);
                    for (int i = 0; i < 35; i++)
                    {
                        AddDoc(writer, i);
                    }
                    Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count");
                    writer.Close();

                    // Delete one doc so we get a .del file:
                    IndexReader reader     = IndexReader.Open(dir, false);
                    Term        searchTerm = new Term("id", "7");
                    int         delCount   = reader.DeleteDocuments(searchTerm);
                    Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

                    // Set one norm so we get a .s0 file:
                    reader.SetNorm(21, "content", (float)1.5);
                    reader.Close();

                    // The numbering of fields can vary depending on which
                    // runtime is in use: "content" may be bound to field 0 or
                    // to field 1. Figure out which field number corresponds to
                    // "content" and build the expected file names accordingly.
                    int contentFieldIndex = -1;
                    CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
                    try
                    {
                        FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
                        for (int i = 0; i < fieldInfos.Size(); i++)
                        {
                            FieldInfo fi = fieldInfos.FieldInfo(i);
                            if (fi.name_ForNUnit.Equals("content"))
                            {
                                contentFieldIndex = i;
                                break;
                            }
                        }
                    }
                    finally
                    {
                        // Close the compound reader even if reading the field infos throws.
                        cfsReader.Close();
                    }

                    // The message names the segment that was actually read (_0.cfs).
                    Assert.IsTrue(contentFieldIndex != -1,
                                  "could not locate the 'content' field number in the _0.cfs segment");

                    // Now verify file names:
                    System.String[] expected = new System.String[]
                    { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" };

                    System.String[] actual = dir.ListAll();
                    System.Array.Sort(expected);
                    System.Array.Sort(actual);
                    if (!CollectionsHelper.Equals(expected, actual))
                    {
                        Assert.Fail("incorrect filenames in index: expected:\n    " + AsString(expected) +
                                    "\n  actual:\n    " + AsString(actual));
                    }
                }
                finally
                {
                    // Previously the directory stayed open whenever an assertion
                    // failed; close it before the files are removed below.
                    dir.Close();
                }
            }
            finally
            {
                RmDir(outputDir);
            }
        }
コード例 #29
0
        /// <summary>
        /// Opens the index under <c>FullDir(dirName)</c> read-only, runs
        /// <c>_TestUtil.CheckIndex</c> on it, and verifies stored fields, the single
        /// deleted document (id 7), the ranking of a "content:aaa" search and, for
        /// indexes whose name does not start with "19.", "20.", "21." or "22.",
        /// searches on the "utf8" field.
        /// </summary>
        /// <param name="dirName">Index directory name, resolved through <c>FullDir</c>.</param>
        /// <param name="oldName">Version-prefixed name of the index; selects the version-dependent checks.</param>
        public virtual void  searchIndex(System.String dirName, System.String oldName)
        {
            dirName = FullDir(dirName);

            Directory     dir      = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
            IndexSearcher searcher = new IndexSearcher(dir, true);
            IndexReader   reader   = searcher.IndexReader;

            _TestUtil.CheckIndex(dir);

            // The unicode checks are only run on indices >= 2.3, i.e. when the
            // name carries none of the older version prefixes.
            bool isPre23 = oldName.StartsWith("19.") || oldName.StartsWith("20.") ||
                           oldName.StartsWith("21.") || oldName.StartsWith("22.");

            for (int docId = 0; docId < 35; docId++)
            {
                if (reader.IsDeleted(docId))
                {
                    // Only ID 7 is deleted
                    Assert.AreEqual(7, docId);
                    continue;
                }

                Document stored       = reader.Document(docId);
                var      storedFields = stored.GetFields();

                // Skip the field checks for old indexes and for documents carrying "content3".
                if (isPre23 || stored.GetField("content3") != null)
                {
                    continue;
                }

                int expectedFieldCount = oldName.StartsWith("29.") ? 7 : 5;
                Assert.AreEqual(expectedFieldCount, storedFields.Count);

                Field idField = stored.GetField("id");
                Assert.AreEqual("" + docId, idField.StringValue);

                Field utf8Field = (Field)stored.GetField("utf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", utf8Field.StringValue);

                Field autf8Field = (Field)stored.GetField("autf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", autf8Field.StringValue);

                Field content2Field = (Field)stored.GetField("content2");
                Assert.AreEqual("here is more content with aaa aaa aaa", content2Field.StringValue);

                Field nonAsciiNameField = (Field)stored.GetField("fie\u2C77ld");
                Assert.AreEqual("field with non-ascii name", nonAsciiNameField.StringValue);
            }

            TermQuery  contentQuery = new TermQuery(new Term("content", "aaa"));
            ScoreDoc[] hits         = searcher.Search(contentQuery, null, 1000).ScoreDocs;

            // Document #21 is expected first because its norm was increased.
            Document topDoc = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", topDoc.Get("id"), "didn't get the right document first");

            TestHits(hits, 34, searcher.IndexReader);

            if (!isPre23)
            {
                // Test on indices >= 2.3
                hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);

                hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);

                hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
            }

            searcher.Close();
            dir.Close();
        }
コード例 #30
0
        /// <summary>
        /// Opens a pre-lockless index, appends ten documents, deletes one
        /// document, changes one norm, optimizes, and checks the search results
        /// after each of those steps.
        /// </summary>
        /// <param name="dirName">Index directory name; resolved through <c>FullDir</c> and also used to pick the expected doc count.</param>
        public virtual void  ChangeIndexWithAdds(System.String dirName)
        {
            System.String versionName = dirName;
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

            // Append ten documents (ids 35..44) to the existing index.
            IndexWriter appendWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
            for (int offset = 0; offset < 10; offset++)
            {
                AddDoc(appendWriter, 35 + offset);
            }

            // make sure writer sees right total -- writer seems not to know about deletes in .del?
            int expectedMaxDoc = Compare(versionName, "24") < 0 ? 45 : 46;
            Assert.AreEqual(expectedMaxDoc, appendWriter.MaxDoc(), "wrong doc count");
            appendWriter.Close();

            // Searching must now see the added documents.
            IndexSearcher afterAddSearcher = new IndexSearcher(dir, true);
            ScoreDoc[]    hits             = afterAddSearcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Document      topDoc           = afterAddSearcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", topDoc.Get("id"), "wrong first document");
            TestHits(hits, 44, afterAddSearcher.IndexReader);
            afterAddSearcher.Close();

            // make sure we can do delete & setNorm against this
            // pre-lockless segment:
            IndexReader modifier     = IndexReader.Open(dir, false);
            int         removedCount = modifier.DeleteDocuments(new Term("id", "6"));

            Assert.AreEqual(1, removedCount, "wrong delete count");
            modifier.SetNorm(22, "content", (float)2.0);
            modifier.Close();

            // make sure they "took":
            IndexSearcher afterDeleteSearcher = new IndexSearcher(dir, true);
            hits = afterDeleteSearcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            topDoc = afterDeleteSearcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", topDoc.Get("id"), "wrong first document");
            TestHits(hits, 43, afterDeleteSearcher.IndexReader);
            afterDeleteSearcher.Close();

            // Optimize the index; the results must not change.
            IndexWriter optimizeWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
            optimizeWriter.Optimize();
            optimizeWriter.Close();

            IndexSearcher afterOptimizeSearcher = new IndexSearcher(dir, true);
            hits = afterOptimizeSearcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            topDoc = afterOptimizeSearcher.Doc(hits[0].Doc);
            TestHits(hits, 43, afterOptimizeSearcher.IndexReader);
            Assert.AreEqual("22", topDoc.Get("id"), "wrong first document");
            afterOptimizeSearcher.Close();

            dir.Close();
        }