/// <summary>
        /// Gets the index searcher.
        /// </summary>
        /// <param name="indexName">Name of the index.</param>
        /// <returns></returns>
        private static IndexSearcher GetIndexSearcher(string indexName)
        {
            string      path      = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "App_data", "TEMP", "ExamineIndexes", indexName, "Index");
            FSDirectory directory = new MMapDirectory(new DirectoryInfo(path));

            return(new IndexSearcher(directory, true));
        }
Beispiel #2
0
        private static void Main(string[] args)
        {
            Thread.CurrentThread.Name = "Main";
            var exitEvent = new ManualResetEvent(false);

            Console.CancelKeyPress += (sender, eventArgs) =>
            {
                eventArgs.Cancel = true;
                exitEvent.Set();
            };

            using var dir      = new MMapDirectory("./lucene-index", new NativeFSLockFactory());
            using var analyzer = new StandardAnalyzer(Index.AppLuceneVersion);
            var indexConfig = new IndexWriterConfig(Index.AppLuceneVersion, analyzer);

            using var reader       = DirectoryReader.Open(dir);
            using var spellChecker = new SpellChecker(reader.Directory);

            spellChecker.IndexDictionary(new LuceneDictionary(reader, "Body"), indexConfig, true);

            LogProvider.SetCurrentLogProvider(ConsoleLogProvider.Instance);
            using var bus =
                      RabbitHutch.CreateBus(Environment.GetEnvironmentVariable("RABBITMQ_CSTRING") ?? "host=localhost");
            bus.RespondAsync <Spellings.Request, Spellings>(request => Task.Factory.StartNew(() =>
            {
                var similar   = spellChecker.SuggestSimilar(request.Text, 6);
                var spellings = new Spellings {
                    spellings = similar
                };
                return(spellings);
            }));

            Console.WriteLine("Running...\nPress Ctrl+C to exit");
            exitEvent.WaitOne();
        }
Beispiel #3
0
        public int[] searchIndex(string text)
        {
            var directory     = new DirectoryInfo("MovieTitlesIndex");
            var mMapDirectory = new MMapDirectory(directory);
            var Index         = mMapDirectory;

            using (var reader = IndexReader.Open(Index, true))
                using (var searcher = new IndexSearcher(reader))
                {
                    using (Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
                    {
                        var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Title", analyzer);
                        var query       = queryParser.Parse(text);
                        var collector   = TopScoreDocCollector.Create(20, true);
                        searcher.Search(query, collector);
                        var matches = collector.TopDocs().ScoreDocs;

                        var matchedList = new int[matches.Count()];
                        int i           = 0;

                        foreach (var item in matches)
                        {
                            var id  = item.Doc;
                            var doc = searcher.Doc(id);
                            int titleid;
                            Int32.TryParse(doc.GetField("id").StringValue, out titleid);
                            matchedList[i] = titleid;
                            i++;
                        }

                        return(matchedList);
                    }
                }
        }
Beispiel #4
0
        static void Main(string[] args)
        {
            var sw = Stopwatch.StartNew();
            // Delete index from previous run
            var directoryName = "index";

            if (System.IO.Directory.Exists(directoryName))
            {
                System.IO.Directory.Delete(directoryName, true);
            }

            using (Directory directory = new MMapDirectory("index"))
                using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(LuceneVersion.LUCENE_48))
                {
                    var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
                    using (var ixw = new IndexWriter(directory, config))
                    {
                        IList <Document> documents = new List <Document>(2000);
                        for (int i = 0; i < 1000; i++)
                        {
                            var document = new Document
                            {
                                new StringField("id", Guid.NewGuid().ToString(), Field.Store.YES),
                                new StringField("notTokenized", "Will not be tokenized", Field.Store.YES),
                                new TextField("content", "Hello world", Field.Store.YES),
                                new Int32Field("intValue", 32, Field.Store.YES),
                                new Int32Field("intNotStoredValue", 32, Field.Store.NO),
                                new NumericDocValuesField("docValue", 64)
                            };
                            documents.Add(document);
                        }


                        for (int i = 0; i < 1000; i++)
                        {
                            var document2 = new Document
                            {
                                new StringField("id", Guid.NewGuid().ToString(), Field.Store.YES),
                                new StringField("notTokenized", "Will not be tokenized", Field.Store.YES),
                                new TextField("content", "Hello world 2", Field.Store.YES),
                                new Int32Field("intValue", 33, Field.Store.YES),
                                new Int32Field("intNotStoredValue", 32, Field.Store.NO),
                                new NumericDocValuesField("docValue", 65)
                            };
                            documents.Add(document2);
                        }

                        ixw.AddDocuments(documents);
                        ixw.Commit();

                        ixw.DeleteDocuments(NumericRangeQuery.NewInt32Range("intValue", 33, 33, true, true));
                        ixw.Commit();
                    }
                }

            Console.WriteLine(sw.ElapsedMilliseconds);
            Console.ReadKey();
        }
Beispiel #5
0
        public LuceneSearchEngine()
        {
            IndexDefinition = new EntityIndexDefinition <TEntity>();

            MapDirectory = new MMapDirectory(IndexDefinition.IndexDirectory);

            Analyzer = new StandardAnalyzer(Core.LuceneVersion);
            Parser   = new MultiFieldQueryParser(Core.LuceneVersion,
                                                 IndexDefinition.GetSearchableFields().ToArray(), Analyzer);
            _writerConfiguration = new IndexWriterConfig(Core.LuceneVersion, Analyzer);
        }
Beispiel #6
0
        public Index(string indexPath)
        {
            //prepare lucene
            var dir = new MMapDirectory(indexPath, new NativeFSLockFactory());

            _analyzer = new StandardAnalyzer(AppLuceneVersion);
            var indexConfig = new IndexWriterConfig(AppLuceneVersion, _analyzer);

            _indexWriter = new IndexWriter(dir, indexConfig);
            Debug.WriteLine($"Records in index: {_indexWriter.NumDocs}");
        }
Beispiel #7
0
        /// <summary>
        /// Initializes this instance.
        /// </summary>
        private void Initialize()
        {
            //Get the configuration section
            BestBetsSection config = (BestBetsSection)ConfigurationManager.GetSection("nci/search/bestbets");

            if (config == null)
            {
                throw new ConfigurationErrorsException("BestBetsSection (nci/search/bestbets) is missing from configuration");
            }

            //Load the path config
            if (String.IsNullOrWhiteSpace(config.PathConfigurationClass))
            {
                throw new ConfigurationErrorsException("pathConfigurationClass is required for BestBetsSection (nci/search/bestbets)");
            }

            //We should be inside a block of code that has a lock, so no need to lock class.
            IBestBetPathConfiguration pathConfig = LoadPathConfigurationClass(config.PathConfigurationClass);

            //Get configuration information
            _luceneIndexPath = pathConfig.LuceneIndexPath;

            _bestBetsPath = pathConfig.BestBetsFilePath;


            if (!IO.Directory.Exists(_luceneIndexPath))
            {
                IO.Directory.CreateDirectory(_luceneIndexPath);
            }

            //We are using an MMapDirectory as the default option (FSSimpleDirectory) does not handle
            //multi-threading well.
            _luceneIndex = MMapDirectory.Open(_luceneIndexPath);

            //If there is no index at that location, then build the index.  Otherwise, we will
            //let the scheduler build it.
            if (!IndexReader.IndexExists(_luceneIndex))
            {
                BuildIndex();
            }
        }
 private static IndexWriter InitWriter(string IndexDir, bool isNewCreate = true)
 {
     IndexWriter.MaxFieldLength maxFieldLength = IndexWriter.MaxFieldLength.LIMITED;
     Lucene.Net.Store.Directory Directory      = new MMapDirectory(new DirectoryInfo(IndexDir), new SimpleFSLockFactory()); //using SimpleFSDirectory if memory is not enough
     return(new IndexWriter(Directory, Analyzer, isNewCreate, maxFieldLength));
 }
Beispiel #9
0
        public virtual void Test()
        {
            int[]     ints  = new int[7];
            Int32sRef input = new Int32sRef(ints, 0, ints.Length);
            int       seed  = Random.Next();

            Directory dir = new MMapDirectory(CreateTempDir("2BFST"));

            for (int doPackIter = 0; doPackIter < 2; doPackIter++)
            {
                bool doPack = doPackIter == 1;

                // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
                if (!doPack)
                {
                    Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
                    Outputs <object> outputs   = NoOutputs.Singleton;
                    object           NO_OUTPUT = outputs.NoOutput;
                    Builder <object> b         = new Builder <object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

                    int       count  = 0;
                    Random    r      = new Random(seed);
                    int[]     ints2  = new int[200];
                    Int32sRef input2 = new Int32sRef(ints2, 0, ints2.Length);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        for (int i = 10; i < ints2.Length; i++)
                        {
                            ints2[i] = r.Next(256);
                        }
                        b.Add(input2, NO_OUTPUT);
                        count++;
                        if (count % 100000 == 0)
                        {
                            Console.WriteLine(count + ": " + b.GetFstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes");
                        }
                        if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024)
                        {
                            break;
                        }
                        NextInput(r, ints2);
                    }

                    FST <object> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2));
                            NextInput(r, ints2);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        Int32sRefFSTEnum <object> fstEnum = new Int32sRefFSTEnum <object>(fst);

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            Int32sRefFSTEnum.InputOutput <object> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(input2, pair.Input);
                            Assert.AreEqual(NO_OUTPUT, pair.Output);
                            upto++;
                            NextInput(r, ints2);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <object>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ ByteSequenceOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
                    Outputs <BytesRef> outputs = ByteSequenceOutputs.Singleton;
                    Builder <BytesRef> b       = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

                    var      outputBytes = new byte[20];
                    BytesRef output      = new BytesRef(outputBytes);
                    Arrays.Fill(ints, 0);
                    int    count = 0;
                    Random r     = new Random(seed);
                    while (true)
                    {
                        r.NextBytes(outputBytes);
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, BytesRef.DeepCopyOf(output));
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes");
                        }
                        if (b.GetFstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST <BytesRef> fst = b.Finish();
                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        r = new Random(seed);
                        Arrays.Fill(ints, 0);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            r.NextBytes(outputBytes);
                            Assert.AreEqual(output, Util.Get(fst, input));
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        Int32sRefFSTEnum <BytesRef> fstEnum = new Int32sRefFSTEnum <BytesRef>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            Int32sRefFSTEnum.InputOutput <BytesRef> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            r.NextBytes(outputBytes);
                            Assert.AreEqual(output, pair.Output);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <BytesRef>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ PositiveIntOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
                    Outputs <long?> outputs = PositiveInt32Outputs.Singleton;
                    Builder <long?> b       = new Builder <long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

                    long output = 1;

                    Arrays.Fill(ints, 0);
                    int    count = 0;
                    Random r     = new Random(seed);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, output);
                        output += 1 + r.Next(10);
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes");
                        }
                        if (b.GetFstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST <long?> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints, 0);

                        output = 1;
                        r      = new Random(seed);
                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }

                            // forward lookup:
                            Assert.AreEqual(output, (long)Util.Get(fst, input));
                            // reverse lookup:
                            Assert.AreEqual(input, Util.GetByOutput(fst, output));
                            output += 1 + r.Next(10);
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        Int32sRefFSTEnum <long?> fstEnum = new Int32sRefFSTEnum <long?>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        output = 1;
                        while (true)
                        {
                            Int32sRefFSTEnum.InputOutput <long?> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            Assert.AreEqual(output, pair.Output.Value);
                            output += 1 + r.Next(10);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <long?>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }
            }
            dir.Dispose();
        }
Beispiel #10
0
        public virtual void Test()
        {
            int[] ints = new int[7];
            IntsRef input = new IntsRef(ints, 0, ints.Length);
            int seed = Random().Next();

            Directory dir = new MMapDirectory(CreateTempDir("2BFST"));

            for (int doPackIter = 0; doPackIter < 2; doPackIter++)
            {
                bool doPack = doPackIter == 1;

                // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
                if (!doPack)
                {
                    Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
                    Outputs<object> outputs = NoOutputs.Singleton;
                    object NO_OUTPUT = outputs.NoOutput;
                    Builder<object> b = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                    int count = 0;
                    Random r = new Random(seed);
                    int[] ints2 = new int[200];
                    IntsRef input2 = new IntsRef(ints2, 0, ints2.Length);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        for (int i = 10; i < ints2.Length; i++)
                        {
                            ints2[i] = r.Next(256);
                        }
                        b.Add(input2, NO_OUTPUT);
                        count++;
                        if (count % 100000 == 0)
                        {
                            Console.WriteLine(count + ": " + b.FstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes");
                        }
                        if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024)
                        {
                            break;
                        }
                        NextInput(r, ints2);
                    }

                    FST<object> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2));
                            NextInput(r, ints2);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        IntsRefFSTEnum<object> fstEnum = new IntsRefFSTEnum<object>(fst);

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            IntsRefFSTEnum.InputOutput<object> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(input2, pair.Input);
                            Assert.AreEqual(NO_OUTPUT, pair.Output);
                            upto++;
                            NextInput(r, ints2);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST<object>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ ByteSequenceOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
                    Outputs<BytesRef> outputs = ByteSequenceOutputs.Singleton;
                    Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                    var outputBytes = new byte[20];
                    BytesRef output = new BytesRef(outputBytes);
                    Arrays.Fill(ints, 0);
                    int count = 0;
                    Random r = new Random(seed);
                    while (true)
                    {
                        r.NextBytes(outputBytes);
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, BytesRef.DeepCopyOf(output));
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                        }
                        if (b.FstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST<BytesRef> fst = b.Finish();
                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        r = new Random(seed);
                        Arrays.Fill(ints, 0);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            r.NextBytes((byte[])(Array)outputBytes);
                            Assert.AreEqual(output, Util.Get(fst, input));
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        IntsRefFSTEnum<BytesRef> fstEnum = new IntsRefFSTEnum<BytesRef>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            IntsRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            r.NextBytes((byte[])(Array)outputBytes);
                            Assert.AreEqual(output, pair.Output);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST<BytesRef>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ PositiveIntOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
                    Outputs<long?> outputs = PositiveIntOutputs.Singleton;
                    Builder<long?> b = new Builder<long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                    long output = 1;

                    Arrays.Fill(ints, 0);
                    int count = 0;
                    Random r = new Random(seed);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, output);
                        output += 1 + r.Next(10);
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                        }
                        if (b.FstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST<long?> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints, 0);

                        output = 1;
                        r = new Random(seed);
                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }

                            // forward lookup:
                            Assert.AreEqual(output, (long)Util.Get(fst, input));
                            // reverse lookup:
                            Assert.AreEqual(input, Util.GetByOutput(fst, output));
                            output += 1 + r.Next(10);
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        IntsRefFSTEnum<long?> fstEnum = new IntsRefFSTEnum<long?>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        output = 1;
                        while (true)
                        {
                            IntsRefFSTEnum.InputOutput<long?> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            Assert.AreEqual(output, pair.Output.Value);
                            output += 1 + r.Next(10);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST<long?>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }
            }
            dir.Dispose();
        }
Beispiel #11
0
        private void button3_Click(object sender, EventArgs e)
        {
            string whole_file = File.ReadAllText(path);

            // Разделение на строки.
            whole_file = whole_file.Replace('\n', '\r');
            string[] lines = whole_file.Split(new char[] { '\r' },
                                              StringSplitOptions.RemoveEmptyEntries);
            lines[0] = lines[0] + ", Year";
            for (int i = 0; i < lines.Length; i++)
            {
                lines[i] = lines[i].Replace("(", ",");
                lines[i] = lines[i].Replace(")", "");

                if (lines[i].Split(',').Length < 3)
                {
                    lines[i] = lines[i] + " ,0000 ";
                }
            }

            // Посмотрим, сколько строк и столбцов есть.
            int num_rows = lines.Length;
            int num_cols = lines[0].Split(',').Length;

            // Выделите массив данных.
            string[,] values = new string[num_rows, num_cols];
            // Загрузите массив.
            for (int r = 0; r < num_rows; r++)
            {
                string[] line_r = lines[r].Split(',');
                for (int c = 0; c < num_cols; c++)
                {
                    values[r, c] = line_r[c];
                }
            }

            Analyzer      analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            DirectoryInfo path_    = new DirectoryInfo(@"C:\Users\wwwle\Desktop\tmp");

            Lucene.Net.Store.Directory directory = new MMapDirectory(path_);
            IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

            for (int i = 0; i < values.GetLength(0); i++)
            {
                Document doc = new Document();
                doc.Add(
                    new Field(
                        "id",
                        values[i, 0].ToString(),
                        Field.Store.YES,
                        Field.Index.ANALYZED));
                doc.Add(
                    new Field(
                        "name",
                        values[i, 1].ToString(),
                        Field.Store.YES,
                        Field.Index.ANALYZED));
                doc.Add(
                    new Field(
                        "year",
                        values[i, 2],
                        Field.Store.YES,
                        Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            writer.Optimize();
            writer.Close();
        }