Пример #1
0
        public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            if ("Lucene3x".Equals(Codec.Default.Name))
            {
                throw new Exception("this test cannot run with PreFlex codec");
            }
            Console.WriteLine("Starting Test2B");
            long TERM_COUNT = ((long)int.MaxValue) + 100000000;

            int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000);

            IList <BytesRef> savedTerms = null;

            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));

            //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            dir.CheckIndexOnClose = false; // don't double-checkindex

            if (true)
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                                .SetRAMBufferSizeMB(256.0)
                                                .SetMergeScheduler(scheduler)
                                                .SetMergePolicy(NewLogMergePolicy(false, 10))
                                                .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

                MergePolicy mp = w.Config.MergePolicy;
                if (mp is LogByteSizeMergePolicy)
                {
                    // 1 petabyte:
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
                }

                Documents.Document doc = new Documents.Document();
                MyTokenStream      ts  = new MyTokenStream(Random(), TERMS_PER_DOC);

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                customType.OmitNorms    = true;
                Field field = new Field("field", ts, customType);
                doc.Add(field);
                //w.setInfoStream(System.out);
                int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

                Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
                Console.WriteLine("numDocs=" + numDocs);

                for (int i = 0; i < numDocs; i++)
                {
                    long t0 = Environment.TickCount;
                    w.AddDocument(doc);
                    Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec");
                }
                savedTerms = ts.SavedTerms;

                Console.WriteLine("TEST: full merge");
                w.ForceMerge(1);
                Console.WriteLine("TEST: close writer");
                w.Dispose();
            }

            Console.WriteLine("TEST: open reader");
            IndexReader r = DirectoryReader.Open(dir);

            if (savedTerms == null)
            {
                savedTerms = FindTerms(r);
            }
            int numSavedTerms            = savedTerms.Count;
            IList <BytesRef> bigOrdTerms = new List <BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms));

            Console.WriteLine("TEST: test big ord terms...");
            TestSavedTerms(r, bigOrdTerms);
            Console.WriteLine("TEST: test all saved terms...");
            TestSavedTerms(r, savedTerms);
            r.Dispose();

            Console.WriteLine("TEST: now CheckIndex...");
            CheckIndex.Status status = TestUtil.CheckIndex(dir);
            long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;

            Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

            dir.Dispose();
            Console.WriteLine("TEST: done!");
        }
Пример #2
0
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                            .SetRAMBufferSizeMB(256.0)
                            .SetMergeScheduler(scheduler)
                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;
            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms = true;
            MyTokenStream tokenStream = new MyTokenStream();
            Field field = new Field("field", tokenStream, ft);
            doc.Add(field);

            const int numDocs = 1000;
            for (int i = 0; i < numDocs; i++)
            {
                if (i % 2 == 1) // trick blockPF's little optimization
                {
                    tokenStream.n = 65536;
                }
                else
                {
                    tokenStream.n = 65537;
                }
                w.AddDocument(doc);
            }
            w.ForceMerge(1);
            w.Dispose();

            DirectoryReader oneThousand = DirectoryReader.Open(dir);
            IndexReader[] subReaders = new IndexReader[1000];
            Arrays.Fill(subReaders, oneThousand);
            MultiReader mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2"));
            if (dir2 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
            w2.AddIndexes(mr);
            w2.ForceMerge(1);
            w2.Dispose();
            oneThousand.Dispose();

            DirectoryReader oneMillion = DirectoryReader.Open(dir2);
            subReaders = new IndexReader[2000];
            Arrays.Fill(subReaders, oneMillion);
            mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3"));
            if (dir3 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
            w3.AddIndexes(mr);
            w3.ForceMerge(1);
            w3.Dispose();
            oneMillion.Dispose();

            dir.Dispose();
            dir2.Dispose();
            dir3.Dispose();
        }
Пример #3
0
        public virtual void Test2BTerms_Mem()
        {
            if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal))
            {
                throw RuntimeException.Create("this test cannot run with PreFlex codec");
            }
            Console.WriteLine("Starting Test2B");
            long TERM_COUNT = ((long)int.MaxValue) + 100000000;

            int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000);

            IList <BytesRef> savedTerms = null;

            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));

            //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }
            dir.CheckIndexOnDispose = false; // don't double-checkindex

            if (true)
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                                .SetRAMBufferSizeMB(256.0)
                                                .SetMergeScheduler(new ConcurrentMergeScheduler())
                                                .SetMergePolicy(NewLogMergePolicy(false, 10))
                                                .SetOpenMode(OpenMode.CREATE));

                MergePolicy mp = w.Config.MergePolicy;
                if (mp is LogByteSizeMergePolicy)
                {
                    // 1 petabyte:
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
                }

                Documents.Document doc = new Documents.Document();
                MyTokenStream      ts  = new MyTokenStream(Random, TERMS_PER_DOC);

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IndexOptions = IndexOptions.DOCS_ONLY;
                customType.OmitNorms    = true;
                Field field = new Field("field", ts, customType);
                doc.Add(field);
                //w.setInfoStream(System.out);
                int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

                Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
                Console.WriteLine("numDocs=" + numDocs);

                for (int i = 0; i < numDocs; i++)
                {
                    long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;                                                          // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                    w.AddDocument(doc);
                    Console.WriteLine(i + " of " + numDocs + " " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                }
                savedTerms = ts.savedTerms;

                Console.WriteLine("TEST: full merge");
                w.ForceMerge(1);
                Console.WriteLine("TEST: close writer");
                w.Dispose();
            }

            Console.WriteLine("TEST: open reader");
            IndexReader r = DirectoryReader.Open(dir);

            if (savedTerms is null)
            {
                savedTerms = FindTerms(r);
            }
            int numSavedTerms            = savedTerms.Count;
            IList <BytesRef> bigOrdTerms = new JCG.List <BytesRef>(savedTerms.GetView(numSavedTerms - 10, 10)); // LUCENENET: Converted end index to length

            Console.WriteLine("TEST: test big ord terms...");
            TestSavedTerms(r, bigOrdTerms);
            Console.WriteLine("TEST: test all saved terms...");
            TestSavedTerms(r, savedTerms);
            r.Dispose();

            Console.WriteLine("TEST: now CheckIndex...");
            CheckIndex.Status status = TestUtil.CheckIndex(dir);
            long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;

            Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

            dir.Dispose();
            Console.WriteLine("TEST: done!");
        }
Пример #4
0
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(scheduler)
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms    = true;
            MyTokenStream tokenStream = new MyTokenStream();
            Field         field       = new Field("field", tokenStream, ft);

            doc.Add(field);

            const int numDocs = 1000;

            for (int i = 0; i < numDocs; i++)
            {
                if (i % 2 == 1) // trick blockPF's little optimization
                {
                    tokenStream.n = 65536;
                }
                else
                {
                    tokenStream.n = 65537;
                }
                w.AddDocument(doc);
            }
            w.ForceMerge(1);
            w.Dispose();

            DirectoryReader oneThousand = DirectoryReader.Open(dir);

            IndexReader[] subReaders = new IndexReader[1000];
            Arrays.Fill(subReaders, oneThousand);
            MultiReader          mr   = new MultiReader(subReaders);
            BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2"));

            if (dir2 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));

            w2.AddIndexes(mr);
            w2.ForceMerge(1);
            w2.Dispose();
            oneThousand.Dispose();

            DirectoryReader oneMillion = DirectoryReader.Open(dir2);

            subReaders = new IndexReader[2000];
            Arrays.Fill(subReaders, oneMillion);
            mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3"));

            if (dir3 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));

            w3.AddIndexes(mr);
            w3.ForceMerge(1);
            w3.Dispose();
            oneMillion.Dispose();

            dir.Dispose();
            dir2.Dispose();
            dir3.Dispose();
        }
Пример #5
0
        public override ParsingResultSet Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
        {
            MyTokenStream out_token_stream = new MyTokenStream();
            MyLexer       lexer            = new MyLexer(null);
            MyParser      parser           = new MyParser(out_token_stream);
            MyCharStream  fake_char_stream = new MyCharStream();
            string        text             = null;

            lexer.InputStream = fake_char_stream;
            if (!(reader.TokenType == JsonTokenType.StartObject))
            {
                throw new JsonException();
            }
            reader.Read();
            List <string>                mode_names        = new List <string>();
            List <string>                channel_names     = new List <string>();
            List <string>                lexer_rule_names  = new List <string>();
            List <string>                literal_names     = new List <string>();
            List <string>                symbolic_names    = new List <string>();
            Dictionary <string, int>     token_type_map    = new Dictionary <string, int>();
            List <string>                parser_rule_names = new List <string>();
            Dictionary <int, IParseTree> nodes             = new Dictionary <int, IParseTree>();
            List <IParseTree>            result            = new List <IParseTree>();

            while (reader.TokenType == JsonTokenType.PropertyName)
            {
                string pn = reader.GetString();
                reader.Read();
                if (pn == "FileName")
                {
                    var name = reader.GetString();
                    fake_char_stream.SourceName = name;
                    reader.Read();
                }
                else if (pn == "Text")
                {
                    out_token_stream.Text = reader.GetString();
                    fake_char_stream.Text = out_token_stream.Text;
                    text = out_token_stream.Text;
                    reader.Read();
                }
                else if (pn == "Tokens")
                {
                    if (!(reader.TokenType == JsonTokenType.StartArray))
                    {
                        throw new JsonException();
                    }
                    reader.Read();
                    int token_index = 0;
                    while (reader.TokenType == JsonTokenType.Number)
                    {
                        var type = reader.GetInt32();
                        reader.Read();
                        var start = reader.GetInt32();
                        reader.Read();
                        var stop = reader.GetInt32();
                        reader.Read();
                        var line = reader.GetInt32();
                        reader.Read();
                        var column = reader.GetInt32();
                        reader.Read();
                        var channel = reader.GetInt32();
                        reader.Read();
                        var token = new MyToken();
                        token.Type        = type;
                        token.StartIndex  = start;
                        token.StopIndex   = stop;
                        token.Line        = line;
                        token.Column      = column;
                        token.Channel     = channel;
                        token.InputStream = lexer.InputStream;
                        token.TokenSource = lexer;
                        token.TokenIndex  = token_index++;
                        token.Text        =
                            out_token_stream.Text.Substring(token.StartIndex, token.StopIndex - token.StartIndex + 1);
                        out_token_stream.Add(token);
                    }

                    reader.Read();
                }
                else if (pn == "ModeNames")
                {
                    if (!(reader.TokenType == JsonTokenType.StartArray))
                    {
                        throw new JsonException();
                    }
                    reader.Read();
                    while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
                    {
                        mode_names.Add(reader.GetString());
                        reader.Read();
                    }

                    reader.Read();
                    lexer._modeNames = mode_names.ToArray();
                }
                else if (pn == "ChannelNames")
                {
                    if (!(reader.TokenType == JsonTokenType.StartArray))
                    {
                        throw new JsonException();
                    }
                    reader.Read();
                    while (reader.TokenType == JsonTokenType.String)
                    {
                        channel_names.Add(reader.GetString());
                        reader.Read();
                    }

                    reader.Read();
                    lexer._channelNames = channel_names.ToArray();
                }
                else if (pn == "LiteralNames")
                {
                    if (!(reader.TokenType == JsonTokenType.StartArray))
                    {
                        throw new JsonException();
                    }
                    reader.Read();
                    while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
                    {
                        literal_names.Add(reader.GetString());
                        reader.Read();
                    }

                    reader.Read();
                }
                else if (pn == "SymbolicNames")
                {
                    if (!(reader.TokenType == JsonTokenType.StartArray))
                    {
                        throw new JsonException();
                    }
                    reader.Read();
                    while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
                    {
                        symbolic_names.Add(reader.GetString());
                        reader.Read();
                    }

                    reader.Read();
                }
                else if (pn == "LexerRuleNames")
                {
                    if (!(reader.TokenType == JsonTokenType.StartArray))
                    {
                        throw new JsonException();
                    }
                    reader.Read();
                    while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
                    {
                        lexer_rule_names.Add(reader.GetString());
                        reader.Read();
                    }

                    reader.Read();
                }
                else if (pn == "ParserRuleNames")
                {
                    if (!(reader.TokenType == JsonTokenType.StartArray))
                    {
                        throw new JsonException();
                    }
                    reader.Read();
                    while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
                    {
                        var name = reader.GetString();
                        parser_rule_names.Add(name);
                        reader.Read();
                    }

                    reader.Read();
                }
                else if (pn == "TokenTypeMap")
                {
                    if (!(reader.TokenType == JsonTokenType.StartArray))
                    {
                        throw new JsonException();
                    }
                    reader.Read();
                    while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
                    {
                        var name = reader.GetString();
                        reader.Read();
                        var tt = reader.GetInt32();
                        reader.Read();
                        token_type_map[name] = tt;
                    }

                    reader.Read();
                }
                else if (pn == "Nodes")
                {
                    List <IParseTree> list_of_nodes = new List <IParseTree>();
                    if (!(reader.TokenType == JsonTokenType.StartArray))
                    {
                        throw new JsonException();
                    }
                    reader.Read();
                    int current = 1;
                    while (reader.TokenType == JsonTokenType.Number)
                    {
                        int parent = reader.GetInt32();
                        reader.Read();
                        int type_of_node = reader.GetInt32();
                        reader.Read();
                        var parent_node = parent > 0 ? nodes[parent] as MyParserRuleContext : null;
                        if (type_of_node < 1000000)
                        {
                            MyParserRuleContext foo = new MyParserRuleContext(parent_node, 0)
                            {
                                _ruleIndex = type_of_node
                            };
                            nodes[current] = foo;
                            if (parent_node == null)
                            {
                                result.Add(foo);
                            }
                            else
                            {
                                parent_node.AddChild((Antlr4.Runtime.RuleContext)foo);
                            }
                        }
                        else
                        {
                            var index  = type_of_node - 1000000;
                            var symbol = out_token_stream.Get(index);
                            var foo    = new MyTerminalNodeImpl(symbol);
                            nodes[current] = foo;
                            foo.Parent     = parent_node;
                            if (parent_node == null)
                            {
                                result.Add(foo);
                            }
                            else
                            {
                                parent_node.AddChild(foo);
                            }
                        }

                        current++;
                    }

                    foreach (var n in result)
                    {
                        Sweep(n);
                    }

                    reader.Read();
                }
                else
                {
                    throw new JsonException();
                }
            }

            var vocab = new Vocabulary(literal_names.ToArray(), symbolic_names.ToArray());

            parser._vocabulary      = vocab;
            parser._grammarFileName = fake_char_stream.SourceName;
            parser._ruleNames       = parser_rule_names.ToArray();
            lexer._vocabulary       = vocab;
            lexer._ruleNames        = lexer_rule_names.ToArray();
            lexer._tokenTypeMap     = token_type_map;
            var res = new AntlrJson.ParsingResultSet()
            {
                FileName = fake_char_stream.SourceName,
                Stream   = out_token_stream,
                Nodes    = result.ToArray(),
                Lexer    = lexer,
                Parser   = parser,
                Text     = text
            };

            return(res);
        }