// Indexes more than int.MaxValue unique terms (int.MaxValue + 100M in total) and verifies
// that CheckIndex reports a term count above int.MaxValue.
public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
{
    if ("Lucene3x".Equals(Codec.Default.Name))
    {
        throw new Exception("this test cannot run with PreFlex codec");
    }
    Console.WriteLine("Starting Test2B");
    long TERM_COUNT = ((long)int.MaxValue) + 100000000;

    int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000);

    IList<BytesRef> savedTerms = null;

    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));
    //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }
    dir.CheckIndexOnClose = false; // don't double-check index

    if (true)
    {
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .SetRAMBufferSizeMB(256.0)
            .SetMergeScheduler(scheduler)
            .SetMergePolicy(NewLogMergePolicy(false, 10))
            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

        MergePolicy mp = w.Config.MergePolicy;
        if (mp is LogByteSizeMergePolicy)
        {
            // 1 petabyte:
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
        }

        Documents.Document doc = new Documents.Document();
        MyTokenStream ts = new MyTokenStream(Random(), TERMS_PER_DOC);

        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
        customType.OmitNorms = true;
        Field field = new Field("field", ts, customType);
        doc.Add(field);
        //w.setInfoStream(System.out);
        int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

        Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
        Console.WriteLine("numDocs=" + numDocs);

        for (int i = 0; i < numDocs; i++)
        {
            long t0 = Environment.TickCount;
            w.AddDocument(doc);
            Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec");
        }
        savedTerms = ts.SavedTerms;

        Console.WriteLine("TEST: full merge");
        w.ForceMerge(1);
        Console.WriteLine("TEST: close writer");
        w.Dispose();
    }

    Console.WriteLine("TEST: open reader");
    IndexReader r = DirectoryReader.Open(dir);
    if (savedTerms == null)
    {
        savedTerms = FindTerms(r);
    }
    int numSavedTerms = savedTerms.Count;
    IList<BytesRef> bigOrdTerms = new List<BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms));
    Console.WriteLine("TEST: test big ord terms...");
    TestSavedTerms(r, bigOrdTerms);
    Console.WriteLine("TEST: test all saved terms...");
    TestSavedTerms(r, savedTerms);
    r.Dispose();

    Console.WriteLine("TEST: now CheckIndex...");
    CheckIndex.Status status = TestUtil.CheckIndex(dir);
    long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;
    Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

    dir.Dispose();
    Console.WriteLine("TEST: done!");
}
// Builds a 1,000-doc index whose per-document token counts alternate between 65536 and 65537
// (to defeat a block postings format optimization), then uses MultiReader + AddIndexes to
// multiply it to ~1M and then ~2B documents' worth of postings.
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }

    var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
        .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .SetRAMBufferSizeMB(256.0)
        .SetMergeScheduler(scheduler)
        .SetMergePolicy(NewLogMergePolicy(false, 10))
        .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
    IndexWriter w = new IndexWriter(dir, config);

    MergePolicy mp = w.Config.MergePolicy;
    if (mp is LogByteSizeMergePolicy)
    {
        // 1 petabyte:
        ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
    }

    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
    ft.OmitNorms = true;
    MyTokenStream tokenStream = new MyTokenStream();
    Field field = new Field("field", tokenStream, ft);
    doc.Add(field);

    const int numDocs = 1000;
    for (int i = 0; i < numDocs; i++)
    {
        if (i % 2 == 1) // trick blockPF's little optimization
        {
            tokenStream.n = 65536;
        }
        else
        {
            tokenStream.n = 65537;
        }
        w.AddDocument(doc);
    }
    w.ForceMerge(1);
    w.Dispose();

    // Multiply the 1,000-doc index by 1,000 via MultiReader + AddIndexes.
    DirectoryReader oneThousand = DirectoryReader.Open(dir);
    IndexReader[] subReaders = new IndexReader[1000];
    Arrays.Fill(subReaders, oneThousand);
    MultiReader mr = new MultiReader(subReaders);
    BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2"));
    if (dir2 is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }
    IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
    w2.AddIndexes(mr);
    w2.ForceMerge(1);
    w2.Dispose();
    oneThousand.Dispose();

    // Multiply the ~1M-doc index by 2,000.
    DirectoryReader oneMillion = DirectoryReader.Open(dir2);
    subReaders = new IndexReader[2000];
    Arrays.Fill(subReaders, oneMillion);
    mr = new MultiReader(subReaders);
    BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3"));
    if (dir3 is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }
    IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
    w3.AddIndexes(mr);
    w3.ForceMerge(1);
    w3.Dispose();
    oneMillion.Dispose();

    dir.Dispose();
    dir2.Dispose();
    dir3.Dispose();
}
// Same scenario as above, using the updated Lucene.NET APIs (J2N timing, JCG collections,
// Throttling/OpenMode enums): indexes more than int.MaxValue unique terms and verifies that
// CheckIndex reports a term count above int.MaxValue.
public virtual void Test2BTerms_Mem()
{
    if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal))
    {
        throw RuntimeException.Create("this test cannot run with PreFlex codec");
    }
    Console.WriteLine("Starting Test2B");
    long TERM_COUNT = ((long)int.MaxValue) + 100000000;

    int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000);

    IList<BytesRef> savedTerms = null;

    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));
    //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
    }
    dir.CheckIndexOnDispose = false; // don't double-check index

    if (true)
    {
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .SetRAMBufferSizeMB(256.0)
            .SetMergeScheduler(new ConcurrentMergeScheduler())
            .SetMergePolicy(NewLogMergePolicy(false, 10))
            .SetOpenMode(OpenMode.CREATE));

        MergePolicy mp = w.Config.MergePolicy;
        if (mp is LogByteSizeMergePolicy)
        {
            // 1 petabyte:
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
        }

        Documents.Document doc = new Documents.Document();
        MyTokenStream ts = new MyTokenStream(Random, TERMS_PER_DOC);

        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.IndexOptions = IndexOptions.DOCS_ONLY;
        customType.OmitNorms = true;
        Field field = new Field("field", ts, customType);
        doc.Add(field);
        //w.setInfoStream(System.out);
        int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

        Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
        Console.WriteLine("numDocs=" + numDocs);

        for (int i = 0; i < numDocs; i++)
        {
            long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            w.AddDocument(doc);
            Console.WriteLine(i + " of " + numDocs + " " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        }
        savedTerms = ts.savedTerms;

        Console.WriteLine("TEST: full merge");
        w.ForceMerge(1);
        Console.WriteLine("TEST: close writer");
        w.Dispose();
    }

    Console.WriteLine("TEST: open reader");
    IndexReader r = DirectoryReader.Open(dir);
    if (savedTerms is null)
    {
        savedTerms = FindTerms(r);
    }
    int numSavedTerms = savedTerms.Count;
    IList<BytesRef> bigOrdTerms = new JCG.List<BytesRef>(savedTerms.GetView(numSavedTerms - 10, 10)); // LUCENENET: Converted end index to length
    Console.WriteLine("TEST: test big ord terms...");
    TestSavedTerms(r, bigOrdTerms);
    Console.WriteLine("TEST: test all saved terms...");
    TestSavedTerms(r, savedTerms);
    r.Dispose();

    Console.WriteLine("TEST: now CheckIndex...");
    CheckIndex.Status status = TestUtil.CheckIndex(dir);
    long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;
    Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

    dir.Dispose();
    Console.WriteLine("TEST: done!");
}
// Deserializes a ParsingResultSet (source text, tokens, name tables, vocabulary, and parse-tree
// nodes) from its JSON representation, rebuilding surrogate lexer, parser, and token-stream objects.
public override ParsingResultSet Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
{
    MyTokenStream out_token_stream = new MyTokenStream();
    MyLexer lexer = new MyLexer(null);
    MyParser parser = new MyParser(out_token_stream);
    MyCharStream fake_char_stream = new MyCharStream();
    string text = null;
    lexer.InputStream = fake_char_stream;
    if (reader.TokenType != JsonTokenType.StartObject) throw new JsonException();
    reader.Read();

    List<string> mode_names = new List<string>();
    List<string> channel_names = new List<string>();
    List<string> lexer_rule_names = new List<string>();
    List<string> literal_names = new List<string>();
    List<string> symbolic_names = new List<string>();
    Dictionary<string, int> token_type_map = new Dictionary<string, int>();
    List<string> parser_rule_names = new List<string>();
    Dictionary<int, IParseTree> nodes = new Dictionary<int, IParseTree>();
    List<IParseTree> result = new List<IParseTree>();

    while (reader.TokenType == JsonTokenType.PropertyName)
    {
        string pn = reader.GetString();
        reader.Read();
        if (pn == "FileName")
        {
            var name = reader.GetString();
            fake_char_stream.SourceName = name;
            reader.Read();
        }
        else if (pn == "Text")
        {
            out_token_stream.Text = reader.GetString();
            fake_char_stream.Text = out_token_stream.Text;
            text = out_token_stream.Text;
            reader.Read();
        }
        else if (pn == "Tokens")
        {
            if (reader.TokenType != JsonTokenType.StartArray) throw new JsonException();
            reader.Read();
            int token_index = 0;
            // Each token is stored as six consecutive numbers: type, start, stop, line, column, channel.
            while (reader.TokenType == JsonTokenType.Number)
            {
                var type = reader.GetInt32(); reader.Read();
                var start = reader.GetInt32(); reader.Read();
                var stop = reader.GetInt32(); reader.Read();
                var line = reader.GetInt32(); reader.Read();
                var column = reader.GetInt32(); reader.Read();
                var channel = reader.GetInt32(); reader.Read();
                var token = new MyToken();
                token.Type = type;
                token.StartIndex = start;
                token.StopIndex = stop;
                token.Line = line;
                token.Column = column;
                token.Channel = channel;
                token.InputStream = lexer.InputStream;
                token.TokenSource = lexer;
                token.TokenIndex = token_index++;
                token.Text = out_token_stream.Text.Substring(token.StartIndex, token.StopIndex - token.StartIndex + 1);
                out_token_stream.Add(token);
            }
            reader.Read();
        }
        else if (pn == "ModeNames")
        {
            if (reader.TokenType != JsonTokenType.StartArray) throw new JsonException();
            reader.Read();
            while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
            {
                mode_names.Add(reader.GetString());
                reader.Read();
            }
            reader.Read();
            lexer._modeNames = mode_names.ToArray();
        }
        else if (pn == "ChannelNames")
        {
            if (reader.TokenType != JsonTokenType.StartArray) throw new JsonException();
            reader.Read();
            while (reader.TokenType == JsonTokenType.String)
            {
                channel_names.Add(reader.GetString());
                reader.Read();
            }
            reader.Read();
            lexer._channelNames = channel_names.ToArray();
        }
        else if (pn == "LiteralNames")
        {
            if (reader.TokenType != JsonTokenType.StartArray) throw new JsonException();
            reader.Read();
            while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
            {
                literal_names.Add(reader.GetString());
                reader.Read();
            }
            reader.Read();
        }
        else if (pn == "SymbolicNames")
        {
            if (reader.TokenType != JsonTokenType.StartArray) throw new JsonException();
            reader.Read();
            while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
            {
                symbolic_names.Add(reader.GetString());
                reader.Read();
            }
            reader.Read();
        }
        else if (pn == "LexerRuleNames")
        {
            if (reader.TokenType != JsonTokenType.StartArray) throw new JsonException();
            reader.Read();
            while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
            {
                lexer_rule_names.Add(reader.GetString());
                reader.Read();
            }
            reader.Read();
        }
        else if (pn == "ParserRuleNames")
        {
            if (reader.TokenType != JsonTokenType.StartArray) throw new JsonException();
            reader.Read();
            while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
            {
                var name = reader.GetString();
                parser_rule_names.Add(name);
                reader.Read();
            }
            reader.Read();
        }
        else if (pn == "TokenTypeMap")
        {
            if (reader.TokenType != JsonTokenType.StartArray) throw new JsonException();
            reader.Read();
            // Stored as alternating name/number pairs.
            while (reader.TokenType == JsonTokenType.String || reader.TokenType == JsonTokenType.Null)
            {
                var name = reader.GetString();
                reader.Read();
                var tt = reader.GetInt32();
                reader.Read();
                token_type_map[name] = tt;
            }
            reader.Read();
        }
        else if (pn == "Nodes")
        {
            List<IParseTree> list_of_nodes = new List<IParseTree>();
            if (reader.TokenType != JsonTokenType.StartArray) throw new JsonException();
            reader.Read();
            int current = 1;
            // Each node is a (parent id, node type) pair. Types below 1000000 are rule contexts;
            // types of 1000000 or more are terminal nodes whose token index is (type - 1000000).
            while (reader.TokenType == JsonTokenType.Number)
            {
                int parent = reader.GetInt32();
                reader.Read();
                int type_of_node = reader.GetInt32();
                reader.Read();
                var parent_node = parent > 0 ? nodes[parent] as MyParserRuleContext : null;
                if (type_of_node < 1000000)
                {
                    MyParserRuleContext foo = new MyParserRuleContext(parent_node, 0) { _ruleIndex = type_of_node };
                    nodes[current] = foo;
                    if (parent_node == null)
                    {
                        result.Add(foo);
                    }
                    else
                    {
                        parent_node.AddChild((Antlr4.Runtime.RuleContext)foo);
                    }
                }
                else
                {
                    var index = type_of_node - 1000000;
                    var symbol = out_token_stream.Get(index);
                    var foo = new MyTerminalNodeImpl(symbol);
                    nodes[current] = foo;
                    foo.Parent = parent_node;
                    if (parent_node == null)
                    {
                        result.Add(foo);
                    }
                    else
                    {
                        parent_node.AddChild(foo);
                    }
                }
                current++;
            }
            foreach (var n in result)
            {
                Sweep(n);
            }
            reader.Read();
        }
        else
        {
            throw new JsonException();
        }
    }

    var vocab = new Vocabulary(literal_names.ToArray(), symbolic_names.ToArray());
    parser._vocabulary = vocab;
    parser._grammarFileName = fake_char_stream.SourceName;
    parser._ruleNames = parser_rule_names.ToArray();
    lexer._vocabulary = vocab;
    lexer._ruleNames = lexer_rule_names.ToArray();
    lexer._tokenTypeMap = token_type_map;
    var res = new AntlrJson.ParsingResultSet()
    {
        FileName = fake_char_stream.SourceName,
        Stream = out_token_stream,
        Nodes = result.ToArray(),
        Lexer = lexer,
        Parser = parser,
        Text = text
    };
    return res;
}