// Indexes six titles and checks that the phrase query "the rain man"
// is answered with exactly documents 0 and 4.
public void Can_collect_phrase()
{
    var dir = CreateDir();

    var docs = new List<dynamic>
    {
        new { _id = "0", title = "man, the rain is cold" },
        new { _id = "1", title = "the rambo 2" },
        new { _id = "2", title = "the rocky 2" },
        new { _id = "3", title = "the rain maker" },
        new { _id = "4", title = "the rain man" },
        new { _id = "5", title = "the good, the bad and the ugly" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    long version;

    // "using" guarantees the transaction is disposed even when Write() throws.
    using (var writer = new FullTextUpsertTransaction(
        dir, new Analyzer(), compression: Compression.Lz, documents: docs))
    {
        version = writer.Write();
    }

    var query = new QueryParser(new Analyzer()).Parse("+title:\"the rain man\"");

    using (var readSession = CreateReadSession(dir, version))
    using (var collector = new Collector(readSession))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(2, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
    }
}
// Indexes five titles and checks that the prefix query title:'ra'*
// is answered with the four documents whose title starts with "ra".
public void Can_collect_prefixed()
{
    var dir = CreateDir();

    var docs = new List<dynamic>
    {
        new { _id = "0", title = "rambo" },
        new { _id = "1", title = "rambo 2" },
        new { _id = "2", title = "rocky 2" },
        new { _id = "3", title = "raiders of the lost ark" },
        new { _id = "4", title = "rain man" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    long version;

    // "using" guarantees the transaction is disposed even when Write() throws.
    using (var writer = new FullTextUpsertTransaction(
        dir, new Analyzer(), compression: Compression.Lz, documents: docs))
    {
        version = writer.Write();
    }

    using (var readSession = CreateReadSession(dir, version))
    using (var collector = new Collector(readSession))
    {
        var query = new QueryParser().Parse("title:'ra'*");
        var scores = collector.Collect(query);

        // Every document except "rocky 2" (id 2) is expected.
        Assert.AreEqual(4, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
    }
}
/// <summary>
/// Writes a full-text index into the directory named by <c>--dir</c>, using the documents
/// produced by a <c>ProjGutenbergDvdStream</c> over <c>--source-dir</c>, and prints how long
/// the write took.
/// </summary>
/// <param name="args">
/// Command-line arguments. Options are only recognised at index 1 or later
/// (index 0 is presumably the command verb - confirm against the caller).
/// Recognised options: <c>--dir</c> (required), <c>--source-dir</c>, <c>--take</c>,
/// <c>--skip</c>, <c>--gzip</c>, <c>--lz</c>. <c>--gzip</c> wins when both flags are given.
/// </param>
/// <exception cref="ArgumentException">
/// <c>--dir</c> is missing or empty, or an option that needs a value is the last argument.
/// </exception>
static void WritePg(string[] args)
{
    // Looks an option up once and returns the argument that follows it,
    // or null when the option is absent.
    Func<string, string> valueOf = option =>
    {
        var index = Array.IndexOf(args, option);
        if (index <= 0)
        {
            return null;
        }
        if (index + 1 >= args.Length)
        {
            throw new ArgumentException("missing value for option " + option, "args");
        }
        return args[index + 1];
    };

    var takeValue = valueOf("--take");
    var take = takeValue == null ? int.MaxValue : int.Parse(takeValue);

    var skipValue = valueOf("--skip");
    var skip = skipValue == null ? 0 : int.Parse(skipValue);

    bool gzip = Array.IndexOf(args, "--gzip") > 0;
    bool lz = Array.IndexOf(args, "--lz") > 0;
    var compression = gzip ? Compression.GZip : lz ? Compression.Lz : Compression.NoCompression;

    string dir = valueOf("--dir");
    string sourceDir = valueOf("--source-dir");

    // Fail with a message that names the option instead of letting
    // Directory.CreateDirectory reject a null/empty path.
    if (string.IsNullOrEmpty(dir))
    {
        throw new ArgumentException("the --dir option is required", "args");
    }

    var writeTimer = Stopwatch.StartNew();

    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }

    var documents = new ProjGutenbergDvdStream(sourceDir, skip, take);

    using (var upsert = new FullTextUpsertTransaction(dir, new Analyzer(), compression, documents))
    {
        upsert.Write();
    }

    Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
}
/// <summary>
/// Reads documents from the data file named by <c>--file</c> and writes them into a
/// full-text index in the directory named by <c>--dir</c>, then prints how long the write took.
/// The segment info is loaded from the alphabetically first <c>*.ix</c> file that sits next
/// to the data file.
/// </summary>
/// <param name="args">
/// Command-line arguments. <c>--take</c>, <c>--skip</c>, <c>--gzip</c>, <c>--lz</c> and
/// <c>--dir</c> are only recognised at index 1 or later (index 0 is presumably the command
/// verb - confirm against the caller). <c>--file</c> is required. <c>--gzip</c> wins when
/// both compression flags are given. A <c>--pk</c> option is tolerated but not used here.
/// </param>
/// <exception cref="ArgumentException">
/// <c>--file</c> is missing, or an option that needs a value is the last argument.
/// </exception>
static void Rewrite(string[] args)
{
    // Looks an option up once and returns the argument that follows it,
    // or null when the option is absent.
    Func<string, string> valueOf = option =>
    {
        var index = Array.IndexOf(args, option);
        if (index <= 0)
        {
            return null;
        }
        if (index + 1 >= args.Length)
        {
            throw new ArgumentException("missing value for option " + option, "args");
        }
        return args[index + 1];
    };

    var takeValue = valueOf("--take");
    var take = takeValue == null ? int.MaxValue : int.Parse(takeValue);

    var skipValue = valueOf("--skip");
    var skip = skipValue == null ? 0 : int.Parse(skipValue);

    bool gzip = Array.IndexOf(args, "--gzip") > 0;
    bool lz = Array.IndexOf(args, "--lz") > 0;
    string dir = valueOf("--dir");

    var compression = gzip ? Compression.GZip : lz ? Compression.Lz : Compression.NoCompression;

    // Without this check a missing --file makes IndexOf return -1, and the
    // "+ 1" silently selects args[0] as the data file.
    var fileOptionIndex = Array.IndexOf(args, "--file");
    if (fileOptionIndex < 0 || fileOptionIndex + 1 >= args.Length)
    {
        throw new ArgumentException("the --file option is required and needs a value", "args");
    }
    var dataFileName = args[fileOptionIndex + 1];

    // A bare file name has no directory component; Directory.GetFiles does not
    // accept an empty path, so look next to the file in the current directory.
    var dataDir = Path.GetDirectoryName(dataFileName);
    if (dataDir != null && dataDir.Length == 0)
    {
        dataDir = Directory.GetCurrentDirectory();
    }

    var ixFileName = Directory.GetFiles(dataDir, "*.ix")
        .OrderBy(s => s)
        .First();
    var ix = SegmentInfo.Load(ixFileName);

    Console.WriteLine("rewriting...");

    var writeTimer = Stopwatch.StartNew();

    using (var stream = new FileStream(dataFileName, FileMode.Open))
    using (var documents = new DocumentTableStream(stream, ix, skip, take))
    using (var upsert = new FullTextUpsertTransaction(dir, new Analyzer(), compression, documents))
    {
        upsert.Write();
    }

    Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
}
// Indexes six titles and checks three queries against the same index version:
// each exact term on its own, then both terms combined in one query, which is
// expected to return the documents of both single-term queries.
public void Can_collect_exact_terms_joined_by_or()
{
    var dir = CreateDir();

    var docs = new List<dynamic>
    {
        new { _id = "0", title = "rambo first blood" },
        new { _id = "1", title = "rambo 2" },
        new { _id = "2", title = "rocky 2" },
        new { _id = "3", title = "raiders of the lost ark" },
        new { _id = "4", title = "the rain man" },
        new { _id = "5", title = "the good, the bad and the ugly" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    long version;

    // "using" guarantees the transaction is disposed even when Write() throws.
    using (var writer = new FullTextUpsertTransaction(
        dir, new Analyzer(), compression: Compression.Lz, documents: docs))
    {
        version = writer.Write();
    }

    // Single term: 'rocky'.
    var query = new QueryParser(new Analyzer()).Parse("+title:'rocky'");

    using (var readSession = CreateReadSession(dir, version))
    using (var collector = new Collector(readSession))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(1, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
    }

    // Single term: 'rambo'.
    query = new QueryParser(new Analyzer()).Parse("+title:'rambo'");

    using (var readSession = CreateReadSession(dir, version))
    using (var collector = new Collector(readSession))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(2, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
    }

    // Both terms in one query.
    query = new QueryParser(new Analyzer()).Parse("+title:'rocky' title:'rambo'");

    using (var readSession = CreateReadSession(dir, version))
    using (var collector = new Collector(readSession))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(3, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
    }
}
// Writes two batches of documents into the same directory with two separate
// transactions and checks that the phrase search "rambo first" returns the
// single document with _id "0" both after the first and after the second write.
public void Can_search_two_index_segments()
{
    var dir = CreateDir();

    var docs = new List<dynamic>
    {
        new { _id = "0", title = "Rambo First Blood" },
        new { _id = "1", title = "the rain man" },
        new { _id = "2", title = "the good, the bad and the ugly" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    // "using" guarantees the transaction is disposed even when Write() throws.
    // The version returned by Write() is not needed: Searcher is given the directory only.
    using (var writer = new FullTextUpsertTransaction(
        dir, new Analyzer(), compression: Compression.NoCompression, documents: docs))
    {
        writer.Write();
    }

    using (var searcher = new Searcher(dir))
    {
        var result = searcher.Search("title:\"rambo first\"");

        Assert.AreEqual(1, result.Total);
        Assert.AreEqual(1, result.Docs.Count);
        Assert.IsTrue(result.Docs.Any(d => d.TableRow.Fields["_id"].Value == "0"));
    }

    var moreDocs = new List<dynamic>
    {
        new { _id = "3", title = "rocky 2" },
        new { _id = "4", title = "rambo 2" },
        new { _id = "5", title = "the raiders of the lost ark" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    using (var writer2 = new FullTextUpsertTransaction(
        dir, new Analyzer(), compression: Compression.NoCompression, documents: moreDocs))
    {
        writer2.Write();
    }

    // Same search again, now with both writes present in the directory.
    using (var searcher = new Searcher(dir))
    {
        var result = searcher.Search("title:\"rambo first\"");

        Assert.AreEqual(1, result.Total);
        Assert.AreEqual(1, result.Docs.Count);
        Assert.IsTrue(result.Docs.Any(d => d.TableRow.Fields["_id"].Value == "0"));
    }
}
// Indexes six documents whose title is an integer (5 down to 0) and checks
// an exact numeric match followed by a numeric range query.
public void Can_collect_numbers()
{
    var dir = CreateDir();

    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }

    var docs = new List<dynamic>
    {
        new { _id = "0", title = 5 },
        new { _id = "1", title = 4 },
        new { _id = "2", title = 3 },
        new { _id = "3", title = 2 },
        new { _id = "4", title = 1 },
        new { _id = "5", title = 0 }
    }.ToDocuments(primaryKeyFieldName: "_id");

    long version;

    // "using" guarantees the transaction is disposed even when Write() throws.
    using (var writer = new FullTextUpsertTransaction(
        dir, new Analyzer(), compression: Compression.Lz, documents: docs))
    {
        version = writer.Write();
    }

    // Exact match: only the document with title 3.
    var query = new QueryParser().Parse("title:3");

    using (var readSession = CreateReadSession(dir, version))
    using (var collector = new Collector(readSession))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(1, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
    }

    // Range query: the assertions below expect the documents with titles
    // 0, 1, 2 and 3, i.e. both bounds are treated as inclusive.
    query = new QueryParser().Parse("title<3+title>0");

    using (var readSession = CreateReadSession(dir, version))
    using (var collector = new Collector(readSession))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(4, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 5));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
    }
}
// Indexes six titles and checks two exact-term searches through Searcher:
// 'rambo' (two hits, including the stored original-case title) and 'the' (three hits).
public void Can_search_exact()
{
    var dir = CreateDir();

    var docs = new List<dynamic>
    {
        new { _id = "0", title = "Rambo First Blood" },
        new { _id = "1", title = "rambo 2" },
        new { _id = "2", title = "rocky 2" },
        new { _id = "3", title = "the raiders of the lost ark" },
        new { _id = "4", title = "the rain man" },
        new { _id = "5", title = "the good, the bad and the ugly" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    // "using" guarantees the transaction is disposed even when Write() throws.
    // The version returned by Write() is not needed: Searcher is given the directory only.
    using (var writer = new FullTextUpsertTransaction(
        dir, new Analyzer(), compression: Compression.NoCompression, documents: docs))
    {
        writer.Write();
    }

    using (var searcher = new Searcher(dir))
    {
        var result = searcher.Search("title:'rambo'");

        Assert.AreEqual(2, result.Total);
        Assert.AreEqual(2, result.Docs.Count);
        Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 0));
        Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 1));

        // The stored field value is expected to keep its original casing.
        Assert.AreEqual(
            "Rambo First Blood",
            result.Docs.First(d => d.TableRow.TableId == 0).TableRow.Fields["title"].Value);
    }

    using (var searcher = new Searcher(dir))
    {
        var result = searcher.Search("title:'the'");

        Assert.AreEqual(3, result.Total);
        Assert.AreEqual(3, result.Docs.Count);
        Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 3));
        Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 4));
        Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 5));
    }
}
// Indexes six documents with a "created" date and checks that a range query
// bounded by lowerBound and upperBound returns exactly documents 1 and 2,
// the two documents whose date equals one of the bounds.
public void Can_collect_date_range()
{
    var dir = CreateDir();

    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }

    var lowerBound = DateTime.Now;
    var upperBound = DateTime.Now.AddDays(1);

    var docs = new List<dynamic>
    {
        new { _id = "0", created = DateTime.Now.AddDays(-1) },
        new { _id = "1", created = lowerBound },
        new { _id = "2", created = upperBound },
        new { _id = "3", created = upperBound.AddDays(1) },
        new { _id = "4", created = upperBound.AddDays(2) },
        new { _id = "5", created = upperBound.AddDays(3) }
    }.ToDocuments(primaryKeyFieldName: "_id");

    long version;

    // "using" guarantees the transaction is disposed even when Write() throws.
    using (var writer = new FullTextUpsertTransaction(
        dir, new Analyzer(), compression: Compression.Lz, documents: docs))
    {
        version = writer.Write();
    }

    // NOTE(review): the bounds are embedded through DateTime's default ToString(),
    // which is culture-dependent and carries no sub-second precision - confirm that
    // the parser and the indexed values agree on this format.
    var query = new QueryParser().Parse("created>\\" + lowerBound + "\\+created<\\" + upperBound + "\\");

    using (var readSession = CreateReadSession(dir, version))
    using (var collector = new Collector(readSession))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(2, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
    }
}
// Indexes six documents with mixed numeric and alphanumeric primary keys and
// checks that an exact query on the _id field returns the single matching document.
public void Can_collect_by_id()
{
    var indexDir = CreateDir();

    var documents = new List<dynamic>
    {
        new { _id = "abc0123", title = "rambo first blood" },
        new { _id = "1", title = "rambo 2" },
        new { _id = "2", title = "rocky 2" },
        new { _id = "3", title = "the raiders of the lost ark" },
        new { _id = "four", title = "the rain man" },
        new { _id = "5five", title = "the good, the bad and the ugly" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    long indexVersion;

    using (var transaction = new FullTextUpsertTransaction(indexDir, new Analyzer(), Compression.Lz, documents))
    {
        indexVersion = transaction.Write();
    }

    // Each case pairs a query with the document id it is expected to return.
    var cases = new[]
    {
        new { Query = "_id:'3'", DocId = 3 },
        new { Query = "_id:'5five'", DocId = 5 }
    };

    foreach (var testCase in cases)
    {
        var expectedId = testCase.DocId;

        // A fresh read session and collector per query, as each lookup is independent.
        using (var session = CreateReadSession(indexDir, indexVersion))
        using (var docCollector = new Collector(session))
        {
            var parsed = new QueryParser().Parse(testCase.Query);
            var hits = docCollector.Collect(parsed);

            Assert.AreEqual(1, hits.Count);
            Assert.IsTrue(hits.Any(hit => hit.DocumentId == expectedId));
        }
    }
}