Ejemplo n.º 1
0
        // Indexes six titles and verifies that the phrase query "the rain man"
        // collects exactly two documents: DocumentId 0 and DocumentId 4.
        public void Can_collect_phrase()
        {
            var dir = CreateDir();

            var docs = new List<dynamic>
            {
                new { _id = "0", title = "man, the rain is cold" },
                new { _id = "1", title = "the rambo 2" },
                new { _id = "2", title = "the rocky 2" },
                new { _id = "3", title = "the rain maker" },
                new { _id = "4", title = "the rain man" },
                new { _id = "5", title = "the good, the bad and the ugly" }
            }.ToDocuments(primaryKeyFieldName: "_id");

            long version;

            // The using block guarantees Dispose() runs even when Write() throws.
            using (var writer = new FullTextUpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs))
            {
                version = writer.Write();
            }

            var query = new QueryParser(new Analyzer()).Parse("+title:\"the rain man\"");

            using (var readSession = CreateReadSession(dir, version))
            using (var collector = new Collector(readSession))
            {
                var scores = collector.Collect(query).ToList();

                Assert.AreEqual(2, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
            }
        }
Ejemplo n.º 2
0
        // Indexes five titles and verifies that the prefix query "title:'ra'*"
        // collects DocumentIds 0, 1, 3 and 4 (every title except "rocky 2").
        public void Can_collect_prefixed()
        {
            var dir = CreateDir();

            var docs = new List<dynamic>
            {
                new { _id = "0", title = "rambo" },
                new { _id = "1", title = "rambo 2" },
                new { _id = "2", title = "rocky 2" },
                new { _id = "3", title = "raiders of the lost ark" },
                new { _id = "4", title = "rain man" }
            }.ToDocuments(primaryKeyFieldName: "_id");

            long version;

            // The using block guarantees Dispose() runs even when Write() throws.
            using (var writer = new FullTextUpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs))
            {
                version = writer.Write();
            }

            using (var readSession = CreateReadSession(dir, version))
            using (var collector = new Collector(readSession))
            {
                var query  = new QueryParser().Parse("title:'ra'*");
                var scores = collector.Collect(query);

                Assert.AreEqual(4, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
            }
        }
Ejemplo n.º 3
0
        // Writes the documents produced by a ProjGutenbergDvdStream into a full-text
        // index and prints how long the operation took.
        //
        // Recognised arguments:
        //   --take N          maximum number of documents (default int.MaxValue)
        //   --skip N          number of documents to skip (default 0)
        //   --gzip | --lz     compression; --gzip wins when both are given
        //   --dir PATH        index directory (required; created when missing)
        //   --source-dir PATH directory handed to ProjGutenbergDvdStream
        //
        // A flag found at index 0 is not recognised because every lookup requires a
        // position greater than zero.
        // NOTE(review): presumably args[0] is the command name - confirm with the caller.
        //
        // Throws ArgumentException when a value flag has no value after it or when
        // --dir is missing.
        static void WritePg(string[] args)
        {
            // Returns the argument following the flag, or null when the flag is absent.
            Func<string, string> valueOf = flag =>
            {
                var position = Array.IndexOf(args, flag);

                if (position <= 0)
                {
                    return null;
                }
                if (position + 1 >= args.Length)
                {
                    // Previously this surfaced as a bare IndexOutOfRangeException.
                    throw new ArgumentException(string.Format("{0} requires a value", flag));
                }
                return args[position + 1];
            };

            var takeArg = valueOf("--take");
            var skipArg = valueOf("--skip");

            var take = takeArg == null ? int.MaxValue : int.Parse(takeArg);
            var skip = skipArg == null ? 0 : int.Parse(skipArg);

            bool gzip = Array.IndexOf(args, "--gzip") > 0;
            bool lz   = Array.IndexOf(args, "--lz") > 0;

            var compression = gzip ? Compression.GZip : lz ? Compression.Lz : Compression.NoCompression;

            string dir       = valueOf("--dir");
            string sourceDir = valueOf("--source-dir");

            if (dir == null)
            {
                // Fail with a clear message instead of an ArgumentNullException
                // raised from Directory.CreateDirectory(null).
                throw new ArgumentException("--dir is required");
            }

            var writeTimer = Stopwatch.StartNew();

            // CreateDirectory does nothing when the directory already exists,
            // so no Exists() check is needed.
            Directory.CreateDirectory(dir);

            var documents = new ProjGutenbergDvdStream(sourceDir, skip, take);

            using (var upsert = new FullTextUpsertTransaction(dir, new Analyzer(), compression, documents))
            {
                upsert.Write();
            }

            Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
        }
Ejemplo n.º 4
0
        // Reads documents from an existing data file and writes them into a full-text
        // index, then prints how long the write took.
        //
        // Recognised arguments:
        //   --file PATH    data file to read (required)
        //   --take N       maximum number of documents (default int.MaxValue)
        //   --skip N       number of documents to skip (default 0)
        //   --gzip | --lz  compression; --gzip wins when both are given
        //   --dir PATH     target index directory
        //
        // The segment info is loaded from the alphabetically first "*.ix" file found
        // in the data file's directory. A "--pk" argument may still be passed, but
        // its value was never used by this method and is no longer parsed.
        //
        // Throws ArgumentException when a value flag has no value after it or when
        // --file is missing; throws InvalidOperationException when no "*.ix" file exists.
        static void Rewrite(string[] args)
        {
            // Returns the argument following the flag, or null when the flag is absent.
            // Flags at index 0 are ignored, matching the "> 0" lookups used for options.
            Func<string, string> valueOf = flag =>
            {
                var position = Array.IndexOf(args, flag);

                if (position <= 0)
                {
                    return null;
                }
                if (position + 1 >= args.Length)
                {
                    throw new ArgumentException(string.Format("{0} requires a value", flag));
                }
                return args[position + 1];
            };

            var takeArg = valueOf("--take");
            var skipArg = valueOf("--skip");

            var take = takeArg == null ? int.MaxValue : int.Parse(takeArg);
            var skip = skipArg == null ? 0 : int.Parse(skipArg);

            bool gzip = Array.IndexOf(args, "--gzip") > 0;
            bool lz   = Array.IndexOf(args, "--lz") > 0;

            // NOTE(review): dir stays null when --dir is omitted and is passed to
            // FullTextUpsertTransaction unchanged - confirm that this is supported.
            string dir = valueOf("--dir");

            var compression = gzip ? Compression.GZip : lz ? Compression.Lz : Compression.NoCompression;

            // Without this check a missing --file made IndexOf return -1, so args[0]
            // was silently used as the data file name.
            var filePosition = Array.IndexOf(args, "--file");

            if (filePosition < 0 || filePosition + 1 >= args.Length)
            {
                throw new ArgumentException("--file <path> is required");
            }

            var dataFileName = args[filePosition + 1];

            // A bare file name has no directory part; look in the current directory
            // instead of handing an empty path to Directory.GetFiles.
            var dataDirectory = Path.GetDirectoryName(dataFileName);

            if (string.IsNullOrEmpty(dataDirectory))
            {
                dataDirectory = ".";
            }

            var ixFileName = Directory.GetFiles(dataDirectory, "*.ix")
                             .OrderBy(s => s)
                             .FirstOrDefault();

            if (ixFileName == null)
            {
                throw new InvalidOperationException(
                    string.Format("no *.ix file found in directory '{0}'", dataDirectory));
            }

            var ix = SegmentInfo.Load(ixFileName);

            Console.WriteLine("rewriting...");

            var writeTimer = Stopwatch.StartNew();

            using (var stream = new FileStream(dataFileName, FileMode.Open))
            using (var documents = new DocumentTableStream(stream, ix, skip, take))
            using (var upsert = new FullTextUpsertTransaction(dir, new Analyzer(), compression, documents))
            {
                upsert.Write();
            }

            Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
        }
Ejemplo n.º 5
0
        // Indexes six titles and runs three queries against the same index version:
        //   "+title:'rocky'"               -> DocumentId 2
        //   "+title:'rambo'"               -> DocumentIds 0 and 1
        //   "+title:'rocky' title:'rambo'" -> DocumentIds 0, 1 and 2
        public void Can_collect_exact_terms_joined_by_or()
        {
            var dir = CreateDir();

            var docs = new List<dynamic>
            {
                new { _id = "0", title = "rambo first blood" },
                new { _id = "1", title = "rambo 2" },
                new { _id = "2", title = "rocky 2" },
                new { _id = "3", title = "raiders of the lost ark" },
                new { _id = "4", title = "the rain man" },
                new { _id = "5", title = "the good, the bad and the ugly" }
            }.ToDocuments(primaryKeyFieldName: "_id");

            long version;

            // The using block guarantees Dispose() runs even when Write() throws.
            using (var writer = new FullTextUpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs))
            {
                version = writer.Write();
            }

            var query = new QueryParser(new Analyzer()).Parse("+title:'rocky'");

            using (var readSession = CreateReadSession(dir, version))
            using (var collector = new Collector(readSession))
            {
                var scores = collector.Collect(query).ToList();

                Assert.AreEqual(1, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
            }

            query = new QueryParser(new Analyzer()).Parse("+title:'rambo'");

            using (var readSession = CreateReadSession(dir, version))
            using (var collector = new Collector(readSession))
            {
                var scores = collector.Collect(query).ToList();

                Assert.AreEqual(2, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
            }

            query = new QueryParser(new Analyzer()).Parse("+title:'rocky' title:'rambo'");

            using (var readSession = CreateReadSession(dir, version))
            using (var collector = new Collector(readSession))
            {
                var scores = collector.Collect(query).ToList();

                Assert.AreEqual(3, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
            }
        }
Ejemplo n.º 6
0
        // Writes two batches of documents to the same directory with separate
        // transactions and verifies, after each write, that the phrase search
        // "rambo first" returns exactly the document whose "_id" is "0".
        // NOTE(review): presumably each Write() produces its own index segment, as
        // the test name suggests - not verifiable from this method alone.
        public void Can_search_two_index_segments()
        {
            var dir = CreateDir();

            var docs = new List<dynamic>
            {
                new { _id = "0", title = "Rambo First Blood" },
                new { _id = "1", title = "the rain man" },
                new { _id = "2", title = "the good, the bad and the ugly" }
            }.ToDocuments(primaryKeyFieldName: "_id");

            // The using block guarantees Dispose() runs even when Write() throws.
            // The value returned by Write() is not needed because Searcher only takes the directory.
            using (var writer = new FullTextUpsertTransaction(
                dir, new Analyzer(), compression: Compression.NoCompression, documents: docs))
            {
                writer.Write();
            }

            using (var searcher = new Searcher(dir))
            {
                var result = searcher.Search("title:\"rambo first\"");

                Assert.AreEqual(1, result.Total);
                Assert.AreEqual(1, result.Docs.Count);

                Assert.IsTrue(result.Docs.Any(d => d.TableRow.Fields["_id"].Value == "0"));
            }

            var moreDocs = new List<dynamic>
            {
                new { _id = "3", title = "rocky 2" },
                new { _id = "4", title = "rambo 2" },
                new { _id = "5", title = "the raiders of the lost ark" },
            }.ToDocuments(primaryKeyFieldName: "_id");

            using (var writer2 = new FullTextUpsertTransaction(
                dir, new Analyzer(), compression: Compression.NoCompression, documents: moreDocs))
            {
                writer2.Write();
            }

            // The second batch contains no title with the phrase "rambo first",
            // so the result must be unchanged.
            using (var searcher = new Searcher(dir))
            {
                var result = searcher.Search("title:\"rambo first\"");

                Assert.AreEqual(1, result.Total);
                Assert.AreEqual(1, result.Docs.Count);

                Assert.IsTrue(result.Docs.Any(d => d.TableRow.Fields["_id"].Value == "0"));
            }
        }
Ejemplo n.º 7
0
        // Indexes six documents whose "title" field holds the integers 5 down to 0
        // and verifies an exact-value query and a range query against them.
        public void Can_collect_numbers()
        {
            var dir = CreateDir();

            // CreateDirectory does nothing when the directory already exists,
            // so no Exists() check is needed.
            Directory.CreateDirectory(dir);

            var docs = new List<dynamic>
            {
                new { _id = "0", title = 5 },
                new { _id = "1", title = 4 },
                new { _id = "2", title = 3 },
                new { _id = "3", title = 2 },
                new { _id = "4", title = 1 },
                new { _id = "5", title = 0 }
            }.ToDocuments(primaryKeyFieldName: "_id");

            long version;

            // The using block guarantees Dispose() runs even when Write() throws.
            using (var writer = new FullTextUpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs))
            {
                version = writer.Write();
            }

            var query = new QueryParser().Parse("title:3");

            using (var readSession = CreateReadSession(dir, version))
            using (var collector = new Collector(readSession))
            {
                var scores = collector.Collect(query).ToList();

                Assert.AreEqual(1, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
            }

            // Range over "title" between 0 and 3. The assertions below expect the
            // documents holding 0, 1, 2 and 3, i.e. both bounds are included.
            query = new QueryParser().Parse("title<3+title>0");

            using (var readSession = CreateReadSession(dir, version))
            using (var collector = new Collector(readSession))
            {
                var scores = collector.Collect(query).ToList();

                Assert.AreEqual(4, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 5));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
            }
        }
Ejemplo n.º 8
0
        // Indexes six titles and verifies two exact-term searches through Searcher:
        //   "title:'rambo'" -> rows with TableId 0 and 1, and row 0 returns its stored
        //                      title with the original casing ("Rambo First Blood")
        //   "title:'the'"   -> rows with TableId 3, 4 and 5
        public void Can_search_exact()
        {
            var dir = CreateDir();

            var docs = new List<dynamic>
            {
                new { _id = "0", title = "Rambo First Blood" },
                new { _id = "1", title = "rambo 2" },
                new { _id = "2", title = "rocky 2" },
                new { _id = "3", title = "the raiders of the lost ark" },
                new { _id = "4", title = "the rain man" },
                new { _id = "5", title = "the good, the bad and the ugly" }
            }.ToDocuments(primaryKeyFieldName: "_id");

            // The using block guarantees Dispose() runs even when Write() throws.
            // The value returned by Write() is not needed because Searcher only takes the directory.
            using (var writer = new FullTextUpsertTransaction(
                dir, new Analyzer(), compression: Compression.NoCompression, documents: docs))
            {
                writer.Write();
            }

            using (var searcher = new Searcher(dir))
            {
                var result = searcher.Search("title:'rambo'");

                Assert.AreEqual(2, result.Total);
                Assert.AreEqual(2, result.Docs.Count);

                Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 0));
                Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 1));

                Assert.AreEqual(
                    "Rambo First Blood",
                    result.Docs.First(d => d.TableRow.TableId == 0).TableRow.Fields["title"].Value);
            }

            using (var searcher = new Searcher(dir))
            {
                var result = searcher.Search("title:'the'");

                Assert.AreEqual(3, result.Total);
                Assert.AreEqual(3, result.Docs.Count);
                Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 3));
                Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 4));
                Assert.IsTrue(result.Docs.Any(d => d.TableRow.TableId == 5));
            }
        }
Ejemplo n.º 9
0
        // Indexes six documents with a "created" timestamp and verifies that a range
        // query bounded by lowerBound and upperBound collects exactly the two
        // documents created at those bounds (DocumentIds 1 and 2).
        public void Can_collect_date_range()
        {
            var dir = CreateDir();

            // CreateDirectory does nothing when the directory already exists,
            // so no Exists() check is needed.
            Directory.CreateDirectory(dir);

            // Read the clock once and derive every timestamp from that single value,
            // so the fixture does not depend on the time elapsed between clock reads.
            var lowerBound = DateTime.Now;
            var upperBound = lowerBound.AddDays(1);

            var docs = new List<dynamic>
            {
                new { _id = "0", created = lowerBound.AddDays(-1) },
                new { _id = "1", created = lowerBound },
                new { _id = "2", created = upperBound },
                new { _id = "3", created = upperBound.AddDays(1) },
                new { _id = "4", created = upperBound.AddDays(2) },
                new { _id = "5", created = upperBound.AddDays(3) }
            }.ToDocuments(primaryKeyFieldName: "_id");

            long version;

            // The using block guarantees Dispose() runs even when Write() throws.
            using (var writer = new FullTextUpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs))
            {
                version = writer.Write();
            }

            // Range over "created" between lowerBound and upperBound; each date is
            // wrapped in backslashes inside the query string.
            // NOTE(review): the dates are formatted by DateTime.ToString() under the
            // current culture - confirm the parser reads that format on every machine.
            var query = new QueryParser().Parse("created>\\" + lowerBound + "\\+created<\\" + upperBound + "\\");

            using (var readSession = CreateReadSession(dir, version))
            using (var collector = new Collector(readSession))
            {
                var scores = collector.Collect(query).ToList();

                Assert.AreEqual(2, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 2));
            }
        }
Ejemplo n.º 10
0
        // Indexes six documents keyed by "_id" (a mix of numeric and alphanumeric
        // values) and verifies that querying the "_id" field collects the single
        // matching document:
        //   "_id:'3'"     -> DocumentId 3
        //   "_id:'5five'" -> DocumentId 5
        public void Can_collect_by_id()
        {
            var indexDir = CreateDir();

            var documents = new List<dynamic>
            {
                new { _id = "abc0123", title = "rambo first blood" },
                new { _id = "1", title = "rambo 2" },
                new { _id = "2", title = "rocky 2" },
                new { _id = "3", title = "the raiders of the lost ark" },
                new { _id = "four", title = "the rain man" },
                new { _id = "5five", title = "the good, the bad and the ugly" }
            }.ToDocuments(primaryKeyFieldName: "_id");

            long indexVersion;

            using (var upsert = new FullTextUpsertTransaction(indexDir, new Analyzer(), Compression.Lz, documents))
            {
                indexVersion = upsert.Write();
            }

            // Plain numeric key.
            using (var session = CreateReadSession(indexDir, indexVersion))
            using (var collector = new Collector(session))
            {
                var hits = collector.Collect(new QueryParser().Parse("_id:'3'"));

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
            }

            // Alphanumeric key.
            using (var session = CreateReadSession(indexDir, indexVersion))
            using (var collector = new Collector(session))
            {
                var hits = collector.Collect(new QueryParser().Parse("_id:'5five'"));

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
            }
        }