Exemplo n.º 1
0
        /// <summary>
        /// Writes six titled documents to an index and checks that the query "+title:the"
        /// collects documents 3, 4 and 5, while "+title:the +title:ugly" collects only document 5.
        /// </summary>
        public void Can_collect_exact_phrase_joined_by_and()
        {
            var dir = Path.Combine(Setup.Dir, "Can_collect_exact_phrase_joined_by_and");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var docs = new List<Dictionary<string, string>>
            {
                new Dictionary<string, string> { { "_id", "0" }, { "title", "rambo first blood" } },
                new Dictionary<string, string> { { "_id", "1" }, { "title", "rambo 2" } },
                new Dictionary<string, string> { { "_id", "2" }, { "title", "rocky 2" } },
                new Dictionary<string, string> { { "_id", "3" }, { "title", "the raiders of the lost ark" } },
                new Dictionary<string, string> { { "_id", "4" }, { "title", "the rain man" } },
                new Dictionary<string, string> { { "_id", "5" }, { "title", "the good, the bad and the ugly" } }
            };

            string indexName;

            using (var indexWriter = new StreamWriteOperation(dir, new Analyzer(), docs.ToStream()))
            {
                indexName = indexWriter.Execute();
            }

            // Both collectors below load the same index info file.
            var ixFileName = Path.Combine(dir, indexName + ".ix");

            // A single required term: every title containing "the" must be collected.
            var singleTermQuery = new QueryParser(new Analyzer()).Parse("+title:the");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var hits = collector.Collect(singleTermQuery).ToList();

                Assert.That(hits.Count, Is.EqualTo(3));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
            }

            // Two required terms: only the title containing both "the" and "ugly" remains.
            var bothTermsQuery = new QueryParser(new Analyzer()).Parse("+title:the +title:ugly");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var hits = collector.Collect(bothTermsQuery).ToList();

                Assert.That(hits.Count, Is.EqualTo(1));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Writes five titled documents to an index and checks that a prefix query for "ra"
        /// on the "title" field collects documents 0, 1, 3 and 4.
        /// </summary>
        public void Can_collect_prefixed()
        {
            var dir = CreateDir();

            var docs = new List <dynamic>
            {
                new { _id = "0", title = "rambo" },
                new { _id = "1", title = "rambo 2" },
                new { _id = "2", title = "rocky 2" },
                new { _id = "3", title = "raiders of the lost ark" },
                new { _id = "4", title = "rain man" }
            }.ToDocuments(primaryKeyFieldName: "_id");

            var  writer = new UpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs);
            long indexName;

            // Dispose the writer even when Write() throws, so a failing write
            // does not leave the transaction undisposed.
            try
            {
                indexName = writer.Write();
            }
            finally
            {
                writer.Dispose();
            }

            using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
            {
                var scores = collector.Collect(new QueryContext("title", "ra")
                {
                    Prefix = true
                }).ToList();

                Assert.AreEqual(4, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Commits five documents built from grouped fields and checks that a prefix query
        /// for "ra" on the "title" field collects documents 0, 1, 3 and 4.
        /// </summary>
        public void Can_collect_prefixed()
        {
            var dir = Path.Combine(Dir, "Can_collect_prefixed");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var fields = new List<Field>
            {
                new Field(0, "_id", "0"), new Field(0, "title", "rambo"),
                new Field(1, "_id", "1"), new Field(1, "title", "rambo 2"),
                new Field(2, "_id", "2"), new Field(2, "title", "rocky 2"),
                new Field(3, "_id", "3"), new Field(3, "title", "raiders of the lost ark"),
                new Field(4, "_id", "4"), new Field(4, "title", "rain man")
            };

            // One document per distinct document id; the sequence stays lazily evaluated.
            var docs = fields
                .GroupBy(field => field.DocumentId)
                .Select(grouping => new Document(grouping.Key, grouping.ToList()));

            var upsert = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
            long indexName = upsert.Commit();

            var ixFileName = Path.Combine(dir, indexName + ".ix");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var prefixQuery = new QueryContext("title", "ra");
                prefixQuery.Prefix = true;

                var hits = collector.Collect(prefixQuery).ToList();

                Assert.AreEqual(4, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 0));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 1));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }
        }
Exemplo n.º 4
0
 /// <summary>
 /// Creates a score entry by storing the supplied values in the corresponding members.
 /// </summary>
 /// <param name="documentId">Value assigned to <c>DocumentId</c>.</param>
 /// <param name="docHash">Value assigned to <c>DocHash</c>.</param>
 /// <param name="score">Value assigned to <c>Score</c>.</param>
 /// <param name="ix">Value assigned to <c>Ix</c>.</param>
 public DocumentScore(int documentId, UInt64 docHash, double score, IxInfo ix)
 {
     this.DocumentId = documentId;
     this.Score = score;
     this.Ix = ix;
     this.DocHash = docHash;
 }
Exemplo n.º 5
0
        /// <summary>
        /// Writes five titled documents to an index and checks that a non-fuzzy query for "raider"
        /// collects only document 4, while a fuzzy query with one edit collects documents 3 and 4.
        /// </summary>
        public void Can_collect_near()
        {
            var dir = Path.Combine(Setup.Dir, "Can_collect_near");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var docs = new List<Dictionary<string, string>>
            {
                new Dictionary<string, string> { { "_id", "0" }, { "title", "rambo" } },
                new Dictionary<string, string> { { "_id", "1" }, { "title", "rambo 2" } },
                new Dictionary<string, string> { { "_id", "2" }, { "title", "rocky 2" } },
                new Dictionary<string, string> { { "_id", "3" }, { "title", "raiders of the lost ark" } },
                new Dictionary<string, string> { { "_id", "4" }, { "title", "tomb raider" } }
            };

            string indexName;

            using (var indexWriter = new StreamWriteOperation(dir, new Analyzer(), docs.ToStream()))
            {
                indexName = indexWriter.Execute();
            }

            var ixFileName = Path.Combine(dir, indexName + ".ix");

            // Fuzzy matching switched off: only the exact term "raider" is expected to match.
            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var exactQuery = new QueryContext("title", "raider");
                exactQuery.Fuzzy = false;
                exactQuery.Edits = 1;

                var hits = collector.Collect(exactQuery).ToList();

                Assert.That(hits.Count, Is.EqualTo(1));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }

            // Fuzzy matching switched on with one edit: "raiders" is expected to match as well.
            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var fuzzyQuery = new QueryContext("title", "raider");
                fuzzyQuery.Fuzzy = true;
                fuzzyQuery.Edits = 1;

                var hits = collector.Collect(fuzzyQuery).ToList();

                Assert.That(hits.Count, Is.EqualTo(2));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Commits six documents and checks that "+title:the" collects documents 3, 4 and 5,
        /// while "+title:the -title:ugly" collects only documents 3 and 4.
        /// </summary>
        public void Can_collect_exact_phrase_joined_by_not()
        {
            var dir = Path.Combine(Dir, "Can_collect_exact_phrase_joined_by_not");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var docs = new List<List<Field>>
            {
                new List<Field> { new Field("_id", "0"), new Field("title", "rambo first blood") },
                new List<Field> { new Field("_id", "1"), new Field("title", "rambo 2") },
                new List<Field> { new Field("_id", "2"), new Field("title", "rocky 2") },
                new List<Field> { new Field("_id", "3"), new Field("title", "raiders of the lost ark") },
                new List<Field> { new Field("_id", "4"), new Field("title", "the rain man") },
                new List<Field> { new Field("_id", "5"), new Field("title", "the good, the bad and the ugly") }
            };

            var upsert = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.QuickLz, primaryKey: "_id", documents: docs);
            long indexName = upsert.Commit();

            var ixFileName = Path.Combine(dir, indexName + ".ix");

            // Required term only.
            var includeQuery = new QueryParser(new Analyzer()).Parse("+title:the");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var hits = collector.Collect(includeQuery).ToList();

                Assert.AreEqual(3, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
            }

            // Required term combined with an excluded term.
            var excludeQuery = new QueryParser(new Analyzer()).Parse("+title:the -title:ugly");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var hits = collector.Collect(excludeQuery).ToList();

                Assert.AreEqual(2, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Commits six documents and checks three queries: "+title:rocky" collects document 2,
        /// "+title:rambo" collects documents 0 and 1, and "+title:rocky title:rambo" collects all three.
        /// </summary>
        public void Can_collect_exact_phrase_joined_by_or()
        {
            var dir = Path.Combine(CreateDir(), "Can_collect_exact_phrase_joined_by_or");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var docs = new List<dynamic>
            {
                new { _id = "0", title = "rambo first blood" },
                new { _id = "1", title = "rambo 2" },
                new { _id = "2", title = "rocky 2" },
                new { _id = "3", title = "raiders of the lost ark" },
                new { _id = "4", title = "the rain man" },
                new { _id = "5", title = "the good, the bad and the ugly" }
            }.ToDocuments();

            var upsert = new DocumentsUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
            long indexName = upsert.Commit();

            var ixFileName = Path.Combine(dir, indexName + ".ix");

            // First term on its own.
            var rockyQuery = new QueryParser(new Analyzer()).Parse("+title:rocky");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var hits = collector.Collect(rockyQuery).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 2));
            }

            // Second term on its own.
            var ramboQuery = new QueryParser(new Analyzer()).Parse("+title:rambo");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var hits = collector.Collect(ramboQuery).ToList();

                Assert.AreEqual(2, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 0));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 1));
            }

            // Both terms in one query: the result is expected to hold the hits of both.
            var eitherQuery = new QueryParser(new Analyzer()).Parse("+title:rocky title:rambo");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var hits = collector.Collect(eitherQuery).ToList();

                Assert.AreEqual(3, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 0));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 1));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 2));
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Commits five documents and checks that a non-fuzzy query for "raider" collects only
        /// document 4, while a fuzzy query with one edit collects documents 3 and 4.
        /// </summary>
        public void Can_collect_near()
        {
            var dir = Path.Combine(Dir, "Can_collect_near");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var docs = new List<List<Field>>
            {
                new List<Field> { new Field("_id", "0"), new Field("title", "rambo") },
                new List<Field> { new Field("_id", "1"), new Field("title", "rambo 2") },
                new List<Field> { new Field("_id", "2"), new Field("title", "rocky 2") },
                new List<Field> { new Field("_id", "3"), new Field("title", "raiders of the lost ark") },
                new List<Field> { new Field("_id", "4"), new Field("title", "tomb raider") }
            };

            var upsert = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.QuickLz, primaryKey: "_id", documents: docs);
            long indexName = upsert.Commit();

            var ixFileName = Path.Combine(dir, indexName + ".ix");

            // Fuzzy matching switched off.
            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var exactQuery = new QueryContext("title", "raider");
                exactQuery.Fuzzy = false;
                exactQuery.Edits = 1;

                var hits = collector.Collect(exactQuery).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }

            // Fuzzy matching switched on with one edit.
            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var fuzzyQuery = new QueryContext("title", "raider");
                fuzzyQuery.Fuzzy = true;
                fuzzyQuery.Edits = 1;

                var hits = collector.Collect(fuzzyQuery).ToList();

                Assert.AreEqual(2, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Writes five titled documents to an index and checks that the query "+title:p**n~"
        /// collects all five, in the document-id order 4, 0, 1, 3, 2.
        /// </summary>
        public void Can_rank_near_term()
        {
            var dir = Path.Combine(Setup.Dir, "Can_rank_near_term");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var docs = new List<Dictionary<string, string>>
            {
                new Dictionary<string, string> { { "_id", "0" }, { "title", "Gustav Horn, Count of Pori" } },
                new Dictionary<string, string> { { "_id", "1" }, { "title", "Port au Port Peninsula" } },
                new Dictionary<string, string> { { "_id", "2" }, { "title", "Pore" } },
                new Dictionary<string, string> { { "_id", "3" }, { "title", "Born 2.0" } },
                new Dictionary<string, string> { { "_id", "4" }, { "title", "P**n" } }
            };

            string indexName;

            using (var indexWriter = new StreamWriteOperation(dir, new Analyzer(), docs.ToStream()))
            {
                indexName = indexWriter.Execute();
            }

            var nearQuery = new QueryParser(new Analyzer()).Parse("+title:p**n~");
            var ixFileName = Path.Combine(dir, indexName + ".ix");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var ranked = collector.Collect(nearQuery).ToList();

                Assert.That(ranked.Count, Is.EqualTo(5));

                // Document ids in the order the collector is expected to return them.
                var expectedOrder = new[] { 4, 0, 1, 3, 2 };

                for (var position = 0; position < expectedOrder.Length; position++)
                {
                    Assert.IsTrue(ranked[position].DocumentId.Equals(expectedOrder[position]));
                }
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Writes five titled documents to an index and checks that the query
        /// "+title:age of p**n~" collects all five, with document 4 returned first.
        /// </summary>
        public void Can_rank_near_phrase()
        {
            var dir = Path.Combine(Setup.Dir, "Can_rank_near_phrase");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var docs = new List<Dictionary<string, string>>
            {
                new Dictionary<string, string> { { "_id", "0" }, { "title", "Tage Mage" } },
                new Dictionary<string, string> { { "_id", "1" }, { "title", "aye-aye" } },
                new Dictionary<string, string> { { "_id", "2" }, { "title", "Cage Rage Championships" } },
                new Dictionary<string, string> { { "_id", "3" }, { "title", "Page Up and Page Down keys" } },
                new Dictionary<string, string> { { "_id", "4" }, { "title", "Golden Age of P**n" } }
            };

            string indexName;

            using (var indexWriter = new StreamWriteOperation(dir, new Analyzer(), docs.ToStream()))
            {
                indexName = indexWriter.Execute();
            }

            var nearPhraseQuery = new QueryParser(new Analyzer()).Parse("+title:age of p**n~");
            var ixFileName = Path.Combine(dir, indexName + ".ix");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var ranked = collector.Collect(nearPhraseQuery).ToList();

                Assert.That(ranked.Count, Is.EqualTo(5));

                var topHit = ranked.First();
                Assert.IsTrue(topHit.DocumentId.Equals(4));
            }
        }
Exemplo n.º 11
0
        /// <summary>
        /// Exports the documents of an index to a text file: a "[" line, then one
        /// JSON-serialized document per line, then a closing "]".
        /// </summary>
        /// <param name="args">
        /// Command line arguments. "--source-file" and "--target-file" are required and must each be
        /// followed by a value. "--take" and "--skip" are optional and must be followed by an integer.
        /// </param>
        /// <exception cref="ArgumentException">
        /// A required option is missing, or an option is the last argument and has no value.
        /// </exception>
        static void Export(string[] args)
        {
            var take = int.MaxValue;
            var skip = 0;

            // As before, "--take"/"--skip" are only honoured at an index above zero
            // (index 0 is presumably the command name - TODO confirm against the caller).
            var takeIndex = Array.IndexOf(args, "--take");
            if (takeIndex > 0)
            {
                if (takeIndex + 1 >= args.Length)
                {
                    throw new ArgumentException("--take must be followed by a number", "args");
                }
                take = int.Parse(args[takeIndex + 1]);
            }

            var skipIndex = Array.IndexOf(args, "--skip");
            if (skipIndex > 0)
            {
                if (skipIndex + 1 >= args.Length)
                {
                    throw new ArgumentException("--skip must be followed by a number", "args");
                }
                skip = int.Parse(args[skipIndex + 1]);
            }

            // Array.IndexOf returns -1 when the option is absent; without this check the
            // "+ 1" below would silently pick args[0] as the file name.
            var sourceIndex = Array.IndexOf(args, "--source-file");
            if (sourceIndex < 0 || sourceIndex + 1 >= args.Length)
            {
                throw new ArgumentException("--source-file is required and must be followed by a file name", "args");
            }

            var targetIndex = Array.IndexOf(args, "--target-file");
            if (targetIndex < 0 || targetIndex + 1 >= args.Length)
            {
                throw new ArgumentException("--target-file is required and must be followed by a file name", "args");
            }

            var sourceFileName = args[sourceIndex + 1];
            var targetFileName = args[targetIndex + 1];

            // The index info file sits next to the source file and shares its base name.
            var dir     = Path.GetDirectoryName(sourceFileName);
            var version = Path.GetFileNameWithoutExtension(sourceFileName);
            var ix      = IxInfo.Load(Path.Combine(dir, version + ".ix"));

            Console.WriteLine("migrating...");

            var writeTimer = new Stopwatch();

            writeTimer.Start();

            using (var outStream = new FileStream(targetFileName, FileMode.Create))
                using (var jsonWriter = new StreamWriter(outStream, Encoding.UTF8))
                    using (var documents = new RDocStream(sourceFileName, ix.PrimaryKeyFieldName, skip, take))
                    {
                        jsonWriter.WriteLine("[");

                        // NOTE(review): documents are written one per line with no separating
                        // commas; the format is left unchanged here - confirm what readers expect.
                        foreach (var document in documents.ReadSource())
                        {
                            var dic  = document.Fields.ToDictionary(x => x.Key, y => y.Value.Value);
                            var json = JsonConvert.SerializeObject(dic, Formatting.None);
                            jsonWriter.WriteLine(json);
                        }

                        jsonWriter.Write("]");
                    }

            Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Writes six titled documents to an index and checks that "+title:the" collects
        /// documents 3, 4 and 5, while "+title:the -title:ugly" collects only documents 3 and 4.
        /// </summary>
        public void Can_collect_exact_phrase_joined_by_not()
        {
            var dir = CreateDir();

            var docs = new List <dynamic>
            {
                new { _id = "0", title = "rambo first blood" },
                new { _id = "1", title = "rambo 2" },
                new { _id = "2", title = "rocky 2" },
                new { _id = "3", title = "raiders of the lost ark" },
                new { _id = "4", title = "the rain man" },
                new { _id = "5", title = "the good, the bad and the ugly" }
            }.ToDocuments(primaryKeyFieldName: "_id");

            var  writer = new UpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs);
            long indexName;

            // Dispose the writer even when Write() throws, so a failing write
            // does not leave the transaction undisposed.
            try
            {
                indexName = writer.Write();
            }
            finally
            {
                writer.Dispose();
            }

            // Required term only.
            var query = new QueryParser(new Analyzer()).Parse("+title:the");

            using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
            {
                var scores = collector.Collect(query).ToList();

                Assert.AreEqual(3, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 5));
            }

            // Required term combined with an excluded term.
            query = new QueryParser(new Analyzer()).Parse("+title:the -title:ugly");

            using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
            {
                var scores = collector.Collect(query).ToList();

                Assert.AreEqual(2, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Commits six documents built from grouped fields and checks that both "+title:rain man"
        /// and the near variant "+title:rain man~" (parser created with 0.75f) collect only document 4.
        /// </summary>
        public void Can_collect_near_phrase()
        {
            var dir = Path.Combine(Dir, "Can_collect_near_phrase_joined_by_and");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var fields = new List<Field>
            {
                new Field(0, "_id", "0"), new Field(0, "title", "rambo first blood"),
                new Field(1, "_id", "1"), new Field(1, "title", "rambo 2"),
                new Field(2, "_id", "2"), new Field(2, "title", "rocky 2"),
                new Field(3, "_id", "3"), new Field(3, "title", "the raid"),
                new Field(4, "_id", "4"), new Field(4, "title", "the rain man"),
                new Field(5, "_id", "5"), new Field(5, "title", "the good, the bad and the ugly")
            };

            // One document per distinct document id; the sequence stays lazily evaluated.
            var docs = fields
                .GroupBy(field => field.DocumentId)
                .Select(grouping => new Document(grouping.Key, grouping.ToList()));

            var upsert = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
            long indexName = upsert.Commit();

            var ixFileName = Path.Combine(dir, indexName + ".ix");

            // Plain phrase.
            var phraseQuery = new QueryParser(new Analyzer()).Parse("+title:rain man");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var hits = collector.Collect(phraseQuery).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }

            // Near phrase (trailing "~"), parsed with a second constructor argument of 0.75f.
            var nearPhraseQuery = new QueryParser(new Analyzer(), 0.75f).Parse("+title:rain man~");

            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var hits = collector.Collect(nearPhraseQuery).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Writes six titled documents to an index, checks that a "rambo" title query collects
        /// documents 0 and 1, deletes primary key "0", and checks that only document 1 remains.
        /// </summary>
        public void Can_delete()
        {
            var dir = CreateDir();

            var docs = new List <dynamic>
            {
                new { _id = "0", title = "rambo first blood" },
                new { _id = "1", title = "rambo 2" },
                new { _id = "2", title = "rocky 2" },
                new { _id = "3", title = "raiders of the lost ark" },
                new { _id = "4", title = "the rain man" },
                new { _id = "5", title = "the good, the bad and the ugly" }
            }.ToDocuments(primaryKeyFieldName: "_id");

            var  writer = new UpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs);
            long indexName;

            // Dispose the writer even when Write() throws, so a failing write
            // does not leave the transaction undisposed.
            try
            {
                indexName = writer.Write();
            }
            finally
            {
                writer.Dispose();
            }

            // Before the delete: both "rambo" documents are collected.
            using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
            {
                var scores = collector.Collect(new QueryContext("title", "rambo")).ToList();

                Assert.AreEqual(2, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
                Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
            }

            var operation = new DeleteByPrimaryKeyTransaction(dir, new[] { "0" });

            operation.Commit();

            // After the delete: the same query against the same index file collects only document 1.
            using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
            {
                var scores = collector.Collect(new QueryContext("title", "rambo")).ToList();

                Assert.AreEqual(1, scores.Count);
                Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
            }
        }
Exemplo n.º 15
0
        /// <summary>
        /// Commits six documents with mixed numeric and alphanumeric "_id" values and checks that
        /// querying "_id" for "3" collects document 3 and querying for "5five" collects document 5.
        /// </summary>
        public void Can_collect_by_id()
        {
            var dir = Path.Combine(CreateDir(), "Can_collect_by_id");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var docs = new List<dynamic>
            {
                new { _id = "abc0123", title = "rambo first blood" },
                new { _id = "1", title = "rambo 2" },
                new { _id = "2", title = "rocky 2" },
                new { _id = "3", title = "the raiders of the lost ark" },
                new { _id = "four", title = "the rain man" },
                new { _id = "5five", title = "the good, the bad and the ugly" }
            }.ToDocuments();

            var upsert = new DocumentsUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
            long indexName = upsert.Commit();

            var ixFileName = Path.Combine(dir, indexName + ".ix");

            // Purely numeric primary key.
            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var numericIdQuery = new QueryContext("_id", "3");
                var hits = collector.Collect(numericIdQuery).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
            }

            // Alphanumeric primary key.
            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var mixedIdQuery = new QueryContext("_id", "5five");
                var hits = collector.Collect(mixedIdQuery).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
            }
        }
Exemplo n.º 16
0
        /// <summary>
        /// Commits six documents built from grouped fields, ordered by document id, and checks that
        /// querying "_id" for "3" collects document 3 and querying for "5five" collects document 5.
        /// </summary>
        public void Can_collect_by_id()
        {
            var dir = Path.Combine(Dir, "Can_collect_by_id");

            if (Directory.Exists(dir) == false)
            {
                Directory.CreateDirectory(dir);
            }

            var fields = new List<Field>
            {
                new Field(0, "_id", "abc0123"), new Field(0, "title", "rambo first blood"),
                new Field(1, "_id", "1"), new Field(1, "title", "rambo 2"),
                new Field(2, "_id", "2"), new Field(2, "title", "rocky 2"),
                new Field(3, "_id", "3"), new Field(3, "title", "the raiders of the lost ark"),
                new Field(4, "_id", "four"), new Field(4, "title", "the rain man"),
                new Field(5, "_id", "5five"), new Field(5, "title", "the good, the bad and the ugly")
            };

            // One document per distinct document id, sorted by Id; the sequence stays lazily evaluated.
            var docs = fields
                .GroupBy(field => field.DocumentId)
                .Select(grouping => new Document(grouping.Key, grouping.ToList()))
                .OrderBy(document => document.Id);

            var upsert = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
            long indexName = upsert.Commit();

            var ixFileName = Path.Combine(dir, indexName + ".ix");

            // Purely numeric primary key.
            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var numericIdQuery = new QueryContext("_id", "3");
                var hits = collector.Collect(numericIdQuery).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
            }

            // Alphanumeric primary key.
            using (var collector = new Collector(dir, IxInfo.Load(ixFileName), new Tfidf()))
            {
                var mixedIdQuery = new QueryContext("_id", "5five");
                var hits = collector.Collect(mixedIdQuery).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
            }
        }
Exemplo n.º 17
0
        /// <summary>
        /// Reads the postings of the given terms from the "{VersionId}.pos" file in
        /// <paramref name="directory"/> and yields them as one flattened list.
        /// </summary>
        /// <param name="directory">Directory that contains the postings file.</param>
        /// <param name="ix">Index info whose <c>VersionId</c> names the postings file.</param>
        /// <param name="terms">Terms whose <c>Word.PostingsAddress</c> values locate the postings to read.</param>
        /// <returns>
        /// A lazily evaluated sequence holding a single list with the postings of all terms.
        /// The file is opened only once enumeration starts.
        /// </returns>
        public static IEnumerable <IList <DocumentPosting> > ReadPostings(string directory, IxInfo ix, IEnumerable <Term> terms)
        {
            var postingsPath = Path.Combine(directory, string.Format("{0}.{1}", ix.VersionId, "pos"));

            // Addresses are sorted by position before being handed to the reader.
            var sortedAddresses = terms
                .Select(t => t.Word.PostingsAddress.Value)
                .OrderBy(address => address.Position)
                .ToList();

            var postingsStream = new FileStream(
                postingsPath,
                FileMode.Open,
                FileAccess.Read,
                FileShare.Read,
                4096,
                FileOptions.SequentialScan);

            using (var postingsReader = new PostingsReader(postingsStream))
            {
                var merged = postingsReader.Get(sortedAddresses).SelectMany(list => list).ToList();
                yield return merged;
            }
        }