public void Can_collect_prefixed()
{
    // Each test writes its index into its own directory under the shared test root.
    var dir = Path.Combine(Dir, "Can_collect_prefixed");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    // Flat field list; grouping by document id reassembles the five documents.
    var fields = new List<Field>
    {
        new Field(0, "_id", "0"), new Field(0, "title", "rambo"),
        new Field(1, "_id", "1"), new Field(1, "title", "rambo 2"),
        new Field(2, "_id", "2"), new Field(2, "title", "rocky 2"),
        new Field(3, "_id", "3"), new Field(3, "title", "raiders of the lost ark"),
        new Field(4, "_id", "4"), new Field(4, "title", "rain man")
    };
    var docs = fields
        .GroupBy(f => f.DocumentId)
        .Select(g => new Document(g.Key, g.ToList()));

    var writer = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
    long indexName = writer.Commit();

    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        // Prefix search on "ra" should match every title except "rocky 2".
        var hits = collector.Collect(new QueryContext("title", "ra") { Prefix = true }).ToList();

        Assert.AreEqual(4, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 0));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 1));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 3));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 4));
    }
}
public void Can_collect_exact_phrase_joined_by_and()
{
    var dir = Path.Combine(Setup.Dir, "Can_collect_exact_phrase_joined_by_and");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    var docs = new List<Dictionary<string, string>>
    {
        new Dictionary<string, string> { { "_id", "0" }, { "title", "rambo first blood" } },
        new Dictionary<string, string> { { "_id", "1" }, { "title", "rambo 2" } },
        new Dictionary<string, string> { { "_id", "2" }, { "title", "rocky 2" } },
        new Dictionary<string, string> { { "_id", "3" }, { "title", "the raiders of the lost ark" } },
        new Dictionary<string, string> { { "_id", "4" }, { "title", "the rain man" } },
        new Dictionary<string, string> { { "_id", "5" }, { "title", "the good, the bad and the ugly" } }
    };

    string indexName;
    using (var writer = new StreamWriteOperation(dir, new Analyzer(), docs.ToStream()))
    {
        indexName = writer.Execute();
    }

    // One required term: every title containing "the" matches.
    var query = new QueryParser(new Analyzer()).Parse("+title:the");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(query).ToList();

        Assert.That(hits.Count, Is.EqualTo(3));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 3));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 4));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 5));
    }

    // Two required terms (AND semantics): only the single doc containing both survives.
    query = new QueryParser(new Analyzer()).Parse("+title:the +title:ugly");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(query).ToList();

        Assert.That(hits.Count, Is.EqualTo(1));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 5));
    }
}
public void Can_collect_prefixed()
{
    var dir = CreateDir();

    var docs = new List<dynamic>
    {
        new { _id = "0", title = "rambo" },
        new { _id = "1", title = "rambo 2" },
        new { _id = "2", title = "rocky 2" },
        new { _id = "3", title = "raiders of the lost ark" },
        new { _id = "4", title = "rain man" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    long indexName;
    // FIX: wrap the transaction in "using" so it is disposed even when Write()
    // throws; the original only called Dispose() on the happy path.
    using (var writer = new UpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs))
    {
        indexName = writer.Write();
    }

    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        // Prefix search on "ra" should match every title except "rocky 2".
        var scores = collector.Collect(new QueryContext("title", "ra") { Prefix = true }).ToList();

        Assert.AreEqual(4, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
    }
}
public void Can_collect_near()
{
    var dir = Path.Combine(Setup.Dir, "Can_collect_near");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    var docs = new List<Dictionary<string, string>>
    {
        new Dictionary<string, string> { { "_id", "0" }, { "title", "rambo" } },
        new Dictionary<string, string> { { "_id", "1" }, { "title", "rambo 2" } },
        new Dictionary<string, string> { { "_id", "2" }, { "title", "rocky 2" } },
        new Dictionary<string, string> { { "_id", "3" }, { "title", "raiders of the lost ark" } },
        new Dictionary<string, string> { { "_id", "4" }, { "title", "tomb raider" } }
    };

    string indexName;
    using (var writer = new StreamWriteOperation(dir, new Analyzer(), docs.ToStream()))
    {
        indexName = writer.Execute();
    }

    // Exact match (fuzzy off): only "tomb raider" contains the literal term "raider".
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(new QueryContext("title", "raider") { Fuzzy = false, Edits = 1 }).ToList();

        Assert.That(hits.Count, Is.EqualTo(1));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 4));
    }

    // Fuzzy with one edit: "raiders" is now within distance 1 of "raider".
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(new QueryContext("title", "raider") { Fuzzy = true, Edits = 1 }).ToList();

        Assert.That(hits.Count, Is.EqualTo(2));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 3));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 4));
    }
}
public void Can_collect_exact_phrase_joined_by_not()
{
    var dir = Path.Combine(Dir, "Can_collect_exact_phrase_joined_by_not");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    var docs = new List<List<Field>>
    {
        new List<Field> { new Field("_id", "0"), new Field("title", "rambo first blood") },
        new List<Field> { new Field("_id", "1"), new Field("title", "rambo 2") },
        new List<Field> { new Field("_id", "2"), new Field("title", "rocky 2") },
        new List<Field> { new Field("_id", "3"), new Field("title", "raiders of the lost ark") },
        new List<Field> { new Field("_id", "4"), new Field("title", "the rain man") },
        new List<Field> { new Field("_id", "5"), new Field("title", "the good, the bad and the ugly") }
    };
    var writer = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.QuickLz, primaryKey: "_id", documents: docs);
    long indexName = writer.Commit();

    // Baseline: one required term matches all three "the" titles.
    var query = new QueryParser(new Analyzer()).Parse("+title:the");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(query).ToList();

        Assert.AreEqual(3, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 3));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 4));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 5));
    }

    // NOT clause removes the doc containing "ugly" from the baseline set.
    query = new QueryParser(new Analyzer()).Parse("+title:the -title:ugly");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(query).ToList();

        Assert.AreEqual(2, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 3));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 4));
    }
}
public void Can_collect_exact_phrase_joined_by_or()
{
    var dir = Path.Combine(CreateDir(), "Can_collect_exact_phrase_joined_by_or");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    var docs = new List<dynamic>
    {
        new { _id = "0", title = "rambo first blood" },
        new { _id = "1", title = "rambo 2" },
        new { _id = "2", title = "rocky 2" },
        new { _id = "3", title = "raiders of the lost ark" },
        new { _id = "4", title = "the rain man" },
        new { _id = "5", title = "the good, the bad and the ugly" }
    }.ToDocuments();

    var writer = new DocumentsUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
    long indexName = writer.Commit();

    // "rocky" alone matches exactly one document.
    var query = new QueryParser(new Analyzer()).Parse("+title:rocky");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(query).ToList();

        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 2));
    }

    // "rambo" alone matches two documents.
    query = new QueryParser(new Analyzer()).Parse("+title:rambo");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(query).ToList();

        Assert.AreEqual(2, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 0));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 1));
    }

    // Required "rocky" plus optional "rambo" (OR semantics): union of both result sets.
    query = new QueryParser(new Analyzer()).Parse("+title:rocky title:rambo");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(query).ToList();

        Assert.AreEqual(3, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 0));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 1));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 2));
    }
}
public void Can_collect_near()
{
    var dir = Path.Combine(Dir, "Can_collect_near");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    var docs = new List<List<Field>>
    {
        new List<Field> { new Field("_id", "0"), new Field("title", "rambo") },
        new List<Field> { new Field("_id", "1"), new Field("title", "rambo 2") },
        new List<Field> { new Field("_id", "2"), new Field("title", "rocky 2") },
        new List<Field> { new Field("_id", "3"), new Field("title", "raiders of the lost ark") },
        new List<Field> { new Field("_id", "4"), new Field("title", "tomb raider") }
    };
    var writer = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.QuickLz, primaryKey: "_id", documents: docs);
    long indexName = writer.Commit();

    // Exact match (fuzzy off): only "tomb raider" contains the literal term "raider".
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(new QueryContext("title", "raider") { Fuzzy = false, Edits = 1 }).ToList();

        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 4));
    }

    // Fuzzy with one edit: "raiders" now also matches.
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(new QueryContext("title", "raider") { Fuzzy = true, Edits = 1 }).ToList();

        Assert.AreEqual(2, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 3));
        Assert.IsTrue(hits.Any(d => d.DocumentId == 4));
    }
}
public void Can_rank_near_term()
{
    var dir = Path.Combine(Setup.Dir, "Can_rank_near_term");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    var docs = new List<Dictionary<string, string>>
    {
        new Dictionary<string, string> { { "_id", "0" }, { "title", "Gustav Horn, Count of Pori" } },
        new Dictionary<string, string> { { "_id", "1" }, { "title", "Port au Port Peninsula" } },
        new Dictionary<string, string> { { "_id", "2" }, { "title", "Pore" } },
        new Dictionary<string, string> { { "_id", "3" }, { "title", "Born 2.0" } },
        new Dictionary<string, string> { { "_id", "4" }, { "title", "P**n" } }
    };

    string indexName;
    using (var writer = new StreamWriteOperation(dir, new Analyzer(), docs.ToStream()))
    {
        indexName = writer.Execute();
    }

    // Fuzzy term query ("~"): all five docs match, ranked by edit proximity,
    // with the exact term first.
    var query = new QueryParser(new Analyzer()).Parse("+title:p**n~");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var ranked = collector.Collect(query).ToList();

        Assert.That(ranked.Count, Is.EqualTo(5));
        Assert.IsTrue(ranked[0].DocumentId == 4);
        Assert.IsTrue(ranked[1].DocumentId == 0);
        Assert.IsTrue(ranked[2].DocumentId == 1);
        Assert.IsTrue(ranked[3].DocumentId == 3);
        Assert.IsTrue(ranked[4].DocumentId == 2);
    }
}
public void Can_rank_near_phrase()
{
    var dir = Path.Combine(Setup.Dir, "Can_rank_near_phrase");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    var docs = new List<Dictionary<string, string>>
    {
        new Dictionary<string, string> { { "_id", "0" }, { "title", "Tage Mage" } },
        new Dictionary<string, string> { { "_id", "1" }, { "title", "aye-aye" } },
        new Dictionary<string, string> { { "_id", "2" }, { "title", "Cage Rage Championships" } },
        new Dictionary<string, string> { { "_id", "3" }, { "title", "Page Up and Page Down keys" } },
        new Dictionary<string, string> { { "_id", "4" }, { "title", "Golden Age of P**n" } }
    };

    string indexName;
    using (var writer = new StreamWriteOperation(dir, new Analyzer(), docs.ToStream()))
    {
        indexName = writer.Execute();
    }

    // Fuzzy phrase query: every doc matches, but the closest phrase ranks first.
    var query = new QueryParser(new Analyzer()).Parse("+title:age of p**n~");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var ranked = collector.Collect(query).ToList();

        Assert.That(ranked.Count, Is.EqualTo(5));
        Assert.IsTrue(ranked[0].DocumentId == 4);
    }
}
/// <summary>
/// Exports the documents of an index (identified by --source-file) to a JSON
/// array file (--target-file). Supports optional --skip/--take paging.
/// </summary>
static void Export(string[] args)
{
    // Optional paging arguments; defaults export everything.
    var take = int.MaxValue;
    var skip = 0;

    if (Array.IndexOf(args, "--take") > 0)
    {
        take = int.Parse(args[Array.IndexOf(args, "--take") + 1]);
    }
    if (Array.IndexOf(args, "--skip") > 0)
    {
        skip = int.Parse(args[Array.IndexOf(args, "--skip") + 1]);
    }

    // NOTE(review): if --source-file/--target-file are missing, IndexOf yields -1
    // and args[0] is silently used — consider validating; behavior kept as-is.
    var sourceFileName = args[Array.IndexOf(args, "--source-file") + 1];
    var targetFileName = args[Array.IndexOf(args, "--target-file") + 1];
    var dir = Path.GetDirectoryName(sourceFileName);
    var version = Path.GetFileNameWithoutExtension(sourceFileName);
    var ix = IxInfo.Load(Path.Combine(dir, version + ".ix"));

    Console.WriteLine("migrating...");

    var writeTimer = Stopwatch.StartNew();

    using (var outStream = new FileStream(targetFileName, FileMode.Create))
    using (var jsonWriter = new StreamWriter(outStream, Encoding.UTF8))
    using (var documents = new RDocStream(sourceFileName, ix.PrimaryKeyFieldName, skip, take))
    {
        jsonWriter.WriteLine("[");

        // BUG FIX: elements of a JSON array must be comma-separated. The previous
        // version wrote bare objects on consecutive lines, producing an invalid
        // JSON document that no standard parser could read back.
        var first = true;
        foreach (var document in documents.ReadSource())
        {
            var dic = document.Fields.ToDictionary(x => x.Key, y => y.Value.Value);
            var json = JsonConvert.SerializeObject(dic, Formatting.None);

            if (!first)
            {
                jsonWriter.WriteLine(",");
            }
            jsonWriter.Write(json);
            first = false;
        }

        jsonWriter.WriteLine();
        jsonWriter.Write("]");
    }

    Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
}
public void Can_collect_exact_phrase_joined_by_not()
{
    var dir = CreateDir();

    var docs = new List<dynamic>
    {
        new { _id = "0", title = "rambo first blood" },
        new { _id = "1", title = "rambo 2" },
        new { _id = "2", title = "rocky 2" },
        new { _id = "3", title = "raiders of the lost ark" },
        new { _id = "4", title = "the rain man" },
        new { _id = "5", title = "the good, the bad and the ugly" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    long indexName;
    // FIX: wrap the transaction in "using" so it is disposed even when Write()
    // throws; the original only called Dispose() on the happy path.
    using (var writer = new UpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs))
    {
        indexName = writer.Write();
    }

    // Baseline: one required term matches all three "the" titles.
    var query = new QueryParser(new Analyzer()).Parse("+title:the");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(3, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 5));
    }

    // NOT clause removes the doc containing "ugly" from the baseline set.
    query = new QueryParser(new Analyzer()).Parse("+title:the -title:ugly");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(2, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 3));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
    }
}
public void Can_collect_near_phrase()
{
    // FIX: directory name now matches the test name. It previously read
    // "Can_collect_near_phrase_joined_by_and" — a copy-paste leftover that
    // broke the dir-matches-test convention used by every other test here
    // and could collide with another test's data directory.
    var dir = Path.Combine(Dir, "Can_collect_near_phrase");
    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }

    var docs = new List<Field>
    {
        new Field(0, "_id", "0"), new Field(0, "title", "rambo first blood"),
        new Field(1, "_id", "1"), new Field(1, "title", "rambo 2"),
        new Field(2, "_id", "2"), new Field(2, "title", "rocky 2"),
        new Field(3, "_id", "3"), new Field(3, "title", "the raid"),
        new Field(4, "_id", "4"), new Field(4, "title", "the rain man"),
        new Field(5, "_id", "5"), new Field(5, "title", "the good, the bad and the ugly")
    }.GroupBy(f => f.DocumentId).Select(g => new Document(g.Key, g.ToList()));

    var writer = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
    long indexName = writer.Commit();

    // Exact phrase: only "the rain man" contains "rain man".
    var query = new QueryParser(new Analyzer()).Parse("+title:rain man");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(1, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
    }

    // Fuzzy phrase at 0.75 similarity still resolves to the same single doc.
    query = new QueryParser(new Analyzer(), 0.75f).Parse("+title:rain man~");
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var scores = collector.Collect(query).ToList();

        Assert.AreEqual(1, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 4));
    }
}
public void Can_delete()
{
    var dir = CreateDir();

    var docs = new List<dynamic>
    {
        new { _id = "0", title = "rambo first blood" },
        new { _id = "1", title = "rambo 2" },
        new { _id = "2", title = "rocky 2" },
        new { _id = "3", title = "raiders of the lost ark" },
        new { _id = "4", title = "the rain man" },
        new { _id = "5", title = "the good, the bad and the ugly" }
    }.ToDocuments(primaryKeyFieldName: "_id");

    long indexName;
    // FIX: wrap the transaction in "using" so it is disposed even when Write()
    // throws; the original only called Dispose() on the happy path.
    using (var writer = new UpsertTransaction(dir, new Analyzer(), compression: Compression.Lz, documents: docs))
    {
        indexName = writer.Write();
    }

    // Before the delete both "rambo" titles are found.
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var scores = collector.Collect(new QueryContext("title", "rambo")).ToList();

        Assert.AreEqual(2, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 0));
        Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
    }

    // Delete by primary key "0" and verify it no longer appears in results.
    var operation = new DeleteByPrimaryKeyTransaction(dir, new[] { "0" });
    operation.Commit();

    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var scores = collector.Collect(new QueryContext("title", "rambo")).ToList();

        Assert.AreEqual(1, scores.Count);
        Assert.IsTrue(scores.Any(d => d.DocumentId == 1));
    }
}
public void Can_collect_by_id()
{
    var dir = Path.Combine(Dir, "Can_collect_by_id");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    // Primary keys are deliberately mixed: numeric, alphanumeric and alphabetic.
    var fields = new List<Field>
    {
        new Field(0, "_id", "abc0123"), new Field(0, "title", "rambo first blood"),
        new Field(1, "_id", "1"), new Field(1, "title", "rambo 2"),
        new Field(2, "_id", "2"), new Field(2, "title", "rocky 2"),
        new Field(3, "_id", "3"), new Field(3, "title", "the raiders of the lost ark"),
        new Field(4, "_id", "four"), new Field(4, "title", "the rain man"),
        new Field(5, "_id", "5five"), new Field(5, "title", "the good, the bad and the ugly")
    };
    var docs = fields
        .GroupBy(f => f.DocumentId)
        .Select(g => new Document(g.Key, g.ToList()))
        .OrderBy(d => d.Id);

    var writer = new DocumentUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
    long indexName = writer.Commit();

    // Lookup by a purely numeric key.
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(new QueryContext("_id", "3")).ToList();

        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 3));
    }

    // Lookup by an alphanumeric key.
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(new QueryContext("_id", "5five")).ToList();

        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 5));
    }
}
public void Can_collect_by_id()
{
    var dir = Path.Combine(CreateDir(), "Can_collect_by_id");
    Directory.CreateDirectory(dir); // no-op when the directory already exists

    // Primary keys are deliberately mixed: numeric, alphanumeric and alphabetic.
    var docs = new List<dynamic>
    {
        new { _id = "abc0123", title = "rambo first blood" },
        new { _id = "1", title = "rambo 2" },
        new { _id = "2", title = "rocky 2" },
        new { _id = "3", title = "the raiders of the lost ark" },
        new { _id = "four", title = "the rain man" },
        new { _id = "5five", title = "the good, the bad and the ugly" }
    }.ToDocuments();

    var writer = new DocumentsUpsertOperation(dir, new Analyzer(), compression: Compression.Lz, primaryKey: "_id", documents: docs);
    long indexName = writer.Commit();

    // Lookup by a purely numeric key.
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(new QueryContext("_id", "3")).ToList();

        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 3));
    }

    // Lookup by an alphanumeric key.
    using (var collector = new Collector(dir, IxInfo.Load(Path.Combine(dir, indexName + ".ix")), new Tfidf()))
    {
        var hits = collector.Collect(new QueryContext("_id", "5five")).ToList();

        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Any(d => d.DocumentId == 5));
    }
}