/// <summary>
/// Indexes six movie titles and verifies how required ("+") title terms are collected:
/// <c>+title:the</c> must return documents 3, 4 and 5, while
/// <c>+title:the +title:ugly</c> must return document 5 only.
/// </summary>
public void Can_collect_exact_phrase_joined_by_and()
{
    var indexDir = Path.Combine(Setup.Dir, "Can_collect_exact_phrase_joined_by_and");

    if (!Directory.Exists(indexDir))
    {
        Directory.CreateDirectory(indexDir);
    }

    var movies = new List<Dictionary<string, string>>
    {
        new Dictionary<string, string> { { "_id", "0" }, { "title", "rambo first blood" } },
        new Dictionary<string, string> { { "_id", "1" }, { "title", "rambo 2" } },
        new Dictionary<string, string> { { "_id", "2" }, { "title", "rocky 2" } },
        new Dictionary<string, string> { { "_id", "3" }, { "title", "the raiders of the lost ark" } },
        new Dictionary<string, string> { { "_id", "4" }, { "title", "the rain man" } },
        new Dictionary<string, string> { { "_id", "5" }, { "title", "the good, the bad and the ugly" } }
    };

    // The write operation is disposed before any collector opens the index.
    string indexName;
    using (var writeOperation = new StreamWriteOperation(indexDir, new Analyzer(), movies.ToStream()))
    {
        indexName = writeOperation.Execute();
    }

    var ixFileName = Path.Combine(indexDir, indexName + ".ix");

    // A single required term: every title containing "the".
    var singleTermQuery = new QueryParser(new Analyzer()).Parse("+title:the");
    using (var collector = new Collector(indexDir, IxInfo.Load(ixFileName), new Tfidf()))
    {
        var hits = collector.Collect(singleTermQuery).ToList();

        Assert.That(hits.Count, Is.EqualTo(3));
        Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
        Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
        Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
    }

    // Two required terms: only the title containing both "the" and "ugly".
    var twoTermQuery = new QueryParser(new Analyzer()).Parse("+title:the +title:ugly");
    using (var collector = new Collector(indexDir, IxInfo.Load(ixFileName), new Tfidf()))
    {
        var hits = collector.Collect(twoTermQuery).ToList();

        Assert.That(hits.Count, Is.EqualTo(1));
        Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
    }
}
/// <summary>
/// Indexes five titles and collects the term "raider" in the title field twice with
/// <c>Edits = 1</c>: with <c>Fuzzy = false</c> only document 4 ("tomb raider") is expected,
/// with <c>Fuzzy = true</c> documents 3 and 4 are expected.
/// </summary>
public void Can_collect_near()
{
    var indexDir = Path.Combine(Setup.Dir, "Can_collect_near");

    if (!Directory.Exists(indexDir))
    {
        Directory.CreateDirectory(indexDir);
    }

    var movies = new List<Dictionary<string, string>>
    {
        new Dictionary<string, string> { { "_id", "0" }, { "title", "rambo" } },
        new Dictionary<string, string> { { "_id", "1" }, { "title", "rambo 2" } },
        new Dictionary<string, string> { { "_id", "2" }, { "title", "rocky 2" } },
        new Dictionary<string, string> { { "_id", "3" }, { "title", "raiders of the lost ark" } },
        new Dictionary<string, string> { { "_id", "4" }, { "title", "tomb raider" } }
    };

    // The write operation is disposed before any collector opens the index.
    string indexName;
    using (var writeOperation = new StreamWriteOperation(indexDir, new Analyzer(), movies.ToStream()))
    {
        indexName = writeOperation.Execute();
    }

    var ixFileName = Path.Combine(indexDir, indexName + ".ix");

    // Non-fuzzy lookup.
    using (var collector = new Collector(indexDir, IxInfo.Load(ixFileName), new Tfidf()))
    {
        var exactQuery = new QueryContext("title", "raider") { Fuzzy = false, Edits = 1 };
        var hits = collector.Collect(exactQuery).ToList();

        Assert.That(hits.Count, Is.EqualTo(1));
        Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
    }

    // Fuzzy lookup with the same term and edit setting.
    using (var collector = new Collector(indexDir, IxInfo.Load(ixFileName), new Tfidf()))
    {
        var fuzzyQuery = new QueryContext("title", "raider") { Fuzzy = true, Edits = 1 };
        var hits = collector.Collect(fuzzyQuery).ToList();

        Assert.That(hits.Count, Is.EqualTo(2));
        Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
        Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
    }
}
/// <summary>
/// Runs a write operation and prints how long it took. When <c>--dir</c> is supplied the
/// documents in <c>--file</c> are written in-process with a <c>StreamWriteOperation</c>;
/// otherwise a <c>WriterClient</c> posts to the URL from the <c>sir.endpoint</c> app setting.
/// </summary>
/// <param name="args">
/// Command-line arguments. Recognised options, each followed by its value:
/// <c>--take</c> (defaults to 1000), <c>--file</c>, <c>--dir</c> and <c>--name</c>.
/// </param>
/// <exception cref="ArgumentException">
/// <c>--dir</c> was supplied without a <c>--file</c> value.
/// </exception>
static void Write(string[] args)
{
    // Array.IndexOf returns -1 when the option is absent and 0 is a valid position, so the
    // presence test must be ">= 0". The bounds check covers an option given as the last
    // argument without a value. Returns null when the option (or its value) is missing.
    Func<string, string> optionValue = name =>
    {
        var index = Array.IndexOf(args, name);
        return index >= 0 && index + 1 < args.Length ? args[index + 1] : null;
    };

    var take = 1000;
    var takeValue = optionValue("--take");
    if (takeValue != null)
    {
        take = int.Parse(takeValue);
    }

    var fileName = optionValue("--file");
    var dir = optionValue("--dir");
    var indexName = optionValue("--name");

    var url = ConfigurationManager.AppSettings.Get("sir.endpoint");
    var inproc = !string.IsNullOrWhiteSpace(dir);

    Console.WriteLine("writing...");

    var writeTimer = Stopwatch.StartNew();

    if (inproc)
    {
        // Previously a missing --file silently fell back to args[0]; fail with a clear message instead.
        if (fileName == null)
        {
            throw new ArgumentException("--file is required when --dir is specified", "args");
        }

        if (!Directory.Exists(dir))
        {
            Directory.CreateDirectory(dir);
        }

        using (var writer = new StreamWriteOperation(dir, new Analyzer(), fileName, take))
        {
            writer.Execute();
        }
    }
    else
    {
        Console.WriteLine("Executing HTTP POST");

        // NOTE(review): nothing populates this list, so the client is handed an empty
        // collection. Loading documents from --file looks intended here; confirm.
        var docs = new List<Dictionary<string, string>>();

        using (var client = new WriterClient(indexName, url))
        {
            client.Write(docs);
        }
    }

    Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
}
/// <summary>
/// Indexes five titles, collects the query <c>+title:p**n~</c> and verifies that all five
/// documents are returned in the order 4, 0, 1, 3, 2.
/// </summary>
public void Can_rank_near_term()
{
    var indexDir = Path.Combine(Setup.Dir, "Can_rank_near_term");

    if (!Directory.Exists(indexDir))
    {
        Directory.CreateDirectory(indexDir);
    }

    var articles = new List<Dictionary<string, string>>
    {
        new Dictionary<string, string> { { "_id", "0" }, { "title", "Gustav Horn, Count of Pori" } },
        new Dictionary<string, string> { { "_id", "1" }, { "title", "Port au Port Peninsula" } },
        new Dictionary<string, string> { { "_id", "2" }, { "title", "Pore" } },
        new Dictionary<string, string> { { "_id", "3" }, { "title", "Born 2.0" } },
        new Dictionary<string, string> { { "_id", "4" }, { "title", "P**n" } }
    };

    // The write operation is disposed before the collector opens the index.
    string indexName;
    using (var writeOperation = new StreamWriteOperation(indexDir, new Analyzer(), articles.ToStream()))
    {
        indexName = writeOperation.Execute();
    }

    var ixFileName = Path.Combine(indexDir, indexName + ".ix");
    var nearTermQuery = new QueryParser(new Analyzer()).Parse("+title:p**n~");

    using (var collector = new Collector(indexDir, IxInfo.Load(ixFileName), new Tfidf()))
    {
        var ranked = collector.Collect(nearTermQuery).ToList();

        Assert.That(ranked.Count, Is.EqualTo(5));

        // Expected ranking, best hit first.
        Assert.IsTrue(ranked[0].DocumentId.Equals(4));
        Assert.IsTrue(ranked[1].DocumentId.Equals(0));
        Assert.IsTrue(ranked[2].DocumentId.Equals(1));
        Assert.IsTrue(ranked[3].DocumentId.Equals(3));
        Assert.IsTrue(ranked[4].DocumentId.Equals(2));
    }
}
/// <summary>
/// Indexes five titles, collects the query <c>+title:age of p**n~</c> and verifies that all
/// five documents are returned with document 4 ranked first.
/// </summary>
public void Can_rank_near_phrase()
{
    var indexDir = Path.Combine(Setup.Dir, "Can_rank_near_phrase");

    if (!Directory.Exists(indexDir))
    {
        Directory.CreateDirectory(indexDir);
    }

    var articles = new List<Dictionary<string, string>>
    {
        new Dictionary<string, string> { { "_id", "0" }, { "title", "Tage Mage" } },
        new Dictionary<string, string> { { "_id", "1" }, { "title", "aye-aye" } },
        new Dictionary<string, string> { { "_id", "2" }, { "title", "Cage Rage Championships" } },
        new Dictionary<string, string> { { "_id", "3" }, { "title", "Page Up and Page Down keys" } },
        new Dictionary<string, string> { { "_id", "4" }, { "title", "Golden Age of P**n" } }
    };

    // The write operation is disposed before the collector opens the index.
    string indexName;
    using (var writeOperation = new StreamWriteOperation(indexDir, new Analyzer(), articles.ToStream()))
    {
        indexName = writeOperation.Execute();
    }

    var ixFileName = Path.Combine(indexDir, indexName + ".ix");
    var nearPhraseQuery = new QueryParser(new Analyzer()).Parse("+title:age of p**n~");

    using (var collector = new Collector(indexDir, IxInfo.Load(ixFileName), new Tfidf()))
    {
        var ranked = collector.Collect(nearPhraseQuery).ToList();

        Assert.That(ranked.Count, Is.EqualTo(5));
        Assert.IsTrue(ranked.First().DocumentId.Equals(4));
    }
}