Exemple #1
0
        /// <summary>
        /// Indexes six movie titles and verifies that the query "+title:the" yields
        /// documents 3, 4 and 5, and that "+title:the +title:ugly" yields only document 5.
        /// </summary>
        public void Can_collect_exact_phrase_joined_by_and()
        {
            var indexDir = Path.Combine(Setup.Dir, "Can_collect_exact_phrase_joined_by_and");

            // CreateDirectory leaves an already existing directory untouched.
            Directory.CreateDirectory(indexDir);

            var movies = new List<Dictionary<string, string>>
            {
                new Dictionary<string, string> { { "_id", "0" }, { "title", "rambo first blood" } },
                new Dictionary<string, string> { { "_id", "1" }, { "title", "rambo 2" } },
                new Dictionary<string, string> { { "_id", "2" }, { "title", "rocky 2" } },
                new Dictionary<string, string> { { "_id", "3" }, { "title", "the raiders of the lost ark" } },
                new Dictionary<string, string> { { "_id", "4" }, { "title", "the rain man" } },
                new Dictionary<string, string> { { "_id", "5" }, { "title", "the good, the bad and the ugly" } }
            };

            string ixName;

            using (var writeOp = new StreamWriteOperation(indexDir, new Analyzer(), movies.ToStream()))
            {
                ixName = writeOp.Execute();
            }

            // Both collectors below read the index description from this file.
            var ixFile = Path.Combine(indexDir, ixName + ".ix");

            // Case 1: a single term on the title field.
            var singleTerm = new QueryParser(new Analyzer()).Parse("+title:the");

            using (var singleTermCollector = new Collector(indexDir, IxInfo.Load(ixFile), new Tfidf()))
            {
                var hits = singleTermCollector.Collect(singleTerm).ToList();

                Assert.AreEqual(3, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
            }

            // Case 2: two terms on the title field; only document 5 is expected.
            var bothTerms = new QueryParser(new Analyzer()).Parse("+title:the +title:ugly");

            using (var bothTermsCollector = new Collector(indexDir, IxInfo.Load(ixFile), new Tfidf()))
            {
                var hits = bothTermsCollector.Collect(bothTerms).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 5));
            }
        }
Exemple #2
0
        /// <summary>
        /// Indexes five movie titles and collects on the title term "raider" twice:
        /// with <c>Fuzzy = false</c> only document 4 is expected, with <c>Fuzzy = true</c>
        /// (and <c>Edits = 1</c>) documents 3 and 4 are expected.
        /// </summary>
        public void Can_collect_near()
        {
            var indexDir = Path.Combine(Setup.Dir, "Can_collect_near");

            // CreateDirectory leaves an already existing directory untouched.
            Directory.CreateDirectory(indexDir);

            var movies = new List<Dictionary<string, string>>
            {
                new Dictionary<string, string> { { "_id", "0" }, { "title", "rambo" } },
                new Dictionary<string, string> { { "_id", "1" }, { "title", "rambo 2" } },
                new Dictionary<string, string> { { "_id", "2" }, { "title", "rocky 2" } },
                new Dictionary<string, string> { { "_id", "3" }, { "title", "raiders of the lost ark" } },
                new Dictionary<string, string> { { "_id", "4" }, { "title", "tomb raider" } }
            };

            string ixName;

            using (var writeOp = new StreamWriteOperation(indexDir, new Analyzer(), movies.ToStream()))
            {
                ixName = writeOp.Execute();
            }

            // Both collectors below read the index description from this file.
            var ixFile = Path.Combine(indexDir, ixName + ".ix");

            // Non-fuzzy lookup: only the title containing "raider" itself should match.
            using (var exactCollector = new Collector(indexDir, IxInfo.Load(ixFile), new Tfidf()))
            {
                var exactQuery = new QueryContext("title", "raider")
                {
                    Fuzzy = false,
                    Edits = 1
                };
                var hits = exactCollector.Collect(exactQuery).ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }

            // Fuzzy lookup: "raiders" (document 3) is expected to match as well.
            using (var fuzzyCollector = new Collector(indexDir, IxInfo.Load(ixFile), new Tfidf()))
            {
                var fuzzyQuery = new QueryContext("title", "raider")
                {
                    Fuzzy = true,
                    Edits = 1
                };
                var hits = fuzzyCollector.Collect(fuzzyQuery).ToList();

                Assert.AreEqual(2, hits.Count);
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 3));
                Assert.IsTrue(hits.Any(hit => hit.DocumentId == 4));
            }
        }
Exemple #3
0
        /// <summary>
        /// Runs a write operation, either in-process against a local index directory
        /// or by posting to the HTTP endpoint configured under "sir.endpoint".
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. Recognized options, each followed by its value:
        /// <c>--take</c> (document count passed to the in-process writer, default 1000),
        /// <c>--file</c> (source file for the in-process writer),
        /// <c>--dir</c> (index directory; a non-blank value selects the in-process path),
        /// <c>--name</c> (index name passed to the HTTP writer client).
        /// </param>
        /// <exception cref="ArgumentException">
        /// An option is present without a value, or <c>--dir</c> is given without <c>--file</c>.
        /// </exception>
        /// <exception cref="FormatException">The <c>--take</c> value is not an integer.</exception>
        static void Write(string[] args)
        {
            var takeValue = GetWriteOptionValue(args, "--take");
            var take = takeValue == null
                ? 1000
                : int.Parse(takeValue, System.Globalization.CultureInfo.InvariantCulture);

            var fileName = GetWriteOptionValue(args, "--file");
            var dir = GetWriteOptionValue(args, "--dir");
            var indexName = GetWriteOptionValue(args, "--name");

            var inproc = !string.IsNullOrWhiteSpace(dir);

            // Fail before touching the file system: without this check a missing
            // --file used to fall back silently to args[0] as the file name.
            if (inproc && fileName == null)
            {
                throw new ArgumentException("--file is required when --dir is specified.", "args");
            }

            Console.WriteLine("writing...");

            var writeTimer = Stopwatch.StartNew();

            if (inproc)
            {
                // CreateDirectory leaves an already existing directory untouched.
                Directory.CreateDirectory(dir);

                using (var writer = new StreamWriteOperation(dir, new Analyzer(), fileName, take))
                {
                    writer.Execute();
                }
            }
            else
            {
                Console.WriteLine("Executing HTTP POST");

                var url = ConfigurationManager.AppSettings.Get("sir.endpoint");

                // NOTE(review): this list is never populated, so the client is handed an
                // empty batch and --file/--take have no effect on this path. Confirm
                // whether documents were meant to be read from the file here.
                var docs = new List<Dictionary<string, string>>();

                using (var client = new WriterClient(indexName, url))
                {
                    client.Write(docs);
                }
            }

            Console.WriteLine("write operation took {0}", writeTimer.Elapsed);
        }

        /// <summary>
        /// Returns the argument that follows <paramref name="option"/> in
        /// <paramref name="args"/>, or <c>null</c> when the option is not present.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// The option is the last argument and therefore has no value.
        /// </exception>
        private static string GetWriteOptionValue(string[] args, string option)
        {
            var index = Array.IndexOf(args, option);

            // Index 0 is a valid position, so only a negative result means "absent".
            if (index < 0)
            {
                return null;
            }

            if (index + 1 >= args.Length)
            {
                throw new ArgumentException(string.Format("Option {0} requires a value.", option), "args");
            }

            return args[index + 1];
        }
Exemple #4
0
        /// <summary>
        /// Indexes five titles, runs the query "+title:p**n~" and verifies that all
        /// five documents are returned in the order 4, 0, 1, 3, 2.
        /// </summary>
        public void Can_rank_near_term()
        {
            var indexDir = Path.Combine(Setup.Dir, "Can_rank_near_term");

            // CreateDirectory leaves an already existing directory untouched.
            Directory.CreateDirectory(indexDir);

            // NOTE(review): the "P**n" / "p**n" literals look masked by whatever produced
            // this listing - confirm against the original test data.
            var titles = new List<Dictionary<string, string>>
            {
                new Dictionary<string, string> { { "_id", "0" }, { "title", "Gustav Horn, Count of Pori" } },
                new Dictionary<string, string> { { "_id", "1" }, { "title", "Port au Port Peninsula" } },
                new Dictionary<string, string> { { "_id", "2" }, { "title", "Pore" } },
                new Dictionary<string, string> { { "_id", "3" }, { "title", "Born 2.0" } },
                new Dictionary<string, string> { { "_id", "4" }, { "title", "P**n" } }
            };

            string ixName;

            using (var writeOp = new StreamWriteOperation(indexDir, new Analyzer(), titles.ToStream()))
            {
                ixName = writeOp.Execute();
            }

            var nearTerm = new QueryParser(new Analyzer()).Parse("+title:p**n~");
            var ixFile = Path.Combine(indexDir, ixName + ".ix");

            using (var ranker = new Collector(indexDir, IxInfo.Load(ixFile), new Tfidf()))
            {
                var ranked = ranker.Collect(nearTerm).ToList();

                // The count is asserted first, so the positional reads below are in range.
                Assert.AreEqual(5, ranked.Count);
                Assert.IsTrue(ranked[0].DocumentId.Equals(4));
                Assert.IsTrue(ranked[1].DocumentId.Equals(0));
                Assert.IsTrue(ranked[2].DocumentId.Equals(1));
                Assert.IsTrue(ranked[3].DocumentId.Equals(3));
                Assert.IsTrue(ranked[4].DocumentId.Equals(2));
            }
        }
Exemple #5
0
        /// <summary>
        /// Indexes five titles, runs the query "+title:age of p**n~" and verifies that
        /// all five documents are returned with document 4 ranked first.
        /// </summary>
        public void Can_rank_near_phrase()
        {
            var indexDir = Path.Combine(Setup.Dir, "Can_rank_near_phrase");

            // CreateDirectory leaves an already existing directory untouched.
            Directory.CreateDirectory(indexDir);

            // NOTE(review): the "P**n" / "p**n" literals look masked by whatever produced
            // this listing - confirm against the original test data.
            var titles = new List<Dictionary<string, string>>
            {
                new Dictionary<string, string> { { "_id", "0" }, { "title", "Tage Mage" } },
                new Dictionary<string, string> { { "_id", "1" }, { "title", "aye-aye" } },
                new Dictionary<string, string> { { "_id", "2" }, { "title", "Cage Rage Championships" } },
                new Dictionary<string, string> { { "_id", "3" }, { "title", "Page Up and Page Down keys" } },
                new Dictionary<string, string> { { "_id", "4" }, { "title", "Golden Age of P**n" } }
            };

            string ixName;

            using (var writeOp = new StreamWriteOperation(indexDir, new Analyzer(), titles.ToStream()))
            {
                ixName = writeOp.Execute();
            }

            var nearPhrase = new QueryParser(new Analyzer()).Parse("+title:age of p**n~");
            var ixFile = Path.Combine(indexDir, ixName + ".ix");

            using (var ranker = new Collector(indexDir, IxInfo.Load(ixFile), new Tfidf()))
            {
                var ranked = ranker.Collect(nearPhrase).ToList();

                // The count is asserted first, so reading position 0 below is in range.
                Assert.AreEqual(5, ranked.Count);
                Assert.IsTrue(ranked[0].DocumentId.Equals(4));
            }
        }