Esempio n. 1
0
 /// <summary>
 /// Builds the candidate result set for a query sequence by progressively
 /// intersecting the position lists of its symbols.
 /// </summary>
 /// <remarks>
 /// The working list starts as the positions returned by <c>SEQ.Unravel</c> for the
 /// first symbol. While it holds more than <paramref name="maxcand"/> entries and
 /// symbols remain, it is intersected with the list of the next symbol, which is
 /// wrapped with the negated symbol index as second constructor argument
 /// (presumably an offset that aligns the positions with the query start; verify
 /// against SortedListRSCache). If an intersection would drop below
 /// <paramref name="maxcand"/> entries it is discarded and the previous list is kept.
 /// </remarks>
 /// <param name="qseq">Query symbols; must contain at least one element.</param>
 /// <param name="maxcand">Size threshold that controls when narrowing stops.</param>
 /// <returns>
 /// A result holding <c>position / K</c> (with value 0) for every surviving
 /// position that is a multiple of <c>K</c>.
 /// </returns>
 protected virtual IResult GetCandidates(int[] qseq, int maxcand)
 {
     int symbolCount = qseq.Length;
     var intersector = new BaezaYatesIntersection<int> (new DoublingSearch<int> ());
     IList<int> candidates = new SortedListRSCache (this.SEQ.Unravel (qseq [0]));

     for (int pos = 1; pos < symbolCount && candidates.Count > maxcand; ++pos) {
         var shifted = new SortedListRSCache (this.SEQ.Unravel (qseq [pos]), -pos);
         var operands = new List<IList<int>> () { candidates, shifted };
         var narrowed = intersector.Intersection (operands);
         // An intersection that is too small is thrown away: the previous,
         // larger list is the one reported.
         if (narrowed.Count < maxcand) {
             break;
         }
         candidates = narrowed;
     }

     var result = new ResultTies (int.MaxValue, false);
     foreach (var position in candidates) {
         // Only positions that are multiples of K are turned into results.
         if (position % this.K != 0) {
             continue;
         }
         result.Push (position / this.K, 0);
     }
     return result;
 }
Esempio n. 2
0
        /// <summary>
        /// Command line entry point. Parses the options and then either builds an
        /// index ("build"), runs an interactive query loop over an existing index
        /// ("search"), or shows the help text.
        /// </summary>
        /// <param name="args">
        /// Raw command line arguments. Any argument not recognized by the option set
        /// causes the option descriptions to be printed and the program to return.
        /// </param>
        /// <exception cref="NotImplementedException">
        /// Thrown when the "sequential" intersection algorithm is requested.
        /// </exception>
        public static void Main(string[] args)
        {
            string cmd = null;
            string basename = null;
            string inputdir = null;
            string listfiles = null;
            string extension = null;
            string searchalg = "galloping";
            string interalg = "in-order-tree";
            // The help strings below list exactly the names accepted by the
            // switches further down, so the usage text cannot advertise a value
            // that is then rejected.
            OptionSet op = new OptionSet () {
                {"help", "Show help", (v) => cmd = "help"},
                {"build", "Build an index", (v) => cmd = "build"},
                {"dir=", "Directory to scan", (v) => inputdir = v},
                {"list=", "List of filenames (one per line)", (v) => listfiles = v},
                {"ext=", "Valid extension", (v) => extension = v},
                {"search", "Search for a query in an index", (v) => cmd = "search"},
                {"index=", "Base name for the index (build or search)", (v) => basename = v},
                {"search-algorithm=", "Choose the search algorithm (sequential|binary-search|galloping)",
                    (v) => searchalg = v},
                {"intersection-algorithm=", "Choose an intersection algorithm (svs|small-adaptive|barbay-sequential|barbay-randomized|baeza-yates|in-order-tree)",
                    (v) => interalg = v}
            };

            var Largs = op.Parse (args);
            if (Largs.Count > 0) {
                Console.WriteLine ("Unknown arguments, valid options:");
                op.WriteOptionDescriptions (Console.Out);
                return;
            }
            var seq = new SeqTextIR ();
            switch (cmd) {
            case "build":
                // Building needs a source of files (directory or list) and a base name.
                if (inputdir == null && listfiles == null) {
                    goto default;
                }
                if (basename == null) {
                    goto default;
                }
                var seq_builder = SequenceBuilders.GetSeqXLB_DiffSetRL64 (12, 31, new EliasDelta64 ());
                Console.WriteLine ("*** building SeqTextIR instance over {0} | filter {1}", inputdir, extension);
                using (var seq_container = new SepDiskList32<int>(basename + ".memdata", 1<<12)) {
                    // An explicit list of files takes precedence over scanning a directory.
                    if (listfiles == null) {
                        seq.Build (GetFilenames (inputdir, extension), seq_builder, seq_container);
                    } else {
                        seq.Build (File.ReadAllLines (listfiles), seq_builder, seq_container);
                    }
                }
                Console.WriteLine ("*** saving");
                seq.Save (basename);
                break;
            case "search":
                if (basename == null) {
                    goto default;
                }
                ISearchAlgorithm<int> salg = null;
                // Algorithm names are identifiers, not natural language text: lower
                // them with the invariant culture so that e.g. "GALLOPING" still
                // matches under locales with special casing rules for 'I'.
                // NOTE: "backward-galloping" has no case here and is rejected as unknown.
                switch (searchalg.ToLowerInvariant ()) {
                case "galloping":
                    salg = new DoublingSearch<int> ();
                    break;
                case "binary-search":
                    salg = new BinarySearch<int> ();
                    break;
                case "sequential":
                    salg = new SequentialSearch<int> ();
                    break;
                default:
                    Console.WriteLine ("Unknown search algorithm: {0}", searchalg);
                    ShowHelp (op, cmd);
                    return;
                }
                IIntersection<int> ialg = null;
                switch (interalg.ToLowerInvariant ()) {
                case "svs":
                    ialg = new SvS<int> (salg);
                    break;
                case "small-adaptive":
                    ialg = new SmallAdaptive<int> (salg);
                    break;
                case "barbay-sequential":
                    ialg = new BarbaySequential<int> (salg);
                    break;
                case "barbay-randomized":
                    ialg = new BarbayRandomized<int> (salg);
                    break;
                case "baeza-yates":
                    ialg = new BaezaYatesIntersection<int> (salg);
                    break;
                case "in-order-tree":
                    ialg = new InOrderUnbalancedTreeIntersection<int> (0.5, salg);
                    break;
                case "sequential":
                    throw new NotImplementedException ("sequential intersection is not supported. Instead use svs + sequential search");
                default:
                    // Tell the user what was wrong, as is done for the search algorithm.
                    Console.WriteLine ("Unknown intersection algorithm: {0}", interalg);
                    ShowHelp (op, cmd);
                    return;
                }
                seq.Load (basename);
                // Interactive loop: an empty line or end of input finishes the session.
                while (true) {
                    Console.WriteLine ("query [enter]");
                    string query = Console.ReadLine ();
                    if (string.IsNullOrEmpty (query)) {
                        break;
                    }
                    Search (seq, query, ialg);
                }
                break;
            case "help":
            default:
                ShowHelp (op, cmd);
                break;
            }
        }