/// <summary>
/// Collects candidates for a query by intersecting the position lists that
/// <c>SEQ.Unravel</c> returns for the symbols of <paramref name="qseq"/>.
/// </summary>
/// <remarks>
/// The list of <c>qseq[0]</c> seeds the candidate set. While that set holds more than
/// <paramref name="maxcand"/> entries and symbols remain, it is intersected with the list
/// of the next symbol <c>qseq[i]</c>, wrapped with <c>-i</c> as second constructor argument
/// (presumably an offset that aligns an occurrence at position p + i with position p;
/// confirm against SortedListRSCache). An intersection with fewer than
/// <paramref name="maxcand"/> entries is discarded and the previous candidate set is kept.
/// </remarks>
/// <param name="qseq">Query sequence of symbols. An empty array yields an empty result.</param>
/// <param name="maxcand">Candidate-count threshold that stops further intersections.</param>
/// <returns>
/// A <c>ResultTies</c> containing <c>c / K</c> (pushed with 0 as second argument, presumably
/// the distance) for every retained entry <c>c</c> that is a multiple of <c>K</c>.
/// </returns>
protected virtual IResult GetCandidates(int[] qseq, int maxcand)
{
    // Nothing to unravel: report no candidates instead of failing on qseq [0].
    if (qseq.Length == 0) {
        return new ResultTies (int.MaxValue, false);
    }
    var ialg = new BaezaYatesIntersection<int> (new DoublingSearch<int> ());
    IList<int> C = new SortedListRSCache (this.SEQ.Unravel (qseq [0]));
    for (int i = 1; i < qseq.Length && C.Count > maxcand; ++i) {
        var shifted = new SortedListRSCache (this.SEQ.Unravel (qseq [i]), -i);
        var narrowed = ialg.Intersection (new List<IList<int>> () { C, shifted });
        // Stop before adopting a set that dropped below the threshold.
        if (narrowed.Count < maxcand) {
            break;
        }
        C = narrowed;
    }
    var res = new ResultTies (int.MaxValue, false);
    foreach (var c in C) {
        // Only entries that are exact multiples of K are reported.
        if (c % this.K == 0) {
            res.Push (c / this.K, 0);
        }
    }
    return res;
}
/// <summary>
/// Command-line entry point. Depending on the selected command it builds a
/// <c>SeqTextIR</c> index from a set of files and saves it, or loads a saved index
/// and answers queries read from standard input until an empty line or end of input.
/// </summary>
/// <param name="args">
/// Command-line options: <c>--help</c>, <c>--build</c>, <c>--search</c>, <c>--dir=</c>,
/// <c>--list=</c>, <c>--ext=</c>, <c>--index=</c>, <c>--search-algorithm=</c> and
/// <c>--intersection-algorithm=</c>. Any leftover positional argument aborts with the
/// option descriptions.
/// </param>
public static void Main(string[] args)
{
    string cmd = null;
    string basename = null;
    string inputdir = null;
    string listfiles = null;
    string extension = null;
    string searchalg = "galloping";
    string interalg = "in-order-tree";
    // The advertised algorithm names mirror the values accepted by the switches below.
    OptionSet op = new OptionSet () {
        {"help", "Show help", (v) => cmd = "help"},
        {"build", "Build an index", (v) => cmd = "build"},
        {"dir=", "Directory to scan", (v) => inputdir = v},
        {"list=", "List of filenames (one per line)", (v) => listfiles = v},
        {"ext=", "Valid extension", (v) => extension = v},
        {"search", "Search for a query in an index", (v) => cmd = "search"},
        {"index=", "Base name for the index (build or search)", (v) => basename = v},
        {"search-algorithm=", "Choose the search algorithm (sequential|binary-search|galloping)", (v) => searchalg = v},
        {"intersection-algorithm=", "Choose an intersection algorithm (svs|small-adaptive|barbay-sequential|barbay-randomized|baeza-yates|in-order-tree)", (v) => interalg = v}
    };
    var Largs = op.Parse (args);
    if (Largs.Count > 0) {
        Console.WriteLine ("Unknown arguments, valid options:");
        op.WriteOptionDescriptions (Console.Out);
        return;
    }
    var seq = new SeqTextIR ();
    switch (cmd) {
    case "build":
        // Building needs a file source (directory or list file) and an index base name.
        if ((inputdir == null && listfiles == null) || basename == null) {
            goto default;
        }
        // Other builders exist in SequenceBuilders (GetGolynskiSucc, GetSeqXLB_SArray64,
        // GetSeqXLB_DiffSetRL2_64); this is the one currently selected.
        var seq_builder = SequenceBuilders.GetSeqXLB_DiffSetRL64 (12, 31, new EliasDelta64 ());
        Console.WriteLine ("*** building SeqTextIR instance over {0} | filter {1}", inputdir, extension);
        // The container is backed by "<basename>.memdata" and disposed before saving.
        using (var seq_container = new SepDiskList32<int> (basename + ".memdata", 1 << 12)) {
            if (listfiles == null) {
                seq.Build (GetFilenames (inputdir, extension), seq_builder, seq_container);
            } else {
                seq.Build (File.ReadAllLines (listfiles), seq_builder, seq_container);
            }
        }
        Console.WriteLine ("*** saving");
        seq.Save (basename);
        break;
    case "search":
        if (basename == null) {
            goto default;
        }
        // Invariant lower-casing keeps the name matching independent of the user's locale.
        ISearchAlgorithm<int> salg = null;
        switch (searchalg.ToLowerInvariant ()) {
        case "galloping":
            salg = new DoublingSearch<int> ();
            break;
        case "binary-search":
            salg = new BinarySearch<int> ();
            break;
        case "sequential":
            salg = new SequentialSearch<int> ();
            break;
        default:
            Console.WriteLine ("Unknown search algorithm: {0}", searchalg);
            ShowHelp (op, cmd);
            return;
        }
        IIntersection<int> ialg = null;
        switch (interalg.ToLowerInvariant ()) {
        case "svs":
            ialg = new SvS<int> (salg);
            break;
        case "small-adaptive":
            ialg = new SmallAdaptive<int> (salg);
            break;
        case "barbay-sequential":
            ialg = new BarbaySequential<int> (salg);
            break;
        case "barbay-randomized":
            ialg = new BarbayRandomized<int> (salg);
            break;
        case "baeza-yates":
            ialg = new BaezaYatesIntersection<int> (salg);
            break;
        case "in-order-tree":
            ialg = new InOrderUnbalancedTreeIntersection<int> (0.5, salg);
            break;
        case "sequential":
            // Still recognised so that the user gets a pointer to the supported equivalent.
            throw new NotImplementedException ("sequential intersection is not supported. Instead use svs + sequential search");
        default:
            Console.WriteLine ("Unknown intersection algorithm: {0}", interalg);
            ShowHelp (op, cmd);
            return;
        }
        seq.Load (basename);
        // Interactive loop: one query per line; an empty line or end of input stops it.
        while (true) {
            Console.WriteLine ("query [enter]");
            string query = Console.ReadLine ();
            if (string.IsNullOrEmpty (query)) {
                break;
            }
            Search (seq, query, ialg);
        }
        break;
    case "help":
    default:
        ShowHelp (op, cmd);
        break;
    }
}