/// <summary>
/// Command-line entry point. Parses the options and then either builds an index
/// ("--build"), runs an interactive query loop over a saved index ("--search"),
/// or shows the help text (any other case, including missing required options).
/// </summary>
/// <param name="args">Raw command-line arguments; anything the option parser
/// hands back as unprocessed is reported as unknown and aborts the run.</param>
public static void Main (string[] args)
{
	string cmd = null;
	string basename = null;
	string inputdir = null;
	string listfiles = null;
	string extension = null;
	string searchalg = "galloping";
	string interalg = "in-order-tree";
	// The lists in the help strings mirror the values accepted by the switches in the
	// "search" case below; keep both in sync when adding or removing an algorithm.
	OptionSet op = new OptionSet () {
		{"help", "Show help", (v) => cmd = "help"},
		{"build", "Build an index", (v) => cmd = "build"},
		{"dir=", "Directory to scan", (v) => inputdir = v},
		{"list=", "List of filenames (one per line)", (v) => listfiles = v},
		{"ext=", "Valid extension", (v) => extension = v},
		{"search", "Search for a query in an index", (v) => cmd = "search"},
		{"index=", "Base name for the index (build or search)", (v) => basename = v},
		{"search-algorithm=", "Choose the search algorithm (sequential|binary-search|galloping)", (v) => searchalg = v},
		{"intersection-algorithm=", "Choose an intersection algorithm (svs|small-adaptive|barbay-sequential|barbay-randomized|baeza-yates|in-order-tree)", (v) => interalg = v}
	};
	var Largs = op.Parse (args);
	if (Largs.Count > 0) {
		Console.WriteLine ("Unknown arguments, valid options:");
		op.WriteOptionDescriptions (Console.Out);
		return;
	}
	var seq = new SeqTextIR ();
	switch (cmd) {
	case "build":
		// Building needs a source of files (directory or list) and an output base name;
		// otherwise fall through to the help text.
		if (inputdir == null && listfiles == null) {
			goto default;
		}
		if (basename == null) {
			goto default;
		}
		// Alternative sequence builders tried previously:
		// var seq_builder = SequenceBuilders.GetGolynskiSucc (12);
		// var seq_builder = SequenceBuilders.GetSeqXLB_SArray64 (12);
		// var seq_builder = SequenceBuilders.GetSeqXLB_DiffSetRL2_64 (12, 31, new EliasDelta64 ());
		var seq_builder = SequenceBuilders.GetSeqXLB_DiffSetRL64 (12, 31, new EliasDelta64 ());
		Console.WriteLine ("*** building SeqTextIR instance over {0} | filter {1}", inputdir, extension);
		// Alternative containers tried previously:
		// var seq_container = new List<int>();
		// var seq_container = new MemoryMappedList<int>("H", false);
		using (var seq_container = new SepDiskList32<int>(basename + ".memdata", 1<<12)) {
			// An explicit file list takes precedence over scanning a directory.
			if (listfiles == null) {
				seq.Build (GetFilenames (inputdir, extension), seq_builder, seq_container);
			} else {
				seq.Build (File.ReadAllLines (listfiles), seq_builder, seq_container);
			}
		}
		Console.WriteLine ("*** saving");
		seq.Save (basename);
		/*{
			string filename;
			int sp;
			int len;
			Console.WriteLine ();
			File.WriteAllText ("out-test-docid-0", seq.GetFileData (0, out filename, out sp, out len));
			Console.WriteLine ("check file: {0}", filename);
		}*/
		break;
	case "search":
		if (basename == null) {
			goto default;
		}
		ISearchAlgorithm<int> salg = null;
		// Invariant lower-casing: algorithm names are fixed ASCII identifiers, so the
		// match must not depend on the current culture's casing rules.
		switch (searchalg.ToLowerInvariant ()) {
		case "galloping":
			salg = new DoublingSearch<int> ();
			break;
		//case "backward-galloping":
		//	salg = new BackwardDoublingSearch();
		//	break;
		case "binary-search":
			salg = new BinarySearch<int> ();
			break;
		case "sequential":
			salg = new SequentialSearch<int> ();
			break;
		default:
			Console.WriteLine ("Unknown search algorithm: {0}", searchalg);
			ShowHelp (op, cmd);
			return;
		}
		IIntersection<int> ialg = null;
		switch (interalg.ToLowerInvariant ()) {
		case "svs":
			ialg = new SvS<int> (salg);
			break;
		case "small-adaptive":
			ialg = new SmallAdaptive<int> (salg);
			break;
		case "barbay-sequential":
			ialg = new BarbaySequential<int> (salg);
			break;
		case "barbay-randomized":
			ialg = new BarbayRandomized<int> (salg);
			break;
		case "baeza-yates":
			ialg = new BaezaYatesIntersection<int> (salg);
			break;
		case "in-order-tree":
			ialg = new InOrderUnbalancedTreeIntersection<int> (0.5, salg);
			break;
		case "sequential":
			throw new NotImplementedException ("sequential intersection is not supported. Instead use svs + sequential search");
		default:
			// Report the offending value, as the search-algorithm switch above does.
			Console.WriteLine ("Unknown intersection algorithm: {0}", interalg);
			ShowHelp (op, cmd);
			return;
		}
		seq.Load (basename);
		// Interactive loop: one query per line; an empty line or end of input stops it.
		while (true) {
			Console.WriteLine ("query [enter]");
			string query = Console.ReadLine ();
			if (string.IsNullOrEmpty (query)) {
				break;
			}
			Search (seq, query, ialg);
		}
		break;
	case "help":
	default:
		ShowHelp (op, cmd);
		break;
	}
}
/// <summary>
/// Runs a phrase query against <paramref name="seq"/> and prints, for each of the
/// first results (at most 10), a snippet with its uri, document id and file offset,
/// followed by the number of results shown and the elapsed time.
/// </summary>
/// <param name="seq">Loaded index to query.</param>
/// <param name="q">Query text, passed unchanged to <c>SearchPhrase</c>.</param>
/// <param name="ialg">Intersection algorithm handed to <c>SearchPhrase</c>.</param>
public static void Search (SeqTextIR seq, string q, IIntersection<int> ialg)
{
	const int max_results = 10;
	const int max_len_snippet = 128;
	Console.WriteLine ("Query: '{0}'", q);
	// Stopwatch instead of DateTime.Now: the wall clock can jump (clock adjustments,
	// DST), which would corrupt the measured interval.
	var timer = System.Diagnostics.Stopwatch.StartNew ();
	var res = seq.SearchPhrase (q, ialg);
	int i = 0;
	foreach (var occ in res) {
		if (i >= max_results) {
			Console.WriteLine ("Too many results skipping to the end");
			break;
		}
		i++;
		string uri;
		int docid;
		int sp;
		int len;
		string snippet = seq.GetSnippet (occ, max_len_snippet, out uri, out docid, out sp, out len);
		Console.WriteLine ();
		Console.WriteLine ("=========== ResultID {0}, occpos: {1}, uri: {2}", i, occ, uri);
		Console.WriteLine ("=========== Start snippet, snippet len: {0}, docid: {1}, offset_file: {2}", snippet.Length, docid, sp);
		Console.WriteLine (snippet);
		Console.WriteLine ("=========== End snippet");
	}
	// NOTE: i counts only the results printed above, so this never exceeds max_results.
	Console.WriteLine ("Total results: {0}", i);
	timer.Stop ();
	Console.WriteLine ("query time: {0}", timer.Elapsed);
}