/// <summary>
/// Estimates how many nearest references (a prefix of the reference ranking) a query needs
/// so that it shares at least one reference with each of its k nearest neighbors.
/// </summary>
/// <param name="db">Database the training queries are sampled from; also searched sequentially for the true k nearest neighbors.</param>
/// <param name="refs">Index over the references; queried with SearchKNN for up to 128 references per object.</param>
/// <param name="k">Number of nearest neighbors of each sampled query that must be covered.</param>
/// <param name="numQueries">Size of the random sample of database items used as training queries.</param>
/// <returns>The largest prefix length required by any sampled query; at least 1 and at most 128.</returns>
public static int EstimateKnrEnsuringSharedNeighborhoods(MetricDB db, Index refs, int k, int numQueries = 256) {
    // This strategy consists of ensuring that the neighborhoods of the query and all its knn are shared.
    // Update: a probability was introduced to reduce noisy hard queries.
    // NOTICE: it cannot be adjusted for 1-nn because database items are used as training objects;
    // it will produce valid values for 2-nn and more.

    // Upper bound on the number of references requested per object.
    const int Kmax = 128;
    // Fraction of the neighbors that must overlap with the query before the scan of a query stops.
    // The original code assigned 1.0 both by default and for k < 10, so a single constant is equivalent.
    const double overlappingMinProb = 1.0;

    var seq = new Sequential ();
    seq.Build (db);
    var n = db.Count;
    // large k will need no extra items, but smaller ones (1 or 2) will need a small constant
    var Kmin = 1;
    foreach (var qID in RandomSets.GetRandomSubSet (numQueries, n)) {
        var q = db [qID];
        var qknr = Result2Sequence (refs.SearchKNN (q, Kmax));
        var list = new List<int[]> (k);
        foreach (var p in seq.SearchKNN (q, k)) {
            list.Add (Result2Sequence (refs.SearchKNN (db [p.ObjID], Kmax)));
        }
        // If refs.SearchKNN yields fewer than Kmax items, never index past the end of the sequence.
        var limit = qknr.Length < Kmax ? qknr.Length : Kmax;
        var qset = new HashSet<int> ();
        // Start from the prefix already required by previously processed queries.
        for (int i = 0; i < Kmin && i < limit; ++i) {
            qset.Add (qknr [i]);
        }
        var overlapping = 0;
        for (int i = 0; i < limit && overlapping < list.Count * overlappingMinProb; ++i) {
            qset.Add (qknr [i]);
            overlapping = 0;
            for (int j = 0; j < list.Count; ++j) {
                var neighborSeq = list [j];
                if (neighborSeq == null) {
                    // null marks a neighbor that was already matched in an earlier iteration
                    ++overlapping;
                } else if (i < neighborSeq.Length && qset.Contains (neighborSeq [i])) {
                    list [j] = null;
                    ++overlapping;
                }
            }
            Kmin = Math.Max (Kmin, i + 1);
        }
    }
    return Kmin;
}
/// <summary>
/// Runs a k-nearest-neighbor search for <paramref name="a"/> on <paramref name="refs"/>
/// and returns the ObjID of every result item, in the order the result enumerates them.
/// </summary>
/// <param name="a">Object to search for.</param>
/// <param name="refs">Index queried through SearchKNN.</param>
/// <param name="k">Number of neighbors requested from the index.</param>
/// <returns>An array with one identifier per item of the search result.</returns>
public static int[] GetFP(object a, Index refs, int k) {
    var neighbors = refs.SearchKNN (a, k);
    var fingerprint = new int[neighbors.Count];
    var pos = 0;
    foreach (var item in neighbors) {
        fingerprint [pos++] = item.ObjID;
    }
    return fingerprint;
}
/// <summary>
/// Search shell (not interactive at this level).
/// Runs every query of <paramref name="qReader"/> against <paramref name="index"/>, prints the
/// per-query result and the aggregated cost/time statistics to the console and, when
/// <c>searchOps.ResultName</c> is not null, saves the results to that file.
/// </summary>
/// <param name="index">Index to be searched; its Cost property is sampled before and after each query.</param>
/// <param name="qReader">Queries to run (range or k-nn, as told by QTypeIsRange).</param>
/// <param name="searchOps">Options: ResultName (output file, null to skip saving), IndexName, QueryName and ShowMaxResult.</param>
public static void Search(Index index, IEnumerable<CommandQuery> qReader, ShellSearchOptions searchOps) {
    BinaryWriter resultOutput = null;
    string tmpName = null;
    if (searchOps.ResultName != null) {
        // Results are written to a temporary file and moved into place only after a complete run.
        tmpName = searchOps.ResultName + ".tmp";
        resultOutput = new BinaryWriter (File.Create (tmpName));
    }
    int qid = 0;
    long totaltime = 0;
    SearchCost totalCost = new SearchCost (0, 0);
    try {
        if (resultOutput != null) {
            var reslist = new ResultList (searchOps.IndexName, searchOps.QueryName);
            reslist.Save (resultOutput);
        }
        foreach (CommandQuery qItem in qReader) {
            // UtcNow is not affected by local-time adjustments while measuring elapsed ticks.
            long tstart = DateTime.UtcNow.Ticks;
            SearchCost startCost = index.Cost;
            IResult res;
            var qobj = qItem.QObj;
            if (qobj == null) {
                qobj = index.DB.Parse (qItem.QRaw, true);
            }
            if (qItem.QTypeIsRange) {
                res = index.SearchRange (qobj, qItem.QArg);
            } else {
                res = index.SearchKNN (qobj, (int)qItem.QArg);
            }
            var qraw = qItem.QRaw;
            if (qraw.Length > 1024) {
                // avoid storing huge raw queries along with the results
                qraw = "<very-large-qraw>";
            }
            // Cost of this query alone: counters after the search minus counters before it.
            SearchCost finalCost = index.Cost;
            finalCost.Internal -= startCost.Internal;
            finalCost.Total -= startCost.Total;
            totalCost.Internal += finalCost.Internal;
            totalCost.Total += finalCost.Total;
            long time = DateTime.UtcNow.Ticks - tstart;
            totaltime += time;
            ResultInfo info = new ResultInfo (qid, qItem.EncodeQTypeQArgInSign (), qraw, finalCost, new TimeSpan (time), res);
            Console.WriteLine ("== qid: {0}", qid);
            Console.WriteLine ("== index: {0}, db: {1}, result: {2}", index, index.DB.Name, searchOps.ResultName);
            if (resultOutput != null) {
                info.Save (resultOutput);
            }
            Console.WriteLine (info.ToString (searchOps.ShowMaxResult, null));
            qid++;
        }
    } finally {
        // Release the file handle even when a query throws.
        if (resultOutput != null) {
            resultOutput.Close ();
        }
    }
    if (tmpName != null) {
        if (File.Exists (searchOps.ResultName)) {
            File.Delete (searchOps.ResultName);
        }
        File.Move (tmpName, searchOps.ResultName);
    }
    Console.WriteLine ("Number queries: {0}", qid);
    Console.WriteLine ("Average total-numdists: {0}", (totalCost.Total + 0.0) / qid);
    Console.WriteLine ("Average internal-distances: {0}", (totalCost.Internal + 0.0) / qid);
    Console.WriteLine ("Average external-distances: {0}", (totalCost.Total - totalCost.Internal + 0.0) / qid);
    Console.WriteLine ("Total search time: {0}", (new TimeSpan (totaltime)).TotalSeconds);
    // totaltime / qid is an integer division: guard it so an empty query stream does not throw.
    long averageTicks = qid > 0 ? totaltime / qid : 0;
    Console.WriteLine ("Average search time: {0}", (new TimeSpan (averageTicks)).TotalSeconds);
}
/// <summary>
/// Search shell (not interactive at this level).
/// Runs every query of <paramref name="qReader"/> against <paramref name="index"/>, prints a
/// per-query report and the aggregated cost/time statistics to the console and, when
/// <c>searchOps.ResultName</c> is not null, writes the summary as indented JSON to that file.
/// </summary>
/// <param name="index">Index to be searched; its Cost property is sampled before and after each query.</param>
/// <param name="qReader">Queries to run (range or k-nn, as told by QTypeIsRange).</param>
/// <param name="searchOps">Options: ResultName (output file, null to skip saving), IndexName and QueryName.</param>
public static void Search(Index index, IEnumerable<CommandQuery> qReader, ShellSearchOptions searchOps) {
    var summary = new ResultSummary () {
        ResultName = searchOps.ResultName,
        IndexName = searchOps.IndexName,
        QueriesName = searchOps.QueryName
    };
    int qid = 0;
    long totaltime = 0;
    SearchCost totalCost = new SearchCost (0, 0);
    foreach (CommandQuery qItem in qReader) {
        // UtcNow is not affected by local-time adjustments while measuring elapsed ticks.
        long tstart = DateTime.UtcNow.Ticks;
        SearchCost startCost = index.Cost;
        IResult res;
        var qobj = qItem.QObj;
        if (qobj == null) {
            qobj = index.DB.Parse (qItem.QRaw);
        }
        if (qItem.QTypeIsRange) {
            res = index.SearchRange (qobj, qItem.QArg);
        } else {
            res = index.SearchKNN (qobj, (int)qItem.QArg);
        }
        var qraw = qItem.QRaw;
        // Cost of this query alone: counters after the search minus counters before it.
        SearchCost finalCost = index.Cost;
        finalCost.Internal -= startCost.Internal;
        finalCost.Total -= startCost.Total;
        totalCost.Internal += finalCost.Internal;
        totalCost.Total += finalCost.Total;
        long time = DateTime.UtcNow.Ticks - tstart;
        totaltime += time;
        var query = new Query () {
            QueryID = qid,
            QueryType = qItem.EncodeQTypeQArgInSign (),
            QueryRaw = qraw,
            SearchCostTotal = finalCost.Total,
            SearchCostInternal = finalCost.Internal,
            SearchTime = (new TimeSpan (time)).TotalSeconds,
            Result = new List<ItemPair> (res)
        };
        Console.WriteLine ("----- QueryID: {0}, QueryType: {1} -----", query.QueryID, query.QueryType);
        if (res.Count == 0) {
            Console.WriteLine ("- results> empty result set");
        } else {
            Console.WriteLine ("- results> count: {0}, first-dist: {1}, last-dist: {2}", res.Count, res.First.Dist, res.CoveringRadius);
        }
        Console.WriteLine ("- search-time: {0}, cost-internal-distances: {1}, cost-total-distances: {2}", query.SearchTime, query.SearchCostInternal, query.SearchCostTotal);
        Console.WriteLine ("- index: {0}, db: {1}, result: {2}", index, Path.GetFileName (index.DB.Name), Path.GetFileName (searchOps.ResultName));
        summary.Add (query);
        qid++;
    }
    summary.ComputeSummary ();
    if (searchOps.ResultName != null) {
        File.WriteAllText (searchOps.ResultName, JsonConvert.SerializeObject (summary, Formatting.Indented));
    }
    Console.WriteLine ("Number queries: {0}", qid);
    Console.WriteLine ("Average total-numdists: {0}", (totalCost.Total + 0.0) / qid);
    Console.WriteLine ("Average internal-distances: {0}", (totalCost.Internal + 0.0) / qid);
    Console.WriteLine ("Average external-distances: {0}", (totalCost.Total - totalCost.Internal + 0.0) / qid);
    Console.WriteLine ("Total search time: {0}", (new TimeSpan (totaltime)).TotalSeconds);
    // totaltime / qid is an integer division: guard it so an empty query stream does not throw.
    long averageTicks = qid > 0 ? totaltime / qid : 0;
    Console.WriteLine ("Average search time: {0}", (new TimeSpan (averageTicks)).TotalSeconds);
}