Ejemplo n.º 1
0
        /// <summary>
        /// Estimates how many nearest references are needed so that a sampled query shares at least
        /// one reference with its <paramref name="k"/> nearest database objects.
        /// </summary>
        /// <param name="db">Database the sample queries are drawn from; also searched sequentially for the true knn.</param>
        /// <param name="refs">Index over the references; queried for the nearest references of each object.</param>
        /// <param name="k">Number of nearest neighbors retrieved for every sampled query.</param>
        /// <param name="numQueries">Number of database items sampled as training queries.</param>
        /// <returns>
        /// The largest prefix length (between 1 and 128) that had to be inspected, over all sampled
        /// queries, before the required overlap was reached.
        /// </returns>
        public static int EstimateKnrEnsuringSharedNeighborhoods(MetricDB db, Index refs, int k, int numQueries = 256)
        {
            // Strategy: ensure that the reference neighborhoods of the query and of all its knn are shared.
            // Update: a minimum overlapping probability was introduced to reduce noisy hard queries.
            // NOTICE: it cannot be adjusted for 1-nn because database items are used as training objects;
            // it will produce valid values for 2-nn and more.

            // Fraction of the retrieved neighbors that must overlap with the query before stopping.
            const double overlappingMinProb = 1.0;
            Sequential seq = new Sequential ();
            seq.Build (db);
            var n = db.Count;
            var Kmax = 128; // large k will need no extra items, but smaller ones (1 or 2) will need a small constant
            var Kmin = 1;

            foreach (var qID in RandomSets.GetRandomSubSet (numQueries, n)) {
                var q = db [qID];
                var qknr = Result2Sequence(refs.SearchKNN(q, Kmax));
                var list = new List<int[]> (k);

                foreach (var p in seq.SearchKNN (db [qID], k)) {
                    list.Add (Result2Sequence(refs.SearchKNN(db[p.ObjID], Kmax)));
                }

                // The reference index may return fewer than Kmax items (e.g. a small reference set),
                // so every positional access below is bounded by the actual sequence length.
                var limit = Math.Min (Kmax, qknr.Length);
                var qset = new HashSet<int>();
                var overlapping = 0;

                // Start from the prefix that previous queries already required.
                for (int i = 0; i < Kmin && i < qknr.Length; ++i) {
                    qset.Add (qknr [i]);
                }
                for (int i = 0; i < limit && overlapping < list.Count * overlappingMinProb; ++i) {
                    qset.Add (qknr [i]);
                    overlapping = 0;
                    for (int j = 0; j < list.Count; ++j) {
                        if (list [j] == null) {
                            // Already matched in a previous iteration; keep counting it.
                            ++overlapping;
                        } else if (i < list [j].Length && qset.Contains (list [j] [i])) {
                            // Mark the neighbor as matched so it is not tested again.
                            list [j] = null;
                            ++overlapping;
                        }
                    }
                    Kmin = Math.Max (Kmin, i + 1);
                }
            }
            return Kmin;
        }
Ejemplo n.º 2
0
Archivo: KnrFP.cs Proyecto: sadit/natix
        /// <summary>
        /// Runs a k-nn search for <paramref name="a"/> on <paramref name="refs"/> and returns the
        /// <c>ObjID</c> of every item in the result, in the order the result enumerates them.
        /// </summary>
        /// <param name="a">Object used as the query.</param>
        /// <param name="refs">Index that is searched.</param>
        /// <param name="k">Number of neighbors requested from the index.</param>
        /// <returns>An array with one identifier per item of the search result.</returns>
        public static int[] GetFP(object a, Index refs, int k)
        {
            var neighbors = refs.SearchKNN (a, k);
            // The result may hold fewer than k items, so size the array from its own count.
            var fingerprint = new int[neighbors.Count];
            var pos = 0;

            foreach (var item in neighbors) {
                fingerprint [pos++] = item.ObjID;
            }
            return fingerprint;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Search shell (not interactive at this level).
 /// Runs every query of <paramref name="qReader"/> against <paramref name="index"/>, prints the
 /// per-query information and the aggregated statistics to the console and, when
 /// <c>searchOps.ResultName</c> is not null, saves the results in binary form to that file.
 /// </summary>
 /// <param name="index">Index to be searched; its <c>Cost</c> is sampled before and after each query.</param>
 /// <param name="qReader">Queries to solve; range or knn is selected with <c>QTypeIsRange</c>.</param>
 /// <param name="searchOps">Options; <c>ResultName</c>, <c>IndexName</c>, <c>QueryName</c> and <c>ShowMaxResult</c> are read.</param>
 public static void Search(Index index, IEnumerable<CommandQuery> qReader, ShellSearchOptions searchOps)
 {
     BinaryWriter ResultOutput = null;
     int qid = 0;
     long totaltime = 0;
     SearchCost totalCost = new SearchCost (0, 0);
     try {
         if (searchOps.ResultName != null) {
             // Write to a temporary file first; it replaces the final file only after a complete run.
             ResultOutput = new BinaryWriter (File.Create (searchOps.ResultName + ".tmp"));
             var reslist = new ResultList (searchOps.IndexName, searchOps.QueryName);
             reslist.Save (ResultOutput);
         }
         foreach (CommandQuery qItem in qReader) {
             // UtcNow is not affected by time-zone or daylight-saving adjustments.
             long tstart = DateTime.UtcNow.Ticks;
             SearchCost startCost = index.Cost;
             IResult res;
             var qobj = qItem.QObj;
             if (qobj == null) {
                 // No parsed object was supplied, so build it from the raw query.
                 qobj = index.DB.Parse (qItem.QRaw, true);
             }
             if (qItem.QTypeIsRange) {
                 res = index.SearchRange (qobj, qItem.QArg);
             } else {
                 res = index.SearchKNN (qobj, (int)qItem.QArg);
             }
             var qraw = qItem.QRaw;
             if (qraw.Length > 1024) {
                 // Avoid storing and printing huge raw queries.
                 qraw = "<very-large-qraw>";
             }
             // Cost of this query alone: difference between the counters after and before the search.
             SearchCost finalCost = index.Cost;
             finalCost.Internal -= startCost.Internal;
             finalCost.Total -= startCost.Total;
             totalCost.Internal += finalCost.Internal;
             totalCost.Total += finalCost.Total;
             long time = DateTime.UtcNow.Ticks - tstart;
             totaltime += time;
             ResultInfo info = new ResultInfo (qid, qItem.EncodeQTypeQArgInSign(), qraw, finalCost, new TimeSpan (time), res);
             Console.WriteLine ("== qid: {0}", qid);
             Console.WriteLine ("== index: {0}, db: {1}, result: {2}", index, index.DB.Name, searchOps.ResultName);
             if (ResultOutput != null) {
                 info.Save (ResultOutput);
             }
             Console.WriteLine (info.ToString (searchOps.ShowMaxResult, null));
             qid++;
         }
     } finally {
         // Release the file handle even when a query fails; the partial ".tmp" file is left behind.
         if (ResultOutput != null) {
             ResultOutput.Close ();
         }
     }
     if (searchOps.ResultName != null) {
         if (File.Exists (searchOps.ResultName)) {
             File.Delete (searchOps.ResultName);
         }
         File.Move (searchOps.ResultName + ".tmp", searchOps.ResultName);
     }
     Console.WriteLine ("Number queries: {0}", qid);
     Console.WriteLine ("Average total-numdists: {0}", (totalCost.Total + 0.0) / qid);
     Console.WriteLine ("Average internal-distances: {0}", (totalCost.Internal + 0.0) / qid);
     Console.WriteLine ("Average external-distances: {0}", (totalCost.Total - totalCost.Internal + 0.0) / qid);
     Console.WriteLine ("Total search time: {0}", (new TimeSpan (totaltime)).TotalSeconds);
     // totaltime / qid is an integer division and would throw for an empty query set;
     // report NaN instead, as the floating-point averages above do.
     Console.WriteLine ("Average search time: {0}", qid == 0 ? double.NaN : (new TimeSpan (totaltime / qid)).TotalSeconds);
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Search shell (not interactive at this level).
 /// Runs every query of <paramref name="qReader"/> against <paramref name="index"/>, prints the
 /// per-query information and the aggregated statistics to the console and, when
 /// <c>searchOps.ResultName</c> is not null, writes the collected summary as indented JSON to that file.
 /// </summary>
 /// <param name="index">Index to be searched; its <c>Cost</c> is sampled before and after each query.</param>
 /// <param name="qReader">Queries to solve; range or knn is selected with <c>QTypeIsRange</c>.</param>
 /// <param name="searchOps">Options; <c>ResultName</c>, <c>IndexName</c> and <c>QueryName</c> are read.</param>
 public static void Search(Index index, IEnumerable<CommandQuery> qReader, ShellSearchOptions searchOps)
 {
     var summary = new ResultSummary () {
         ResultName = searchOps.ResultName,
         IndexName = searchOps.IndexName,
         QueriesName = searchOps.QueryName
     };
     int qid = 0;
     long totaltime = 0;
     SearchCost totalCost = new SearchCost (0, 0);
     foreach (CommandQuery qItem in qReader) {
         // UtcNow is not affected by time-zone or daylight-saving adjustments.
         long tstart = DateTime.UtcNow.Ticks;
         SearchCost startCost = index.Cost;
         IResult res;
         var qobj = qItem.QObj;
         if (qobj == null) {
             // No parsed object was supplied, so build it from the raw query.
             qobj = index.DB.Parse (qItem.QRaw);
         }
         if (qItem.QTypeIsRange) {
             res = index.SearchRange (qobj, qItem.QArg);
         } else {
             res = index.SearchKNN (qobj, (int)qItem.QArg);
         }
         var qraw = qItem.QRaw;
         // Cost of this query alone: difference between the counters after and before the search.
         SearchCost finalCost = index.Cost;
         finalCost.Internal -= startCost.Internal;
         finalCost.Total -= startCost.Total;
         totalCost.Internal += finalCost.Internal;
         totalCost.Total += finalCost.Total;
         long time = DateTime.UtcNow.Ticks - tstart;
         totaltime += time;
         var query = new Query () {
             QueryID = qid,
             QueryType = qItem.EncodeQTypeQArgInSign(),
             QueryRaw =  qraw,
             SearchCostTotal = finalCost.Total,
             SearchCostInternal = finalCost.Internal,
             SearchTime = (new TimeSpan(time)).TotalSeconds,
             Result = new List<ItemPair>(res)
         };
         Console.WriteLine ("-----  QueryID: {0}, QueryType: {1}  -----", query.QueryID, query.QueryType);
         if (res.Count == 0) {
             Console.WriteLine ("- results> empty result set");
         } else {
             Console.WriteLine ("- results> count: {0}, first-dist: {1}, last-dist: {2}", res.Count, res.First.Dist, res.CoveringRadius);
         }
         Console.WriteLine ("- search-time: {0}, cost-internal-distances: {1}, cost-total-distances: {2}", query.SearchTime, query.SearchCostInternal, query.SearchCostTotal);
         Console.WriteLine ("- index: {0}, db: {1}, result: {2}", index,
                            Path.GetFileName(index.DB.Name),
                            Path.GetFileName(searchOps.ResultName));
         summary.Add (query);
         qid++;
     }
     summary.ComputeSummary ();
     if (searchOps.ResultName != null) {
         File.WriteAllText (searchOps.ResultName, JsonConvert.SerializeObject (summary, Formatting.Indented));
     }
     Console.WriteLine ("Number queries: {0}", qid);
     Console.WriteLine ("Average total-numdists: {0}", (totalCost.Total + 0.0) / qid);
     Console.WriteLine ("Average internal-distances: {0}", (totalCost.Internal + 0.0) / qid);
     Console.WriteLine ("Average external-distances: {0}", (totalCost.Total - totalCost.Internal + 0.0) / qid);
     Console.WriteLine ("Total search time: {0}", (new TimeSpan (totaltime)).TotalSeconds);
     // totaltime / qid is an integer division and would throw for an empty query set;
     // report NaN instead, as the floating-point averages above do.
     Console.WriteLine ("Average search time: {0}", qid == 0 ? double.NaN : (new TimeSpan (totaltime / qid)).TotalSeconds);
 }