/// <summary>
/// Build the index
/// </summary>
/// <remarks>
/// While items remain in the working set, one item is taken as the reference of a
/// new node and removed from the set. Distances from that reference to the remaining
/// items are computed, up to <paramref name="bsize"/> extreme items are collected via
/// <c>AppendKExtremes</c> (presumably the nearest ones; confirm against
/// <c>DynamicSequentialOrdered</c>), removed from the working set and stored in the
/// node together with their distances. A progress line is printed each time the
/// number of nodes already created is a multiple of 100.
/// </remarks>
/// <param name="db">The database to index; it is stored in <c>this.DB</c>.</param>
/// <param name="bsize">Bucket size: capacity of the result set attached to each node. Must be positive.</param>
/// <param name="rand">Random source handed to <c>DynamicSequentialOrdered.Build</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="db"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bsize"/> is zero or negative.</exception>
public virtual void Build(MetricDB db, int bsize, Random rand)
{
    // Validate before touching any state so a bad call leaves the index untouched.
    if (db == null) {
        throw new ArgumentNullException ("db");
    }
    if (bsize <= 0) {
        // bsize is used below as a divisor and as the bucket capacity; reject
        // non-positive values here instead of failing with DivideByZeroException.
        throw new ArgumentOutOfRangeException ("bsize", bsize, "The bucket size must be a positive integer.");
    }
    this.DB = db;
    var n = this.DB.Count;
    // randomized has very good performance, even compared with more "intelligent" strategies
    var dseq = new DynamicSequentialOrdered ();
    dseq.Build (db, rand);
    this.NODES = new List<Node> (n / bsize + 1);
    // Scratch buffer for the distances of the current reference; reused on every iteration.
    var L = new List<ItemPair> (n);
    while (dseq.Count > 0) {
        if (this.NODES.Count % 100 == 0) {
            Console.WriteLine ("XXX {0}, bucketSize: {1}, remain {2}/{3}, db: {4}, date-time: {5}", this, bsize, dseq.Count, db.Count, Path.GetFileName(db.Name), DateTime.Now);
        }
        // Take the next reference and remove it so it is not assigned to its own bucket.
        var refID = dseq.GetAnyItem ();
        dseq.Remove (refID);
        L.Clear ();
        dseq.ComputeDistances (this.DB[refID], L);
        var near = new Result (bsize);
        // AppendKExtremes needs a second result set; its content is not used here.
        var far = new Result (1);
        dseq.AppendKExtremes (near, far, L);
        var node = new Node (refID);
        this.NODES.Add (node);
        // Bucket members leave the working set before being copied into the node.
        dseq.Remove (near);
        foreach (var p in near) {
            node.Add (p.ObjID, p.Dist);
        }
    }
}
/// <summary>
/// Builds the pivot table and the item list of this instance from a database.
/// </summary>
/// <remarks>
/// The database is loaded into a <c>DynamicSequentialOrdered</c> using a random
/// permutation seeded with <paramref name="seed"/>. While that working set is not empty,
/// one item is taken as pivot and removed, <c>SearchExtremes</c> returns its near and far
/// result sets, and the near items (plus the far items when <paramref name="do_far"/> is
/// true) are appended to the item list and removed from the working set. The pivot itself
/// is not appended to the item list. For every pivot a <c>Pivot</c> record keeps the
/// mean and stddev reported by <c>SearchExtremes</c>, the largest near distance, the
/// smallest far distance and both counts. Progress is printed every 10 pivots.
/// </remarks>
/// <param name="DB">The database whose objects are partitioned.</param>
/// <param name="alpha">Forwarded unchanged to <c>SearchExtremes</c> (NOTE(review): meaning not visible here; confirm there).</param>
/// <param name="min_bs">Forwarded unchanged to <c>SearchExtremes</c> (presumably a minimum bucket size; confirm).</param>
/// <param name="seed">Seed of the random generator used for the initial permutation.</param>
/// <param name="do_far">When true, the far result set is also consumed and stored.</param>
public virtual void Build(MetricDB DB, double alpha, int min_bs, int seed, bool do_far)
{
    var remaining = new DynamicSequentialOrdered ();
    remaining.Build (DB, RandomSets.GetRandomPermutation(DB.Count, new Random(seed)));

    var pivotList = new List<Pivot> (32);
    var itemList = new List<ItemPair> (DB.Count);
    // Scratch list handed to SearchExtremes on every round.
    var scratch = new List<ItemPair>(remaining.Count);

    int round = 0;
    for (; remaining.Count > 0; ++round) {
        // Pick the next pivot and take it out of the working set.
        var pivotId = remaining.GetAnyItem();
        object pivotObject = DB[pivotId];
        remaining.Remove(pivotId);

        DynamicSequential.Stats stats;
        IResult nearSet, farSet;
        this.SearchExtremes(remaining, scratch, pivotObject, alpha, min_bs, out nearSet, out farSet, out stats);

        // Near side: track the distance range while copying the pairs.
        double minNear = double.MaxValue;
        double maxNear = 0;
        foreach (var entry in nearSet) {
            minNear = Math.Min (minNear, entry.Dist);
            maxNear = Math.Max (maxNear, entry.Dist);
            itemList.Add( new ItemPair { ObjID = entry.ObjID, Dist = entry.Dist } );
        }
        int nearCount = nearSet.Count;
        remaining.Remove(nearSet);

        // Far side is optional; when skipped the defaults below are stored in the pivot.
        double minFar = double.MaxValue;
        int farCount = 0;
        if (do_far) {
            foreach (var entry in farSet) {
                minFar = Math.Min (minFar, entry.Dist);
                itemList.Add( new ItemPair { ObjID = entry.ObjID, Dist = entry.Dist } );
            }
            farCount = farSet.Count;
            remaining.Remove(farSet);
        }

        Pivot pivotInfo = new Pivot(pivotId, stats.mean, stats.stddev, maxNear, minFar, nearCount, farCount);
        pivotList.Add(pivotInfo);

        bool reportProgress = (round % 10 == 0);
        if (reportProgress) {
            Console.WriteLine ("");
            Console.WriteLine (this.ToString());
            Console.WriteLine("-- I {0}> remains: {1}, alpha: {2}, mean: {3}, stddev: {4}, pivot: {5}, min_bs: {6}, db: {7}, do_far: {8}",
                round, remaining.Count, alpha, stats.mean, stats.stddev, pivotId, min_bs, DB.Name, do_far);
            if (pivotInfo.num_near > 0) {
                Console.WriteLine("-- (NORMVAL) first-near: {0}, last-near: {1}, near-count: {2}",
                    minNear / stats.max, pivotInfo.last_near / stats.max, pivotInfo.num_near);
            }
            if (pivotInfo.num_far > 0) {
                Console.WriteLine("++ (NORMVAL) first-far: {0}, far-count: {1}",
                    pivotInfo.first_far / stats.max, pivotInfo.num_far);
            }
        }
    }

    Console.WriteLine("Number of pivots per group: {0}", round);
    this.Pivs = pivotList.ToArray ();
    this.Items = itemList.ToArray ();
}