/// <summary>
/// Chooses trees for a query weighting by running <c>GetANN</c> on the root of
/// <c>dimtree</c> and converting the points it returns with <c>pointsToTrees</c>.
/// </summary>
/// <param name="dw">Query weighting; its <c>Pdf</c> is wrapped in a <c>Point</c> and used as the query.</param>
/// <param name="k">Number of neighbours requested from the search.</param>
/// <param name="maxSearch">Search budget passed straight through to <c>GetANN</c>.</param>
/// <returns>Whatever <c>pointsToTrees</c> produces for the points found.</returns>
private List<KDTree> getKBestTreeKD(DimWeight dw, int k, int maxSearch)
{
    var weightPoint = new Point(dw.Pdf);
    return pointsToTrees(dimtree.root.GetANN(weightPoint, k, maxSearch), weightPoint);
}
/// <summary>
/// Scans every point of <c>pointset</c> and keeps at most <paramref name="K"/> of them in a
/// heap ordered by a <c>PointCompare</c> built from <paramref name="dw"/> and <paramref name="p"/>.
/// </summary>
/// <param name="p">Query point.</param>
/// <param name="K">Maximum number of points to keep.</param>
/// <param name="dw">Dimension weighting handed to the comparer.</param>
/// <returns>A new <c>PointSet</c> holding the kept points in heap-extraction order.</returns>
public PointSet GetKNN(Point p, int K, DimWeight dw)
{
    var comparer = new PointCompare(dw, p);
    var best = new MaxHeap<Point>(comparer);

    for (int idx = 0; idx < pointset.Points.Count(); idx++)
    {
        // Until the heap holds K points, everything is accepted.
        if (best.Count < K)
        {
            best.Add(pointset.Points[idx]);
            continue;
        }

        // Heap is full: swap the candidate in only when the comparer ranks the
        // heap's GetMin() entry strictly above it.
        if (comparer.Compare(best.GetMin(), pointset.Points[idx]) > 0)
        {
            best.ExtractDominating();
            best.Add(pointset.Points[idx]);
        }
    }

    var result = new PointSet(p.NumDim);
    while (best.Count > 0)
    {
        result.AddPoint(best.ExtractDominating());
    }
    return result;
}
/// <summary>
/// Builds a tree over <paramref name="ps"/> and then wraps the built top node in an extra
/// parent node, which becomes <c>root</c>.
/// </summary>
/// <param name="ps">Points to index.</param>
/// <param name="dw">Weighting stored as <c>splitWeight</c> and passed to the node constructor.</param>
/// <param name="useRandom">Forwarded to the node constructor.</param>
public KDTree(PointSet ps, DimWeight dw, bool useRandom)
{
    splitWeight = dw;

    // The node built from the data ends up as the child of the final root.
    var dataTop = new KDTreeNode(ps, null, splitWeight, useRandom);
    dataTop.parent = new KDTreeNode(ps.NumDim, dataTop);
    root = dataTop.parent;
}
/// <summary>
/// Splits the search budget between choosing trees and searching them: a
/// <paramref name="ratio"/> share of the stored tree count is spent on tree selection and
/// the rest of <paramref name="maxSearch"/> is handed to the forest search.
/// </summary>
/// <param name="p">Query point.</param>
/// <param name="dw">Query weighting.</param>
/// <param name="k">Number of neighbours requested from the forest.</param>
/// <param name="ktree">Number of trees to select.</param>
/// <param name="ratio">Fraction of the stored tree count used as the tree-selection budget; must not exceed 1.</param>
/// <param name="prune">Passed to the <c>KDTreeForest</c> constructor.</param>
/// <param name="maxSearch">Total search budget.</param>
/// <returns>
/// The forest's weighted result, or <c>null</c> (after printing "Invalid ratio/maxsearch")
/// when <paramref name="ratio"/> is above 1 or the selection budget exceeds <paramref name="maxSearch"/>.
/// </returns>
public PointSet GetANN(Point p, DimWeight dw, int k, int ktree, double ratio, double prune, int maxSearch)
{
    // The tree count comes from whichever dictionary is in use for this holder.
    int storedTrees = ToDisk ? filenames.Count() : pointmap.Count();
    int nSearchTrees = (int)Math.Round(storedTrees * ratio);

    bool budgetOk = !(ratio > 1) && nSearchTrees <= maxSearch;
    if (!budgetOk)
    {
        Console.WriteLine("Invalid ratio/maxsearch");
        return null;
    }

    var selected = getKBestTreeKD(dw, ktree, nSearchTrees);
    var forest = new KDTreeForest(selected, prune);
    return forest.GetANNWeighted(p, k, maxSearch - nSearchTrees, dw);
}
/// <summary>
/// Recursively builds a node for <paramref name="ps"/>. Sets with more than one point are
/// split at the median of a chosen dimension; a single point becomes a leaf holding it;
/// an empty set becomes a leaf with no point.
/// </summary>
/// <param name="ps">Points that belong under this node.</param>
/// <param name="parent">Parent node, stored as-is.</param>
/// <param name="dwsplit">Weighting used to pick the split dimension.</param>
/// <param name="useRandom">
/// When true the split dimension comes from <c>dwsplit.getRandomDim()</c>, otherwise from
/// <c>ps.GetLongestDimension(dwsplit)</c>.
/// </param>
public KDTreeNode(PointSet ps, KDTreeNode parent, DimWeight dwsplit, bool useRandom)
{
    this.parent = parent;

    int pointCount = ps.Points.Count();
    if (pointCount <= 1)
    {
        isLeaf = true;
        if (pointCount == 1)
        {
            point = ps.Points[0];
        }
        return;
    }

    isLeaf = false;
    if (!useRandom)
    {
        splitDim = ps.GetLongestDimension(dwsplit);
    }
    else
    {
        splitDim = dwsplit.getRandomDim();
    }

    // The median point stays on this node; the two halves become the subtrees.
    var halves = ps.PartitionMedian(splitDim);
    point = halves.median;
    leftChild = new KDTreeNode(halves.lower, this, dwsplit, useRandom);
    rightChild = new KDTreeNode(halves.upper, this, dwsplit, useRandom);
}
/// <summary>
/// Chooses trees for a query weighting by asking <c>bf</c> for the <paramref name="k"/>
/// nearest points and converting them with <c>pointsToTrees</c>.
/// </summary>
/// <param name="dw">Query weighting; its <c>Pdf</c> is wrapped in a <c>Point</c> and used as the query.</param>
/// <param name="k">Number of neighbours requested.</param>
/// <returns>Whatever <c>pointsToTrees</c> produces for the points found.</returns>
private List<KDTree> getKBestTreeLinear(DimWeight dw, int k)
{
    var weightPoint = new Point(dw.Pdf);
    return pointsToTrees(bf.GetKNN(weightPoint, k), weightPoint);
}
/// <summary>
/// Builds a forest of <paramref name="ntrees"/> trees, each constructed from the same
/// point set, weighting and randomisation flag.
/// </summary>
/// <param name="ntrees">Number of trees to build; also stored in <c>NTrees</c>.</param>
/// <param name="ps">Points indexed by every tree.</param>
/// <param name="dw">Weighting passed to every tree.</param>
/// <param name="useRandom">Passed to every tree.</param>
public KDTreeForest(int ntrees, PointSet ps, DimWeight dw, bool useRandom)
{
    NTrees = ntrees;
    trees = new List<KDTree>();

    int remaining = ntrees;
    while (remaining-- > 0)
    {
        trees.Add(new KDTree(ps, dw, useRandom));
    }
}
/// <summary>
/// Builds one tree per dimension weighting and indexes the weightings themselves so that
/// trees can later be looked up by weight vector.
/// </summary>
/// <remarks>
/// The weighting list starts with <c>new DimWeight(NumDim)</c>, then <c>kSubset</c> is
/// called for every size from 1 to <paramref name="depthDeter"/> (presumably appending to
/// <c>dimWeights</c> — its body is not visible here), then <paramref name="nRandom"/>
/// weightings with uniformly random components are appended.
/// </remarks>
/// <param name="ps">Points indexed by every tree.</param>
/// <param name="depthDeter">Largest size passed to <c>kSubset</c>.</param>
/// <param name="nRandom">Number of random weightings to add.</param>
/// <param name="toDisk">When true each tree is serialised and only its filename is kept; otherwise the tree is kept in <c>pointmap</c>.</param>
/// <param name="useRandom">Forwarded to every <c>KDTree</c> constructor.</param>
public KDTreeForestHolder(PointSet ps, int depthDeter, int nRandom, bool toDisk, bool useRandom)
{
    NumDim = ps.NumDim;
    ToDisk = toDisk;
    dimWeights = new List<DimWeight>();
    filenames = new Dictionary<Point, string>();
    pointmap = new Dictionary<Point, KDTree>();
    dimset = new PointSet(ps.NumDim);
    uniformdw = new DimWeight(ps.NumDim);

    dimWeights.Add(new DimWeight(NumDim));
    for (int size = 1; size <= depthDeter; size++)
    {
        kSubset(new List<double>(), NumDim, size);
    }

    Random random = new Random();
    for (int r = 0; r < nRandom; r++)
    {
        var weights = new List<double>();
        for (int d = 0; d < NumDim; d++)
        {
            weights.Add(random.NextDouble());
        }
        dimWeights.Add(new DimWeight(weights));
    }

    int progress = 0;
    foreach (var dw in dimWeights)
    {
        // Progress counter, one line per tree built.
        Console.WriteLine(progress++);

        var key = new Point(dw.Pdf);
        var tree = new KDTree(ps, dw, useRandom);
        if (ToDisk)
        {
            filenames.Add(key, serializer.Serialize(tree));
        }
        else
        {
            pointmap.Add(key, tree);
        }
        dimset.AddPoint(key);
    }

    // Two indexes over the weight points: a linear scan and a tree.
    bf = new BruteForce(dimset);
    dimtree = new KDTree(dimset, useRandom);
}
/// <summary>
/// Budgeted recursive search of the subtree under this node. Descends first into the side
/// of the split that contains <paramref name="p"/>, then considers this node's point, then
/// visits the other side only if the splitting plane is not ranked worse than the heap's
/// <c>GetMin()</c> entry.
/// </summary>
/// <param name="p">Query point.</param>
/// <param name="K">Passed to <c>CheckPoint</c>.</param>
/// <param name="maxSearch">Stop visiting once <paramref name="searched"/> holds this many points.</param>
/// <param name="dw">Query weighting, forwarded on recursion.</param>
/// <param name="searched">Accumulates every visited point; its size is the budget counter.</param>
/// <param name="heap">Current candidates.</param>
/// <param name="pc">Comparer used for the plane test.</param>
private void SearchDown(Point p, int K, int maxSearch, DimWeight dw, PointSet searched, MaxHeap<Point> heap, PointCompare pc)
{
    if (this == null || this.point == null || searched.NPoints >= maxSearch)
    {
        return;
    }

    if (isLeaf)
    {
        CheckPoint(K, heap, pc);
        searched.AddPoint(point);
        return;
    }

    // Near side first: the child on the query's side of the split.
    bool wentLeft = p.Values[splitDim] < point.Values[splitDim];
    if (wentLeft)
    {
        leftChild.SearchDown(p, K, maxSearch, dw, searched, heap, pc);
    }
    else
    {
        rightChild.SearchDown(p, K, maxSearch, dw, searched, heap, pc);
    }

    // Check this point
    if (searched.NPoints < maxSearch)
    {
        CheckPoint(K, heap, pc);
        searched.AddPoint(point);
    }

    // Check if a better point possibly exists in the other subtree: project the query
    // onto the splitting plane and compare that projection with the heap's GetMin() entry.
    var projected = new List<Double>(p.Values);
    projected[splitDim] = point.Values[splitDim];
    Point planecheck = new Point(projected);
    if (pc.Compare(heap.GetMin(), planecheck) < 0)
    {
        return;
    }

    var farChild = wentLeft ? rightChild : leftChild;
    if (farChild != null)
    {
        farChild.SearchDown(p, K, maxSearch, dw, searched, heap, pc);
    }
}
/// <summary>
/// Benchmark driver on the point set loaded from "ColorHistogram.asc": compares a standard
/// tree, a tree built for the query weighting, and the forest holder against brute force.
/// </summary>
/// <remarks>
/// Each sample is (mean distance of the approximate result / mean distance of the
/// brute-force result) - 1. One line is printed at the end with the averages in the order
/// standard tree, forest holder, query-specific tree.
/// </remarks>
public static void Full_Sys_Test_Alternative()
{
    int dim = 32;
    int testcases = 20;
    int dws = 80;
    int K = 20;
    int maxSearch = 500;
    int ntrees = 3;
    int nrand = 100;
    int ddeter = 1;
    double ratio = .4;
    double prune = 1.5;
    Random random = new Random();

    PointSet ps = new PointSet("ColorHistogram.asc", 1, ' ');
    ps.NormalizePoints();

    // Indexes
    var bruteForce = new BruteForce(ps);
    var standardTree = new KDTree(ps, false);
    var standardForest = new KDTreeForestHolder(ps, ddeter, nrand, false, false);

    // Result holders
    var standardDist = new List<double>();
    var queryDist = new List<double>();
    var standardDist_f = new List<double>();

    for (int w = 0; w < dws; w++)
    {
        Console.WriteLine(dim + " " + w);
        var querydw = new DimWeight(dim, random);
        var queryTree = new KDTree(ps, querydw, false);

        for (int t = 0; t < testcases; t++)
        {
            // Queries are drawn from the data set itself.
            var p = ps.Points[random.Next(ps.Points.Count())];
            var bfd = bruteForce.GetKNN(p, K, querydw).GetMeanDistance(p, querydw);

            var fromStandard = standardTree.root.GetANN(p, K, maxSearch, querydw);
            standardDist.Add(fromStandard.GetMeanDistance(p, querydw) / bfd - 1);

            var fromQueryTree = queryTree.root.GetANN(p, K, maxSearch, querydw);
            queryDist.Add(fromQueryTree.GetMeanDistance(p, querydw) / bfd - 1);

            var fromForest = standardForest.GetANN(p, querydw, K, ntrees, ratio, prune, maxSearch);
            standardDist_f.Add(fromForest.GetMeanDistance(p, querydw) / bfd - 1);
        }
    }

    Console.WriteLine(standardDist.Average() + "," + standardDist_f.Average() + "," + queryDist.Average());
}
/// <summary>
/// Searches all trees of the forest using one thread per tree, with this thread repeatedly
/// calling <c>performAction</c> on every tree that has not yet been marked as returned.
/// </summary>
/// <remarks>
/// The query state is stored in instance fields, so the worker entry point
/// <c>searchStuff</c> takes no arguments. Each worker's mutex is acquired by this thread
/// before the worker starts. The exact hand-off protocol lives in <c>searchStuff</c> and
/// <c>performAction</c>, which are not visible here.
/// </remarks>
/// <param name="p">Query point.</param>
/// <param name="K">Number of neighbours wanted.</param>
/// <param name="maxSearch">Search budget.</param>
/// <param name="dw">Query weighting.</param>
/// <returns>A new <c>PointSet</c> holding the heap's contents in extraction order.</returns>
public PointSet GetANN(Point p, int K, int maxSearch, DimWeight dw)
{
    // Publish the query to the fields the workers read.
    this.p = p;
    this.K = K;
    this.maxSearch = maxSearch;
    this.dw = dw;

    searched = new PointSetHash();
    pc = new PointCompare(dw, p);
    heap = new MaxHeap<Point>(pc);
    childHolds = new List<bool>();
    mutexes = new List<Mutex>();
    threadIds = new Dictionary<int, int>();
    threads = new List<Thread>();
    returned = new List<bool>();
    nReturned = 0;

    for (int slot = 0; slot < NTrees; slot++)
    {
        mutexes.Add(new Mutex());
        childHolds.Add(false);
        returned.Add(false);

        // This thread owns the slot's mutex before the worker exists.
        mutexes[slot].WaitOne();

        Thread worker = new Thread(new ThreadStart(searchStuff));
        threadIds.Add(worker.ManagedThreadId, slot);
        threads.Add(worker);
        worker.Start();
    }

    // Round-robin over the trees until every one has reported back.
    while (nReturned < NTrees)
    {
        for (int slot = 0; slot < NTrees; slot++)
        {
            if (returned[slot])
            {
                continue;
            }
            performAction(slot);
        }
    }

    cleanupThreads();

    PointSet result = new PointSet(p.NumDim);
    while (heap.Count > 0)
    {
        result.AddPoint(heap.ExtractDominating());
    }
    return result;
}
/// <summary>
/// Searches the forest using one thread per tree; this thread repeatedly picks a tree with
/// <c>treeweights.GetRandomId()</c> and calls <c>performAction</c> on it.
/// </summary>
/// <remarks>
/// The query state is stored in instance fields, so the worker entry point
/// <c>searchStuffWeighted</c> takes no arguments. Each worker's mutex is acquired by this
/// thread before the worker starts. The hand-off protocol itself lives in
/// <c>searchStuffWeighted</c> and <c>performAction</c>, which are not visible here.
/// The scheduling loop ends when every tree has returned, when the drawn id is outside
/// the tree list, or when the action on the drawn tree fails and that tree is already
/// marked as returned.
/// </remarks>
/// <param name="p">Query point.</param>
/// <param name="K">Number of neighbours wanted.</param>
/// <param name="maxSearch">Search budget.</param>
/// <param name="dw">Query weighting.</param>
/// <returns>A new <c>PointSet</c> holding the heap's contents in extraction order.</returns>
public PointSet GetANNWeighted(Point p, int K, int maxSearch, DimWeight dw)
{
    // Publish the query to the fields the workers read.
    this.p = p;
    this.K = K;
    this.maxSearch = maxSearch;
    this.dw = dw;

    searched = new PointSetHash();
    pc = new PointCompare(dw, p);
    heap = new MaxHeap<Point>(pc);
    childHolds = new List<bool>();
    mutexes = new List<Mutex>();
    flaggedId = new HashSet<int>();
    threadIds = new Dictionary<int, int>();
    threads = new List<Thread>();
    returned = new List<bool>();
    nReturned = 0;

    for (int currNum = 0; currNum < NTrees; currNum++)
    {
        mutexes.Add(new Mutex());
        childHolds.Add(false);
        returned.Add(false);

        // This thread owns the slot's mutex before the worker exists.
        mutexes[currNum].WaitOne();

        Thread t = new Thread(new ThreadStart(searchStuffWeighted));
        threadIds.Add(t.ManagedThreadId, currNum);
        threads.Add(t);
        t.Start();
    }

    while (nReturned < NTrees)
    {
        // Draw ONE id and use it for both the action and the termination test.
        // Previously a second GetRandomId() call fed performAction, so the test below
        // inspected a different tree from the one that had just been acted on.
        int id = treeweights.GetRandomId();

        // An id outside the tree list ends the search; test it before acting so the
        // out-of-range value is never handed to performAction.
        if (id >= returned.Count())
        {
            break;
        }

        bool res = performAction(id);
        if (!res && returned[id])
        {
            break;
        }
    }

    cleanupThreads();

    PointSet ps = new PointSet(p.NumDim);
    while (heap.Count > 0)
    {
        ps.AddPoint(heap.ExtractDominating());
    }
    return ps;
}
/// <summary>
/// Entry point for a budgeted search of the subtree under this node: sets up a comparer,
/// a candidate heap and an empty visited set, runs <c>SearchDown</c>, and returns the
/// heap's contents.
/// </summary>
/// <param name="p">Query point.</param>
/// <param name="K">Number of neighbours wanted.</param>
/// <param name="maxSearch">Search budget forwarded to <c>SearchDown</c>.</param>
/// <param name="dw">Query weighting.</param>
/// <returns>A new <c>PointSet</c> holding the heap's contents in extraction order.</returns>
public PointSet GetANN(Point p, int K, int maxSearch, DimWeight dw)
{
    var comparer = new PointCompare(dw, p);
    var candidates = new MaxHeap<Point>(comparer);
    var visited = new PointSet(p.NumDim);

    SearchDown(p, K, maxSearch, dw, visited, candidates, comparer);

    var result = new PointSet(p.NumDim);
    while (candidates.Count > 0)
    {
        result.AddPoint(candidates.ExtractDominating());
    }
    return result;
}
/// <summary>
/// Benchmark driver that measures how the relative error of three approximate searches
/// changes with the search budget, on uniformly random points, and writes the averages
/// to "resultsnsearch.csv".
/// </summary>
/// <remarks>
/// Each sample is (mean distance of the approximate result / mean distance of the
/// brute-force result) - 1. The file has one row per entry of <c>maxSearchs</c>, in that
/// order, with columns: standard tree, query-specific tree, forest holder.
/// </remarks>
public static void Full_Sys_Test_nSearch()
{
    int dim = 8;
    int npoint = 100000;
    int testcases = 20;
    int dws = 80;
    int K = 20;
    var maxSearchs = new List<int>() { 50, 200, 500, 2000, 8000 };
    int ntrees = 3;
    int nrand = 100;
    int ddeter = 1;
    double ratio = .4;
    double prune = 1.5;
    var lines = new List<string>();
    string filename = "resultsnsearch.csv";
    Random random = new Random();

    PointSet ps = new PointSet(dim);
    for (int i = 0; i < npoint; i++)
    {
        ps.AddPoint(new Point(dim, random));
    }

    //Indexes
    var bruteForce = new BruteForce(ps);
    var standardTree = new KDTree(ps, false);
    var standardForest = new KDTreeForestHolder(ps, ddeter, nrand, false, false);

    //Result holders: one bucket per search budget
    var standardDist = new List<List<double>>();
    var queryDist = new List<List<double>>();
    var standardDist_f = new List<List<double>>();
    for (int i = 0; i < maxSearchs.Count; i++)
    {
        standardDist.Add(new List<double>());
        queryDist.Add(new List<double>());
        standardDist_f.Add(new List<double>());
    }

    for (int i = 0; i < dws; i++)
    {
        Console.WriteLine(dim + " " + i);
        var querydw = new DimWeight(dim, random);
        var queryTree = new KDTree(ps, querydw, false);

        for (int j = 0; j < testcases; j++)
        {
            var p = new Point(dim, random);
            var bfd = bruteForce.GetKNN(p, K, querydw).GetMeanDistance(p, querydw);

            // The bucket index must advance with the budget. It used to be declared
            // inside the loop body, so it was reset to 0 on every pass: every sample
            // landed in bucket 0 and the other buckets stayed empty, which makes
            // Average() throw when the rows are written.
            for (int indx = 0; indx < maxSearchs.Count; indx++)
            {
                int maxSearch = maxSearchs[indx];
                standardDist[indx].Add(standardTree.root.GetANN(p, K, maxSearch, querydw).GetMeanDistance(p, querydw) / bfd - 1);
                queryDist[indx].Add(queryTree.root.GetANN(p, K, maxSearch, querydw).GetMeanDistance(p, querydw) / bfd - 1);
                standardDist_f[indx].Add(standardForest.GetANN(p, querydw, K, ntrees, ratio, prune, maxSearch).GetMeanDistance(p, querydw) / bfd - 1);
            }
        }
    }

    for (int i = 0; i < standardDist.Count; i++)
    {
        lines.Add(standardDist[i].Average() + "," + queryDist[i].Average() + "," + standardDist_f[i].Average());
    }
    System.IO.File.WriteAllLines(filename, lines);
}
/// <summary>
/// Threaded counterpart of the recursive subtree search: same traversal order (near side,
/// this node, then possibly the far side), but every access to the shared heap and visited
/// set is wrapped in a spin-then-lock hand-off on slot <paramref name="id"/>.
/// </summary>
/// <remarks>
/// Before touching shared state this method spins while <c>b[id]</c> is true, acquires
/// <c>m[id]</c>, and sets <c>b[id]</c> to true; it releases <c>m[id]</c> afterwards and
/// never clears the flag itself. Presumably the coordinating thread clears it when it
/// grants the next turn — that code is not visible here. The order of these steps is
/// deliberate and must not be rearranged.
/// </remarks>
/// <param name="p">Query point.</param>
/// <param name="K">Passed to <c>CheckPoint</c>.</param>
/// <param name="maxSearch">Stop visiting once <paramref name="searched"/> holds this many points.</param>
/// <param name="dw">Query weighting, forwarded on recursion.</param>
/// <param name="searched">Shared set of visited points; points already in it are skipped.</param>
/// <param name="heap">Shared candidate heap.</param>
/// <param name="pc">Comparer used for the plane test.</param>
/// <param name="b">Per-slot flags used for the hand-off described above.</param>
/// <param name="m">Per-slot mutexes guarding the shared state.</param>
/// <param name="id">Slot index of the calling worker.</param>
public void SearchDownThreaded(Point p, int K, int maxSearch, DimWeight dw, PointSetHash searched, MaxHeap<Point> heap, PointCompare pc, List<Boolean> b, List<Mutex> m, int id)
{
    if (this == null || this.point == null || searched.NPoints >= maxSearch)
    {
        return;
    }

    if (isLeaf)
    {
        // Wait for our turn, then take the slot.
        while (b[id]) { ; }
        m[id].WaitOne();
        b[id] = true;
        if (!searched.CheckContains(point) && searched.NPoints < maxSearch)
        {
            CheckPoint(K, heap, pc);
            searched.AddPoint(point);
        }
        m[id].ReleaseMutex();
    }
    else
    {
        bool leftSearched;
        if (p.Values[splitDim] < point.Values[splitDim])
        {
            leftChild.SearchDownThreaded(p, K, maxSearch, dw, searched, heap, pc, b, m, id);
            leftSearched = true;
        }
        else
        {
            rightChild.SearchDownThreaded(p, K, maxSearch, dw, searched, heap, pc, b, m, id);
            leftSearched = false;
        }

        // Check this point
        while (b[id]) { ; }
        m[id].WaitOne();
        b[id] = true;
        if (!searched.CheckContains(point) && searched.NPoints < maxSearch)
        {
            CheckPoint(K, heap, pc);
            searched.AddPoint(point);
        }

        // Check if a better point possibly exists in the other subtree. The decision is
        // taken while the mutex is still held because it reads the shared heap; the
        // recursion itself happens after the release.
        var pval = new List<Double>(p.Values);
        pval[splitDim] = point.Values[splitDim];
        Point planecheck = new Point(pval);
        bool expandOther = pc.Compare(heap.GetMin(), planecheck) >= 0;
        m[id].ReleaseMutex();

        if (expandOther)
        {
            if (leftSearched && rightChild != null)
            {
                rightChild.SearchDownThreaded(p, K, maxSearch, dw, searched, heap, pc, b, m, id);
            }
            else if (!leftSearched && leftChild != null)
            {
                leftChild.SearchDownThreaded(p, K, maxSearch, dw, searched, heap, pc, b, m, id);
            }
        }
    }
}
/// <summary>
/// Benchmark driver that measures how the number of random weightings in the forest
/// holder affects relative error, on uniformly random points, and writes the averages to
/// "resultsnrand.csv".
/// </summary>
/// <remarks>
/// Each sample is (mean distance of the approximate result / mean distance of the
/// brute-force result) - 1. Forest samples that are NaN or not below 1000 are dropped.
/// The file has one row per entry of <c>nrands</c> with columns: standard tree, forest
/// holder, query-specific tree. A fresh point set is generated for every row.
/// </remarks>
public static void Full_Sys_Test_Nrand()
{
    int dim = 8;
    int npoint = 100000;
    int testcases = 20;
    int dws = 80;
    int K = 20;
    int maxSearch = 500;
    int ntrees = 3;
    var nrands = new List<int>() { 20, 50, 100, 200, 400 };
    int ddeter = 1;
    double ratio = .4;
    double prune = 1.5;
    var lines = new List<string>();
    string filename = "resultsnrand.csv";
    Random random = new Random();

    foreach (var nrand in nrands)
    {
        PointSet ps = new PointSet(dim);
        for (int n = 0; n < npoint; n++)
        {
            ps.AddPoint(new Point(dim, random));
        }

        // Indexes
        var bruteForce = new BruteForce(ps);
        var standardTree = new KDTree(ps, false);
        var standardForest = new KDTreeForestHolder(ps, ddeter, nrand, false, false);

        // Result holders
        var standardDist = new List<double>();
        var queryDist = new List<double>();
        var standardDist_f = new List<double>();

        for (int w = 0; w < dws; w++)
        {
            Console.WriteLine(dim + " " + w);
            var querydw = new DimWeight(dim, random);
            var queryTree = new KDTree(ps, querydw, false);

            for (int t = 0; t < testcases; t++)
            {
                var p = new Point(dim, random);
                var bfd = bruteForce.GetKNN(p, K, querydw).GetMeanDistance(p, querydw);

                var fromStandard = standardTree.root.GetANN(p, K, maxSearch, querydw);
                standardDist.Add(fromStandard.GetMeanDistance(p, querydw) / bfd - 1);

                var fromQueryTree = queryTree.root.GetANN(p, K, maxSearch, querydw);
                queryDist.Add(fromQueryTree.GetMeanDistance(p, querydw) / bfd - 1);

                double val = standardForest.GetANN(p, querydw, K, ntrees, ratio, prune, maxSearch).GetMeanDistance(p, querydw) / bfd - 1;

                // Discard unusable forest samples.
                if (Double.IsNaN(val) || !(val < 1000))
                {
                    continue;
                }
                standardDist_f.Add(val);
            }
        }

        lines.Add(standardDist.Average() + "," + standardDist_f.Average() + "," + queryDist.Average());
    }

    System.IO.File.WriteAllLines(filename, lines);
}
/// <summary>
/// Benchmark driver comparing indexes built without a query weighting ("standard") against
/// indexes built for the query weighting ("query"), for single trees and forests, each
/// constructed with the randomisation flag off and on.
/// </summary>
/// <remarks>
/// Each sample is (mean distance of the approximate result / mean distance of the
/// brute-force result) - 1. Four lines are printed, each "standard query" averages, in the
/// order: tree, randomised tree, forest, randomised forest.
/// </remarks>
public static void DRV_Matching_Test()
{
    int dim = 8;
    int npoint = 100000;
    int testcases = 10;
    int dws = 10;
    int K = 20;
    int maxSearch = 500;
    int ntrees = 3;
    Random random = new Random();

    PointSet ps = new PointSet(dim);
    for (int n = 0; n < npoint; n++)
    {
        ps.AddPoint(new Point(dim, random));
    }

    // Indexes
    var bruteForce = new BruteForce(ps);
    var standardTree_r = new KDTree(ps, true);
    var standardTree = new KDTree(ps, false);
    var standardForest_r = new KDTreeForest(ntrees, ps, true);
    var standardForest = new KDTreeForest(ntrees, ps, false);

    // Result holders (bruteforceDist is only used by the disabled absolute-distance report)
    var bruteforceDist = new List<double>();
    var standardDist = new List<double>();
    var queryDist = new List<double>();
    var standardDist_r = new List<double>();
    var queryDist_r = new List<double>();
    var standardDist_f = new List<double>();
    var queryDist_f = new List<double>();
    var standardDist_r_f = new List<double>();
    var queryDist_r_f = new List<double>();

    for (int w = 0; w < dws; w++)
    {
        Console.WriteLine(w);
        var querydw = new DimWeight(dim, random);
        var queryTree = new KDTree(ps, querydw, false);
        var queryTree_r = new KDTree(ps, querydw, true);
        var queryForest = new KDTreeForest(ntrees, ps, querydw, false);
        var queryForest_r = new KDTreeForest(ntrees, ps, querydw, true);

        for (int t = 0; t < testcases; t++)
        {
            var p = new Point(dim, random);
            //bruteforceDist.Add(bruteForce.GetKNN(p, K, querydw).GetMeanDistance(p, querydw));
            var bfd = bruteForce.GetKNN(p, K, querydw).GetMeanDistance(p, querydw);

            // Single trees
            var treeStd = standardTree.root.GetANN(p, K, maxSearch, querydw);
            standardDist.Add(treeStd.GetMeanDistance(p, querydw) / bfd - 1);
            var treeQry = queryTree.root.GetANN(p, K, maxSearch, querydw);
            queryDist.Add(treeQry.GetMeanDistance(p, querydw) / bfd - 1);
            var treeStdRand = standardTree_r.root.GetANN(p, K, maxSearch, querydw);
            standardDist_r.Add(treeStdRand.GetMeanDistance(p, querydw) / bfd - 1);
            var treeQryRand = queryTree_r.root.GetANN(p, K, maxSearch, querydw);
            queryDist_r.Add(treeQryRand.GetMeanDistance(p, querydw) / bfd - 1);

            // Forests
            var forestStd = standardForest.GetANN(p, K, maxSearch, querydw);
            standardDist_f.Add(forestStd.GetMeanDistance(p, querydw) / bfd - 1);
            var forestQry = queryForest.GetANN(p, K, maxSearch, querydw);
            queryDist_f.Add(forestQry.GetMeanDistance(p, querydw) / bfd - 1);
            var forestStdRand = standardForest_r.GetANN(p, K, maxSearch, querydw);
            standardDist_r_f.Add(forestStdRand.GetMeanDistance(p, querydw) / bfd - 1);
            var forestQryRand = queryForest_r.GetANN(p, K, maxSearch, querydw);
            queryDist_r_f.Add(forestQryRand.GetMeanDistance(p, querydw) / bfd - 1);
        }
    }

    //Console.WriteLine(bruteforceDist.Average());
    Console.WriteLine(standardDist.Average() + " " + queryDist.Average());
    Console.WriteLine(standardDist_r.Average() + " " + queryDist_r.Average());
    Console.WriteLine(standardDist_f.Average() + " " + queryDist_f.Average());
    Console.WriteLine(standardDist_r_f.Average() + " " + queryDist_r_f.Average());
    //Console.WriteLine(standardDist.Max() + " " + queryDist.Max());
    //Console.WriteLine(standardDist_r.Max() + " " + queryDist_r.Max());
    //Console.WriteLine(standardDist_f.Max() + " " + queryDist_f.Max());
    //Console.WriteLine(standardDist_r_f.Max() + " " + queryDist_r_f.Max());
}
/// <summary>
/// Benchmark driver for query weightings created with the three-argument
/// <c>DimWeight(dim, selper, random)</c> constructor: compares a standard tree, a
/// query-specific tree and the forest holder against brute force, each built with the
/// randomisation flag off and on.
/// </summary>
/// <remarks>
/// Each sample is (mean distance of the approximate result / mean distance of the
/// brute-force result) - 1. Four lines are printed with columns standard tree, forest
/// holder, query-specific tree: averages (flag off), averages (flag on), then the
/// <c>stdev</c> values for the same two groups.
/// </remarks>
public static void Full_Sys_Test_ExtremeDRV()
{
    int dim = 8;
    int npoint = 100000;
    int testcases = 10;
    int dws = 50;
    int K = 20;
    int maxSearch = 500;
    int ntrees = 3;
    int nrand = 50;
    int ddeter = 3;
    double ratio = .4;
    double prune = 1.5;
    double selper = .125;
    Random random = new Random();

    PointSet ps = new PointSet(dim);
    for (int n = 0; n < npoint; n++)
    {
        ps.AddPoint(new Point(dim, random));
    }

    // Indexes
    var bruteForce = new BruteForce(ps);
    var standardTree = new KDTree(ps, false);
    var standardForest = new KDTreeForestHolder(ps, ddeter, nrand, false, false);
    var standardTree_r = new KDTree(ps, true);
    var standardForest_r = new KDTreeForestHolder(ps, ddeter, nrand, false, true);

    // Result holders
    var standardDist = new List<double>();
    var queryDist = new List<double>();
    var standardDist_f = new List<double>();
    var standardDist_r = new List<double>();
    var queryDist_r = new List<double>();
    var standardDist_f_r = new List<double>();

    for (int w = 0; w < dws; w++)
    {
        Console.WriteLine(w);
        var querydw = new DimWeight(dim, selper, random);
        var queryTree = new KDTree(ps, querydw, false);
        var queryTree_r = new KDTree(ps, querydw, true);

        for (int t = 0; t < testcases; t++)
        {
            var p = new Point(dim, random);
            var bfd = bruteForce.GetKNN(p, K, querydw).GetMeanDistance(p, querydw);

            // Randomisation flag off
            var treeStd = standardTree.root.GetANN(p, K, maxSearch, querydw);
            standardDist.Add(treeStd.GetMeanDistance(p, querydw) / bfd - 1);
            var treeQry = queryTree.root.GetANN(p, K, maxSearch, querydw);
            queryDist.Add(treeQry.GetMeanDistance(p, querydw) / bfd - 1);
            var forestStd = standardForest.GetANN(p, querydw, K, ntrees, ratio, prune, maxSearch);
            standardDist_f.Add(forestStd.GetMeanDistance(p, querydw) / bfd - 1);

            // Randomisation flag on
            var treeStdRand = standardTree_r.root.GetANN(p, K, maxSearch, querydw);
            standardDist_r.Add(treeStdRand.GetMeanDistance(p, querydw) / bfd - 1);
            var treeQryRand = queryTree_r.root.GetANN(p, K, maxSearch, querydw);
            queryDist_r.Add(treeQryRand.GetMeanDistance(p, querydw) / bfd - 1);
            var forestStdRand = standardForest_r.GetANN(p, querydw, K, ntrees, ratio, prune, maxSearch);
            standardDist_f_r.Add(forestStdRand.GetMeanDistance(p, querydw) / bfd - 1);
        }
    }

    Console.WriteLine(standardDist.Average() + " " + standardDist_f.Average() + " " + queryDist.Average());
    Console.WriteLine(standardDist_r.Average() + " " + standardDist_f_r.Average() + " " + queryDist_r.Average());
    Console.WriteLine(stdev(standardDist) + " " + stdev(standardDist_f) + " " + stdev(queryDist));
    Console.WriteLine(stdev(standardDist_r) + " " + stdev(standardDist_f_r) + " " + stdev(queryDist_r));
}
/// <summary>
/// Benchmark driver that sweeps the search budget from 200 up to (but excluding) 8000 in
/// steps of 200 and records, for each budget, the average relative error of a standard
/// tree and of a tree built for the query weighting. Results go to "Results2.csv".
/// </summary>
/// <remarks>
/// Each sample is (mean distance of the approximate result / mean distance of the
/// brute-force result) - 1, averaged over the test cases of a weighting and then over all
/// weightings. The file has one row per budget with columns: standard tree, query tree.
/// </remarks>
public static void Test_Tree_maxSearch()
{
    int dim = 8;
    int npoint = 100000;
    int testcases = 10;
    int dws = 10;
    int K = 20;
    int maxSearch = 200;
    Random random = new Random();

    PointSet ps = new PointSet(dim);
    for (int i = 0; i < npoint; i++)
    {
        ps.AddPoint(new Point(dim, random));
    }

    //Indexes
    var bruteForce = new BruteForce(ps);
    var standardTree = new KDTree(ps, false);

    //Result holders: [test case][budget step], reset for every weighting
    var standardDist = new List<List<double>>();
    var queryDist = new List<List<double>>();

    //Final Result Holder: one running average per budget step
    var finStd = new List<double>();
    var finQue = new List<double>();

    for (int i = 0; i < dws; i++)
    {
        Console.WriteLine(i);
        var querydw = new DimWeight(dim, random);
        var queryTree = new KDTree(ps, querydw, false);
        standardDist.Clear();
        queryDist.Clear();

        for (int j = 0; j < testcases; j++)
        {
            standardDist.Add(new List<double>());
            queryDist.Add(new List<double>());
            var p = new Point(dim, random);
            var bfd = bruteForce.GetKNN(p, K, querydw).GetMeanDistance(p, querydw);

            for (int k = maxSearch; k < 8000; k = k + 200)
            {
                // The very first sweep also sizes the final accumulators.
                if (j == 0 && i == 0)
                {
                    finStd.Add(0);
                    finQue.Add(0);
                }
                standardDist[j].Add(standardTree.root.GetANN(p, K, k, querydw).GetMeanDistance(p, querydw) / bfd - 1);
                queryDist[j].Add(queryTree.root.GetANN(p, K, k, querydw).GetMeanDistance(p, querydw) / bfd - 1);
            }
        }

        for (int k = 0; k < queryDist[0].Count; k++)
        {
            double sumStd = 0;
            double sumQue = 0;
            for (int j = 0; j < testcases; j++)
            {
                sumStd += standardDist[j][k];
                sumQue += queryDist[j][k];
            }
            sumStd /= testcases;
            sumQue /= testcases;
            finStd[k] += sumStd / dws;
            finQue[k] += sumQue / dws;
        }
    }

    // Dispose the writer so its buffer is flushed and the file handle released;
    // without this the CSV could be left truncated or empty.
    using (FileStream fs = new FileStream("Results2.csv", FileMode.Create))
    using (StreamWriter sw = new StreamWriter(fs))
    {
        for (int i = 0; i < finStd.Count; i++)
        {
            sw.WriteLine(finStd[i] + "," + finQue[i]);
        }
    }
}