/// <summary>
/// Reads the street list, strips a trailing known suffix from each entry, builds a BK-tree
/// from the resulting names and serializes it to <paramref name="outputSerializedPath"/>.
/// </summary>
/// <param name="outputSerializedPath">Destination handed to <c>BKTreeSerializer.SerializeTo</c>.</param>
static void CreateBKTree(string outputSerializedPath)
{
    // NOTE(review): the input path is hard-coded to a developer desktop; consider making it configurable.
    string[] allLines = File.ReadAllLines("c:/users/brush/desktop/allStreets.csv");
    Data data = DataLoader.LoadData(regenerateBKTree: true);

    List<string> justNames = new List<string>(allLines.Length);
    foreach (string line in allLines)
    {
        string[] parts = line.Trim().Split(' ');

        // When the last token is a known suffix, keep everything before it;
        // otherwise keep the line exactly as it was read (untrimmed).
        if (data.Suffixes.IndexOf(parts[parts.Length - 1]) != -1)
        {
            justNames.Add(string.Join(" ", parts.Take(parts.Length - 1)));
        }
        else
        {
            justNames.Add(line);
        }
    }

    BKTree tree = BKTreeEngine.CreateBKTree(justNames);
    BKTreeSerializer.SerializeTo(tree, outputSerializedPath);
}
/// <summary>
/// Computes a CEDD descriptor for every image file, stores the descriptors in a BK-tree,
/// saves the tree through <c>CEDDRepository</c> and removes the "CeddIndexTree" cache entry.
/// </summary>
/// <param name="imageFiles">Image files to index; the array position is used as the node Id.</param>
/// <param name="IndexBgWorker">Worker that receives a progress report after each file.</param>
/// <param name="argument">Not used by this implementation.</param>
public void IndexFiles(FileInfo[] imageFiles, BackgroundWorker IndexBgWorker, object argument = null)
{
    BKTree<CEDDTreeNode> ceddtree = new BKTree<CEDDTreeNode>();
    CEDD cedd = new CEDD();
    int totalFileCount = imageFiles.Length;

    for (int i = 0; i < totalFileCount; i++)
    {
        FileInfo fi = imageFiles[i];
        double[] ceddDiscriptor;

        // The Image returned by FromFile is disposable as well; dispose it together with
        // the Bitmap copy so the file handle is released for every indexed file.
        using (Image source = Image.FromFile(fi.FullName))
        using (Bitmap bmp = new Bitmap(source))
        {
            ceddDiscriptor = cedd.Apply(bmp);
        }

        CEDDTreeNode ceddTreeNode = new CEDDTreeNode
        {
            Id = i,
            ImageName = fi.Name,
            ImagePath = fi.FullName,
            CEDDDiscriptor = ceddDiscriptor
        };
        ceddtree.add(ceddTreeNode);

        // NOTE(review): this reports the file index, not a percentage — confirm what the progress handler expects.
        IndexBgWorker.ReportProgress(i);
    }

    CEDDRepository<BKTree<CEDDTreeNode>> repo = new CEDDRepository<BKTree<CEDDTreeNode>>();
    repo.Save(ceddtree);

    // Drop the cached tree so later lookups do not use the previous index.
    CacheHelper.Remove("CeddIndexTree");
}
/// <summary>
/// Computes a CEDD descriptor for every image file, stores the descriptors in a BK-tree,
/// saves the tree through <c>CEDDRepository</c> and removes the "CeddIndexTree" cache entry.
/// </summary>
/// <param name="imageFiles">Image files to index; the array position is used as the node Id.</param>
/// <param name="IndexBgWorker">Worker that receives a progress report after each file.</param>
/// <param name="argument">Not used by this implementation.</param>
public void IndexFiles(FileInfo[] imageFiles, BackgroundWorker IndexBgWorker, object argument = null)
{
    BKTree<CEDDTreeNode> ceddtree = new BKTree<CEDDTreeNode>();
    CEDD cedd = new CEDD();
    int totalFileCount = imageFiles.Length;

    for (int i = 0; i < totalFileCount; i++)
    {
        FileInfo fi = imageFiles[i];
        double[] ceddDiscriptor;

        // The Image returned by FromFile is disposable as well; dispose it together with
        // the Bitmap copy so the file handle is released for every indexed file.
        using (Image source = Image.FromFile(fi.FullName))
        using (Bitmap bmp = new Bitmap(source))
        {
            ceddDiscriptor = cedd.Apply(bmp);
        }

        CEDDTreeNode ceddTreeNode = new CEDDTreeNode
        {
            Id = i,
            ImageName = fi.Name,
            ImagePath = fi.FullName,
            CEDDDiscriptor = ceddDiscriptor
        };
        ceddtree.add(ceddTreeNode);

        // NOTE(review): this reports the file index, not a percentage — confirm what the progress handler expects.
        IndexBgWorker.ReportProgress(i);
    }

    CEDDRepository<BKTree<CEDDTreeNode>> repo = new CEDDRepository<BKTree<CEDDTreeNode>>();
    repo.Save(ceddtree);

    // Drop the cached tree so later lookups do not use the previous index.
    CacheHelper.Remove("CeddIndexTree");
}
/// <summary>
/// Searches a tree built with the Hamming metric at distances 0, 1 and 2
/// and checks the number of hits for each.
/// </summary>
public void BKTree_SearchHamming()
{
    var hammingTree = new BKTree(BKTree.DistanceMetric.Hamming);
    foreach (string word in new[] { "taxi", "Test2", "Text", "Test", "tttt" })
    {
        hammingTree.Add(word);
    }

    // Distance 0: "Test" and "Test2" both count (only the shorter word's length is compared).
    var exactHits = hammingTree.Search("Test", 0);
    Assert.AreEqual(2, exactHits.Count);

    // "te-t" with at most 1 difference: teSt, teSt2, teXt
    var withinOne = hammingTree.Search("te-t", 1).Count;
    Assert.AreEqual(3, withinOne);

    // "test" with at most 2 differences: test, test2, teXt, tTTt
    var withinTwo = hammingTree.Search("test", 2).Count;
    Assert.AreEqual(4, withinTwo);
}
/// <summary>
/// Searches a default-constructed tree at distances 0, 1 and 2 and checks the
/// number of hits for each; "Test" is added twice on purpose.
/// </summary>
public void BKTree_SearchLevenshtein()
{
    var wordTree = new BKTree();
    foreach (string word in new[] { "Test", "Test", "Test2", "taxi", "Text", "tttt" })
    {
        wordTree.Add(word);
    }

    // "Test" at distance 0: a single hit
    var exactCount = wordTree.Search("Test", 0).Count;
    Assert.AreEqual(1, exactCount);

    // "te-t" at distance 1: teSt, teXt
    var withinOne = wordTree.Search("te-t", 1).Count;
    Assert.AreEqual(2, withinOne);

    // "test" at distance 2: test, test2, teXt, tTTt
    var withinTwo = wordTree.Search("test", 2).Count;
    Assert.AreEqual(4, withinTwo);
}
/// <summary>
/// Adds the "Ad" column of every row returned by <c>GetDataTable</c> to <paramref name="bk"/>,
/// calling <c>afterProgress</c> each time the computed percentage changes.
/// </summary>
/// <param name="bk">Tree that receives the values.</param>
/// <exception cref="Exception">
/// Thrown for any failure; the original exception is available as <c>InnerException</c>.
/// </exception>
public void Load(BKTree bk)
{
    try
    {
        DataTable dt = GetDataTable();
        int progress = -1;
        int rowCount = dt.Rows.Count;

        // Walk from the last row to the first and remove each row once it has been added,
        // so removal never shifts the indices that are still to be visited.
        for (int i = rowCount - 1; i > -1; i--)
        {
            bk.Add(dt.Rows[i]["Ad"].ToString());
            dt.Rows.RemoveAt(i);

            // 64-bit intermediate: i * 100 would overflow Int32 on very large tables.
            int current = (int)(100 - (long)i * 100 / rowCount);
            if (current != progress)
            {
                progress = current;
                afterProgress(progress);
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace survive.
        throw new Exception("Ağaç oluşturulamadı. " + ex.Message, ex);
    }
}
/// <summary>
/// Looks up near-misses in a small word tree and checks the result sets at thresholds 1 and 2.
/// </summary>
public void SimpleTests()
{
    string[] words =
    {
        "cook", "book", "books", "cake", "what",
        "water", "Cape", "Boon", "Cook", "Cart"
    };
    var wordTree = BKTree.Create(words);

    // "wat" within 1 edit: only "what"
    var closeToWat = wordTree.Find("wat", threshold: 1);
    Assert.Single(closeToWat, "what");

    // "wat" within 2 edits
    var widerWat = wordTree.Find("wat", threshold: 2);
    Assert.True(widerWat.SetEquals(Expected("cart", "what", "water")));

    // "caqe" within 1 edit
    var closeToCaqe = wordTree.Find("caqe", threshold: 1);
    Assert.True(closeToCaqe.SetEquals(Expected("cake", "cape")));
}
/// <summary>
/// Recursively compares two trees node by node.
/// </summary>
/// <param name="tree1">First tree (may be null).</param>
/// <param name="tree2">Second tree (may be null).</param>
/// <returns>
/// When either argument is null, the result of <c>tree1 == tree2</c>; otherwise true only if
/// Index, StringValue and child count match and every pair of children at the same position
/// compares equal.
/// </returns>
static bool CompareTrees(BKTree tree1, BKTree tree2)
{
    if (tree1 == null || tree2 == null)
    {
        return tree1 == tree2;
    }

    bool sameNode = tree1.Index == tree2.Index
                    && tree1.StringValue == tree2.StringValue
                    && tree1.Children.Length == tree2.Children.Length;
    if (!sameNode)
    {
        return false;
    }

    // Children are compared pairwise by position; stop at the first mismatch.
    int position = 0;
    while (position < tree1.Children.Length)
    {
        if (!CompareTrees(tree1.Children[position], tree2.Children[position]))
        {
            return false;
        }
        position++;
    }

    return true;
}
/// <summary>
/// Builds a tree from <paramref name="testValues"/> and checks that each value can still be
/// found by exact lookup, with a threshold of 1, and after a single-character deletion,
/// insertion or adjacent transposition of the search string.
/// </summary>
/// <param name="testValues">Values used both to build the tree and to derive the queries.</param>
private static void TestTreeInvariants(string[] testValues)
{
    var tree = BKTree.Create(testValues);
    int total = testValues.Length;

    // Threshold 0: the lookup must yield exactly the (lower-cased) value itself.
    for (int v = 0; v < total; v++)
    {
        string word = testValues[v];
        Assert.Single(tree.Find(word, threshold: 0), word.ToLower());
    }

    // Threshold 1: the value must be among the hits, but the hits must not be the whole tree.
    for (int v = 0; v < total; v++)
    {
        string word = testValues[v];
        var hits = tree.Find(word, threshold: 1);
        Assert.Contains(word.ToLower(), hits);
        Assert.NotEqual(total, hits.Count);
    }

    // Deleting any single character from the query must still find the value.
    for (int v = 0; v < total; v++)
    {
        string word = testValues[v];
        for (int pos = 0; pos < word.Length; pos++)
        {
            var hits = tree.Find(Delete(word, pos), threshold: null);
            Assert.Contains(word.ToLower(), hits);
            Assert.NotEqual(total, hits.Count);
        }
    }

    // Inserting a 'Z' at any position (including the end) must still find the value.
    for (int v = 0; v < total; v++)
    {
        string word = testValues[v];
        for (int pos = 0; pos <= word.Length; pos++)
        {
            var hits = tree.Find(Insert(word, pos, 'Z'), threshold: null);
            Assert.Contains(word.ToLower(), hits);
            Assert.NotEqual(total, hits.Count);
        }
    }

    // Transposing at any position before the last character must still find the value.
    for (int v = 0; v < total; v++)
    {
        string word = testValues[v];
        for (int pos = 0; pos < word.Length - 1; pos++)
        {
            var hits = tree.Find(Transpose(word, pos), threshold: null);
            Assert.Contains(word.ToLower(), hits);
        }
    }
}
/// <summary>
/// Creates a fresh index, builds one BK-tree per image hash algorithm on top of it,
/// and resets the image folder to an empty string.
/// </summary>
private void Initialize()
{
    index = new Index();

    // Every tree below is constructed with the same index instance.
    averagehash_bktree = new BKTree(Images.ImageHashAlgorithm.Average, index);
    differencehash_bktree = new BKTree(Images.ImageHashAlgorithm.Difference, index);
    blockhash_bktree = new BKTree(Images.ImageHashAlgorithm.Block, index);
    perceptivehash_bktree = new BKTree(Images.ImageHashAlgorithm.Perceptive, index);
    histogramhash_bktree = new BKTree(Images.ImageHashAlgorithm.Histogram, index);
    colorhash_bktree = new BKTree(Images.ImageHashAlgorithm.Color, index);

    image_folder = string.Empty;
}
/// <summary>
/// Fills a tree with four metrics and then adds null.
/// </summary>
/// <remarks>
/// NOTE(review): no assertion is made here; presumably an expected-exception attribute
/// outside this block verifies the failure — confirm.
/// </remarks>
public void BKTreeShouldThrowUponAddingNullNode()
{
    var tree = new BKTree<ExampleMetric>();

    int[][] payloads =
    {
        new[] { 100, 200, 300 },
        new[] { 110, 210, 310 },
        new[] { 130, 230, 330 },
        new[] { 140, 240, 340 }
    };

    // Ids run 1..4 in insertion order.
    for (int id = 1; id <= payloads.Length; id++)
    {
        tree.Add(new ExampleMetric(id, payloads[id - 1]));
    }

    tree.Add(null);
}
/// <summary>
/// Queries the tree with thresholds 1, 10 and 20 and checks which nodes come back with
/// which distances, adding one more node between the first and second query.
/// </summary>
public void BKTree_should_QueryBestMatchesBelowGivenThreshold()
{
    var tree = new BKTree<TestNode>();

    var probe = new TestNode(new[] { 399, 400, 400 });
    var near1 = new TestNode(41, new[] { 400, 400, 400 });
    var near2 = new TestNode(42, new[] { 403, 403, 403 });
    var near3 = new TestNode(43, new[] { 406, 406, 406 });

    tree.add(new TestNode(1, new[] { 100, 100, 100 }));
    tree.add(new TestNode(2, new[] { 200, 200, 200 }));
    tree.add(new TestNode(3, new[] { 300, 300, 300 }));
    tree.add(near1);
    tree.add(near2);
    tree.add(new TestNode(5, new[] { 500, 500, 500 }));

    // Threshold 1: only near1 is expected.
    Dictionary<TestNode, Int32> matches = tree.query(probe, 1);
    Assert.Equal(1, matches.Count);
    Assert.Equal(1, DistanceMetric.calculateLeeDistance(probe.Data, near1.Data));
    Assert.Equal(1, matches.Values.ElementAt(0));
    TestNode onlyKey = matches.Keys.ElementAt(0);
    Assert.Equal(41, onlyKey.Id);
    Assert.Equal(near1.Data, onlyKey.Data);

    // Threshold 10: near1 and near2 are expected.
    // near3 is added here to exercise insertion after the tree has already been queried.
    tree.add(near3);
    matches = tree.query(probe, 10);
    Assert.Equal(2, matches.Count);
    Assert.Equal(1, DistanceMetric.calculateLeeDistance(probe.Data, near1.Data));
    Assert.Equal(10, DistanceMetric.calculateLeeDistance(probe.Data, near2.Data));
    Assert.True(matches.Contains(new KeyValuePair<TestNode, int>(near1, 1)));
    Assert.True(matches.Contains(new KeyValuePair<TestNode, int>(near2, 10)));

    // Threshold 20: near1, near2 and near3 are expected.
    matches = tree.query(probe, 20);
    Assert.Equal(3, matches.Count);
    Assert.Equal(1, DistanceMetric.calculateLeeDistance(probe.Data, near1.Data));
    Assert.Equal(10, DistanceMetric.calculateLeeDistance(probe.Data, near2.Data));
    Assert.Equal(19, DistanceMetric.calculateLeeDistance(probe.Data, near3.Data));
    Assert.True(matches.Contains(new KeyValuePair<TestNode, int>(near1, 1)));
    Assert.True(matches.Contains(new KeyValuePair<TestNode, int>(near2, 10)));
    Assert.True(matches.Contains(new KeyValuePair<TestNode, int>(near3, 19)));
}
/// <summary>
/// Hashes the photo named on the command line, queries the BK-tree built from the database
/// meta table with a distance of 2, and prints the matching frames ordered by distance.
/// </summary>
/// <param name="args">Command-line arguments from which the photo and meta table paths are read.</param>
private static void ExecuteSearch(string[] args)
{
    string photoFilePath = GetPhotoPath(args);
    string databaseMetaTablePath = GetDatabaseMetaTable(args);

    bool missingArgument = string.IsNullOrWhiteSpace(photoFilePath)
                           || string.IsNullOrWhiteSpace(databaseMetaTablePath);
    if (missingArgument)
    {
        PrintHelp("Photo path or database metatable path not provided");
        return;
    }

    if (!File.Exists(photoFilePath))
    {
        PrintHelp("Photo file does not exist");
        return;
    }

    if (!File.Exists(databaseMetaTablePath))
    {
        PrintHelp("Database MetaTable does not exist");
        return;
    }

    using (Image frame = Image.FromFile(photoFilePath))
    {
        ulong providedPhotoHash = FrameIndexer.CalculateFramePerceptionHashOnly(frame);
        VideoFingerPrintDatabaseMetaTableWrapper metaTable = VideoFingerPrintDatabaseMetaTableLoader.Load(databaseMetaTablePath);
        BKTree<FrameMetricWrapper> bktree = ModelMetricUtils.CreateBKTree(metaTable);

        var query = new PhotoMetricWrapper
        {
            Photo = new PhotoFingerPrintWrapper
            {
                FilePath = photoFilePath,
                PHash = providedPhotoHash,
            },
        };
        IDictionary<FrameMetricWrapper, int> treeResults = bktree.Query(query, 2);

        // Closest frames first.
        foreach (KeyValuePair<FrameMetricWrapper, int> hit in treeResults.OrderBy(e => e.Value))
        {
            FrameMetricWrapper matchedFrame = hit.Key;
            string report = string.Format(
                "Distance {0} for {1} at Frame {2}",
                hit.Value,
                matchedFrame.Video.FilePath,
                matchedFrame.Frame.FrameNumber);
            Console.WriteLine(report);
        }
    }
}
/// <summary>
/// With a maximum distance of 0, only the query word itself is expected back.
/// </summary>
public void with_max_distance_0_should_return_exact_matches_only()
{
    const string wanted = "boon";
    const int noTolerance = 0;

    var tree = new BKTree<string>(new DamerauLevenshteinStringDistanceMeasurer());

    // Insertion order matters for the tree shape: "book" becomes the root.
    foreach (string word in new[] { "book", "rook", "nooks", "boon" })
    {
        tree.Add(new BKTreeNode<string>(word));
    }

    var found = tree.Matches(wanted, noTolerance);

    Assert.That(found.Count, Is.EqualTo(1));
    Assert.That(found.Single(), Is.EqualTo(wanted));
}
/// <summary>
/// Checks that <c>findBestDistance</c> returns the Lee distance (9) between the probe
/// and the closest node in the tree.
/// </summary>
public void BKTree_should_FindBestDistance()
{
    var probe = new TestNode(new[] { 118, 223, 316 });
    var closest = new TestNode(3, new[] { 120, 220, 320 });

    TestNode[] nodes =
    {
        new TestNode(1, new[] { 100, 200, 300 }),
        new TestNode(2, new[] { 110, 210, 310 }),
        closest,
        new TestNode(4, new[] { 130, 230, 330 }),
        new TestNode(5, new[] { 140, 240, 340 })
    };

    var tree = new BKTree<TestNode>();
    foreach (TestNode node in nodes)
    {
        tree.add(node);
    }

    Assert.Equal(9, DistanceMetric.calculateLeeDistance(probe.Data, closest.Data));
    Assert.Equal(9, tree.findBestDistance(probe));
}
/// <summary>
/// Checks that <c>findBestDistance</c> returns the Lee distance (9) between the probe
/// and the closest node in the tree.
/// </summary>
public void BKTree_should_FindBestDistance()
{
    var probe = new TestNode(new[] { 118, 223, 316 });
    var closest = new TestNode(3, new[] { 120, 220, 320 });

    TestNode[] nodes =
    {
        new TestNode(1, new[] { 100, 200, 300 }),
        new TestNode(2, new[] { 110, 210, 310 }),
        closest,
        new TestNode(4, new[] { 130, 230, 330 }),
        new TestNode(5, new[] { 140, 240, 340 })
    };

    var tree = new BKTree<TestNode>();
    foreach (TestNode node in nodes)
    {
        tree.add(node);
    }

    Assert.Equal(9, DistanceMetric.calculateLeeDistance(probe.Data, closest.Data));
    Assert.Equal(9, tree.findBestDistance(probe));
}
/// <summary>
/// Adding null to a populated tree is expected to raise <see cref="NullReferenceException"/>.
/// </summary>
public void BKTree_should_ThrowUponAddingNullNode()
{
    var tree = new BKTree<TestNode>();

    int[][] payloads =
    {
        new[] { 100, 200, 300 },
        new[] { 110, 210, 310 },
        new[] { 130, 230, 330 },
        new[] { 140, 240, 340 }
    };

    // Ids run 1..4 in insertion order.
    for (int id = 1; id <= payloads.Length; id++)
    {
        tree.add(new TestNode(id, payloads[id - 1]));
    }

    Assert.ThrowsDelegate addNull = () => tree.add(null);
    Assert.Throws<NullReferenceException>(addNull);
}
/// <summary>
/// Queries the tree with thresholds 1, 10 and 20 and checks which metrics come back with
/// which distances, adding one more element between the first and second query.
/// </summary>
public void BKTreeShouldQueryBestMatchesBelowGivenThreshold()
{
    var tree = new BKTree<ExampleMetric>();

    var probe = new ExampleMetric(new[] { 399, 400, 400 });
    var near1 = new ExampleMetric(41, new[] { 400, 400, 400 });
    var near2 = new ExampleMetric(42, new[] { 403, 403, 403 });
    var near3 = new ExampleMetric(43, new[] { 406, 406, 406 });

    tree.Add(new ExampleMetric(1, new[] { 100, 100, 100 }));
    tree.Add(new ExampleMetric(2, new[] { 200, 200, 200 }));
    tree.Add(new ExampleMetric(3, new[] { 300, 300, 300 }));
    tree.Add(near1);
    tree.Add(near2);
    tree.Add(new ExampleMetric(5, new[] { 500, 500, 500 }));

    // Threshold 1: near1 is the only expected result.
    IDictionary<ExampleMetric, int> matches = tree.Query(probe, 1);
    Assert.AreEqual(1, DistanceMetric.CalculateLeeDistance(probe.Data, near1.Data));
    Assert.AreEqual(1, matches.Values.ElementAt(0));
    ExampleMetric firstKey = matches.Keys.ElementAt(0);
    Assert.AreEqual(41, firstKey.Id);
    Assert.AreEqual(near1.Data, firstKey.Data);

    // Threshold 10: near1 and near2 are expected.
    // near3 is added here to exercise insertion after the tree has already been queried.
    tree.Add(near3);
    matches = tree.Query(probe, 10);
    Assert.AreEqual(2, matches.Count);
    Assert.AreEqual(1, DistanceMetric.CalculateLeeDistance(probe.Data, near1.Data));
    Assert.AreEqual(10, DistanceMetric.CalculateLeeDistance(probe.Data, near2.Data));
    Assert.IsTrue(matches.Contains(new KeyValuePair<ExampleMetric, int>(near1, 1)));
    Assert.IsTrue(matches.Contains(new KeyValuePair<ExampleMetric, int>(near2, 10)));

    // Threshold 20: near1, near2 and near3 are expected.
    matches = tree.Query(probe, 20);
    Assert.AreEqual(3, matches.Count);
    Assert.AreEqual(1, DistanceMetric.CalculateLeeDistance(probe.Data, near1.Data));
    Assert.AreEqual(10, DistanceMetric.CalculateLeeDistance(probe.Data, near2.Data));
    Assert.AreEqual(19, DistanceMetric.CalculateLeeDistance(probe.Data, near3.Data));
    Assert.IsTrue(matches.Contains(new KeyValuePair<ExampleMetric, int>(near1, 1)));
    Assert.IsTrue(matches.Contains(new KeyValuePair<ExampleMetric, int>(near2, 10)));
    Assert.IsTrue(matches.Contains(new KeyValuePair<ExampleMetric, int>(near3, 19)));
}
/*
 * Using BKTree:
 *   1. Derive a class from BKTreeNode.
 *   2. Give it a member holding the data to be sorted / retrieved.
 *   3. Override calculateDistance to compute the distance metric between
 *      two nodes for that data.
 *   4. Instantiate BKTree with the derived class as its type argument.
 */
static void Main(string[] args)
{
    // NOTE: the unit tests contain more comprehensive examples of the BK-tree methods.

    // Print one sample value from each static distance metric.
    var hamming = DistanceMetric.calculateHammingDistance(
        new byte[] { 0xEF, 0x35, 0x20 },
        new byte[] { 0xAD, 0x13, 0x87 });
    Console.WriteLine(hamming);

    var lee = DistanceMetric.calculateLeeDistance(
        new int[] { 196, 105, 48 },
        new int[] { 201, 12, 51 });
    Console.WriteLine(lee);

    var levenshtein = DistanceMetric.calculateLevenshteinDistance("kitten", "sitting");
    Console.WriteLine(levenshtein);

    // Build a tree of the example node type and populate it with ids 1..5.
    var tree = new BKTree<ExampleNodeRecord>();
    int[][] samples =
    {
        new[] { 100, 200, 300 },
        new[] { 110, 210, 310 },
        new[] { 120, 220, 320 },
        new[] { 130, 230, 330 },
        new[] { 140, 240, 340 }
    };
    for (int id = 1; id <= samples.Length; id++)
    {
        tree.add(new ExampleNodeRecord(id, samples[id - 1]));
    }

    // Best node together with its distance.
    Dictionary<ExampleNodeRecord, Int32> results =
        tree.findBestNodeWithDistance(new ExampleNodeRecord(new int[] { 103, 215, 303 }));

    // All nodes below an arbitrary threshold of 10.
    results = tree.query(new ExampleNodeRecord(new int[] { 103, 215, 303 }), 10);

    // The dictionaries are not printed here; write a print routine if output is needed.
}
/*
 * Using BKTree:
 *   1. Derive a class from BKTreeNode.
 *   2. Give it a member holding the data to be sorted / retrieved.
 *   3. Override calculateDistance to compute the distance metric between
 *      two nodes for that data.
 *   4. Instantiate BKTree with the derived class as its type argument.
 */
static void Main(string[] args)
{
    // NOTE: the unit tests contain more comprehensive examples of the BK-tree methods.

    // Print one sample value from each static distance metric.
    var hamming = DistanceMetric.calculateHammingDistance(
        new byte[] { 0xEF, 0x35, 0x20 },
        new byte[] { 0xAD, 0x13, 0x87 });
    Console.WriteLine(hamming);

    var lee = DistanceMetric.calculateLeeDistance(
        new int[] { 196, 105, 48 },
        new int[] { 201, 12, 51 });
    Console.WriteLine(lee);

    var levenshtein = DistanceMetric.calculateLevenshteinDistance("kitten", "sitting");
    Console.WriteLine(levenshtein);

    // Build a tree of the example node type and populate it with ids 1..5.
    var tree = new BKTree<ExampleNodeRecord>();
    int[][] samples =
    {
        new[] { 100, 200, 300 },
        new[] { 110, 210, 310 },
        new[] { 120, 220, 320 },
        new[] { 130, 230, 330 },
        new[] { 140, 240, 340 }
    };
    for (int id = 1; id <= samples.Length; id++)
    {
        tree.add(new ExampleNodeRecord(id, samples[id - 1]));
    }

    // Best node together with its distance.
    Dictionary<ExampleNodeRecord, Int32> results =
        tree.findBestNodeWithDistance(new ExampleNodeRecord(new int[] { 103, 215, 303 }));

    // All nodes below an arbitrary threshold of 10.
    results = tree.query(new ExampleNodeRecord(new int[] { 103, 215, 303 }), 10);

    // The dictionaries are not printed here; write a print routine if output is needed.
}
/// <summary>
/// Groups photos by the video each one most likely came from.
/// </summary>
/// <param name="photoDatabase">Photos to classify.</param>
/// <param name="metatable">Meta table from which the video fingerprints and the BK-tree are built.</param>
/// <returns>
/// A map from video file path to the set of photos assigned to that video. Photos with no
/// tree results, or for which no most-likely video is found, are not included.
/// </returns>
private static IDictionary<string, ISet<PhotoFingerPrintWrapper>> MapPhotosToVideos(
    PhotoFingerPrintDatabaseWrapper photoDatabase,
    VideoFingerPrintDatabaseMetaTableWrapper metatable
)
{
    IDictionary<string, ISet<PhotoFingerPrintWrapper>> resultMap = new Dictionary<string, ISet<PhotoFingerPrintWrapper>>();
    IDictionary<string, VideoFingerPrintWrapper> fileNameToVideoFingerPrintMap =
        MetaTableUtils.EnumerateVideoFingerPrints(metatable).ToDictionary(e => e.FilePath);
    BKTree<FrameMetricWrapper> bktree = ModelMetricUtils.CreateBKTree(metatable);

    foreach (PhotoFingerPrintWrapper photo in photoDatabase.PhotoFingerPrints)
    {
        // 1. Find the bucket of possible candidates
        IDictionary<FrameMetricWrapper, int> treeResults = bktree.Query(
            new PhotoMetricWrapper
            {
                Photo = photo,
            },
            DefaultMetricThreshold
        );

        // Nothing close enough in the tree: skip before doing any further work.
        if (treeResults.Count == 0)
        {
            continue;
        }

        IDictionary<string, ISet<FrameMetricWrapper>> collapsedTreeResults = ModelMetricUtils.CollapseTreeResults(treeResults);

        // 2. Find the most likely video and add the photo to its bucket
        VideoFingerPrintWrapper mostLikelyVideo = FindMostLikelyVideo(photo, collapsedTreeResults, fileNameToVideoFingerPrintMap);

        // When no video could be picked, skip this photo and move along
        if (mostLikelyVideo == null)
        {
            continue;
        }

        string videoFileName = mostLikelyVideo.FilePath;
        ISet<PhotoFingerPrintWrapper> bucket;
        if (!resultMap.TryGetValue(videoFileName, out bucket))
        {
            bucket = new HashSet<PhotoFingerPrintWrapper>();
            resultMap.Add(videoFileName, bucket);
        }

        // Previously the bucket was created but never populated, so every set came back empty.
        bucket.Add(photo);
    }

    return resultMap;
}
/// <summary>
/// Constructs image BK-trees for the Average and PerceptiveColor algorithms and checks the
/// reported algorithm name, the stored hashes and the similarity of two indexed entries.
/// </summary>
public void BKTreeImage_Constructor()
{
    var index = new Images.ImageIndex.Index();
    var hash_algo = Images.ImageHashAlgorithm.Average;

    ulong ahash1 = 181;
    ulong ahash2 = 171;

    // Each index entry is a list whose first element is the hash string checked further down.
    var hash1 = new List<String> { ahash1.ToString(), "1111111111" };
    index.Add("Test", hash1);
    var hash2 = new List<String> { ahash2.ToString(), "1111111110" };
    index.Add("Test2", hash2);

    var tree = new BKTree(hash_algo, index);
    var id = index.Id("Test");
    tree.Add(id);
    id = index.Id("Test2");
    tree.Add(id);

    // Tree built with the Average algorithm
    Assert.AreEqual("Average", tree.ImageHashAlgo());
    Assert.AreEqual(ahash1.ToString(), tree.GetImageHash("Test"));
    Assert.AreEqual(ahash2.ToString(), tree.GetImageHash("Test2"));
    Assert.AreEqual(100.0, tree.GetImageSimilarity("Test", "Test"));
    Assert.AreNotEqual(100.0, tree.GetImageSimilarity("Test", "Test2"));

    // Tree built with the PerceptiveColor algorithm on a fresh, empty index
    hash_algo = Images.ImageHashAlgorithm.PerceptiveColor;
    index = new Images.ImageIndex.Index();
    tree = new BKTree(hash_algo, index);
    Assert.AreEqual("PerceptiveColor", tree.ImageHashAlgo());
}
/// <summary>
/// Looks up misspelled city names with no explicit threshold and checks that each
/// resolves to the single expected entry.
/// </summary>
public void Test2()
{
    string[] cities = { "Leeds", "York", "Bristol", "Leicester", "Hull", "Durham" };
    var cityTree = BKTree.Create(cities);

    var hits = cityTree.Find("hill", threshold: null);
    Assert.True(hits.SetEquals(Expected("hull")));

    // Three different misspellings of the same city.
    foreach (string misspelling in new[] { "liecester", "leicestre", "lecester" })
    {
        hits = cityTree.Find(misspelling, threshold: null);
        Assert.True(hits.SetEquals(Expected("leicester")));
    }
}
/// <summary>
/// Checks that <c>findBestNode</c> returns the node with Id 2 for the given probe.
/// </summary>
public void BKTree_should_FindBestNode()
{
    var probe = new TestNode(new[] { 210, 175, 233 });
    var expected = new TestNode(2, new[] { 200, 200, 200 });

    TestNode[] nodes =
    {
        new TestNode(1, new[] { 100, 100, 100 }),
        expected,
        new TestNode(3, new[] { 300, 300, 300 }),
        new TestNode(4, new[] { 400, 400, 400 }),
        new TestNode(5, new[] { 500, 500, 500 })
    };

    var tree = new BKTree<TestNode>();
    foreach (TestNode node in nodes)
    {
        tree.add(node);
    }

    TestNode actual = tree.findBestNode(probe);

    Assert.Equal(2, actual.Id);
    Assert.Equal(expected.Data, actual.Data);
}
/// <summary>
/// Adds every line of the word file to <paramref name="bk"/>.
/// </summary>
/// <param name="bk">Tree that receives one entry per line.</param>
/// <exception cref="Exception">
/// Thrown for any failure; the original exception is available as <c>InnerException</c>.
/// </exception>
public void LoadFromFile(BKTree bk)
{
    try
    {
        // NOTE(review): the input path is hard-coded to a developer desktop; consider making it configurable.
        // The using block closes the reader even when ReadLine or Add throws.
        using (System.IO.StreamReader file = new System.IO.StreamReader(@"C:\Users\Firat\Desktop\All.txt"))
        {
            string line;
            while ((line = file.ReadLine()) != null)
            {
                bk.Add(line);
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace survive.
        throw new Exception("Ağaç oluşturulamadı. " + ex.Message, ex);
    }
}
/// <summary>
/// Builds a BK-tree containing one entry per frame fingerprint of every video in the meta table.
/// </summary>
/// <param name="metatable">Meta table whose video fingerprints are enumerated.</param>
/// <returns>A tree of <c>FrameMetricWrapper</c> entries, each pairing a frame with its video.</returns>
public static BKTree<FrameMetricWrapper> CreateBKTree(VideoFingerPrintDatabaseMetaTableWrapper metatable)
{
    BKTree<FrameMetricWrapper> frameTree = new BKTree<FrameMetricWrapper>();

    foreach (VideoFingerPrintWrapper owningVideo in MetaTableUtils.EnumerateVideoFingerPrints(metatable))
    {
        foreach (FrameFingerPrintWrapper fingerPrint in owningVideo.FingerPrints)
        {
            // Each entry keeps a reference back to the video the frame belongs to.
            FrameMetricWrapper entry = new FrameMetricWrapper
            {
                Frame = fingerPrint,
                Video = owningVideo,
            };
            frameTree.Add(entry);
        }
    }

    return frameTree;
}
/// <summary>
/// Checks that <c>FindClosestElement</c> returns the element with Id 4 together with
/// its Lee distance (58) from the probe.
/// </summary>
public void BKTreeShouldFindBestNodeWithDistance()
{
    var probe = new ExampleMetric(new[] { 365, 422, 399 });
    var expected = new ExampleMetric(4, new[] { 400, 400, 400 });

    ExampleMetric[] elements =
    {
        new ExampleMetric(1, new[] { 100, 100, 100 }),
        new ExampleMetric(2, new[] { 200, 200, 200 }),
        new ExampleMetric(3, new[] { 300, 300, 300 }),
        expected,
        new ExampleMetric(5, new[] { 500, 500, 500 })
    };

    var tree = new BKTree<ExampleMetric>();
    foreach (ExampleMetric element in elements)
    {
        tree.Add(element);
    }

    Tuple<ExampleMetric, int> closest = tree.FindClosestElement(probe);

    Assert.AreEqual(58, DistanceMetric.CalculateLeeDistance(probe.Data, expected.Data));
    Assert.AreEqual(58, closest.Item2);
    Assert.AreEqual(4, closest.Item1.Id);
    Assert.AreEqual(expected.Data, closest.Item1.Data);
}
/// <summary>
/// Creates the controller and fills its BK-tree with one entry per line of the
/// Bulgarian dictionary file, using the Damerau-Levenshtein distance algorithm.
/// </summary>
public TextCorrectorController()
{
    ILevenshteinDistanceAlgorithm levenshteinDistanceAlgorithm = new DamerauLevenshteinDistanceAlgorithm();
    this.bkTree = new BKTree(levenshteinDistanceAlgorithm);

    // Open read-only: the FileStream(path, FileMode.Open) overload requests read/write access,
    // which fails when the dictionary file or its directory is not writable.
    using (FileStream fileStream = new FileStream("Dictionary\\Bulgarian.dic.txt", FileMode.Open, FileAccess.Read, FileShare.Read))
    using (StreamReader reader = new StreamReader(fileStream))
    {
        string dictWord;
        while ((dictWord = reader.ReadLine()) != null)
        {
            this.bkTree.Add(dictWord);
        }
    }
}
/// <summary>
/// Checks that <c>findBestNodeWithDistance</c> returns a single entry: the node with Id 4
/// and its Lee distance (58) from the probe.
/// </summary>
public void BKTree_should_FindBestNodeWithDistance()
{
    var probe = new TestNode(new[] { 365, 422, 399 });
    var expected = new TestNode(4, new[] { 400, 400, 400 });

    TestNode[] nodes =
    {
        new TestNode(1, new[] { 100, 100, 100 }),
        new TestNode(2, new[] { 200, 200, 200 }),
        new TestNode(3, new[] { 300, 300, 300 }),
        expected,
        new TestNode(5, new[] { 500, 500, 500 })
    };

    var tree = new BKTree<TestNode>();
    foreach (TestNode node in nodes)
    {
        tree.add(node);
    }

    Dictionary<TestNode, Int32> closest = tree.findBestNodeWithDistance(probe);

    Assert.Equal(1, closest.Count);
    Assert.Equal(58, DistanceMetric.calculateLeeDistance(probe.Data, expected.Data));
    Assert.Equal(58, closest.Values.ElementAt(0));

    TestNode foundNode = closest.Keys.ElementAt(0);
    Assert.Equal(4, foundNode.Id);
    Assert.Equal(expected.Data, foundNode.Data);
}
/// <summary>
/// Adds words that repeat or differ only by letter case and checks that an exact
/// (distance 0) search yields one hit per distinct word and none for an unknown word.
/// </summary>
public void BKTree_NoDuplicate()
{
    var tree = new BKTree();

    string[] inserted =
    {
        "AaaA", "TaaT", "TTTT",
        "Test", "Test", "TEST", "TeSt", "TeST",
        "Text", "Text", "TEXt"
    };
    foreach (string word in inserted)
    {
        tree.Add(word);
    }

    // Each of these is expected exactly once, regardless of how many variants were added.
    foreach (string query in new[] { "Test", "Text", "aaaa", "taat", "tttt" })
    {
        var hitCount = tree.Search(query, 0).Count;
        Assert.AreEqual(1, hitCount);
    }

    // A word that was never added yields nothing.
    var missCount = tree.Search("zzzz", 0).Count;
    Assert.AreEqual(0, missCount);
}
/// <summary>
/// Re-inserts the same shuffled values into one tree ten times and checks after every
/// round that the exact (distance 0) search counts are unchanged.
/// </summary>
public void BKTree_InsertionOrder()
{
    const int rounds = 10;
    var tree = new BKTree();
    string[] values = { "Test", "TeSt", "AaaA", "TaaT", "TTTT", "Text", "TEXt", " ", "--", ":-)" };
    string[] expectedOnce = { "Test", "Text", "aaaa", "taat", "tttt" };

    int round = 1;
    while (round <= rounds)
    {
        Shuffle(values);

        // Log the insertion order of this round.
        Console.WriteLine("BKTree insertion: v{0}", round);
        foreach (string item in values)
        {
            tree.Add(item);
            Console.WriteLine(item);
        }
        Console.WriteLine(" ");

        foreach (string query in expectedOnce)
        {
            var hitCount = tree.Search(query, 0).Count;
            Assert.AreEqual(1, hitCount);
        }

        var missCount = tree.Search("no match", 0).Count;
        Assert.AreEqual(0, missCount);

        round++;
    }
}
/// <summary>
/// Checks that <c>findBestNode</c> returns the node with Id 2 for the given probe.
/// </summary>
public void BKTree_should_FindBestNode()
{
    var probe = new TestNode(new[] { 210, 175, 233 });
    var expected = new TestNode(2, new[] { 200, 200, 200 });

    TestNode[] nodes =
    {
        new TestNode(1, new[] { 100, 100, 100 }),
        expected,
        new TestNode(3, new[] { 300, 300, 300 }),
        new TestNode(4, new[] { 400, 400, 400 }),
        new TestNode(5, new[] { 500, 500, 500 })
    };

    var tree = new BKTree<TestNode>();
    foreach (TestNode node in nodes)
    {
        tree.add(node);
    }

    TestNode actual = tree.findBestNode(probe);

    Assert.Equal(2, actual.Id);
    Assert.Equal(expected.Data, actual.Data);
}
/// <summary>
/// Computes the CEDD descriptor of the query image and returns the indexed images whose
/// distance to it is within the match threshold, ordered by ascending distance.
/// </summary>
/// <param name="queryImagePath">Path of the image to search for.</param>
/// <param name="argument">Optional match distance; used only when it is an <see cref="Int32"/>, otherwise 35 is used.</param>
/// <returns>Matching image records sorted by distance.</returns>
/// <exception cref="InvalidOperationException">No tree is cached and the repository returns none.</exception>
public List<ImageRecord> QueryImage(string queryImagePath, object argument = null)
{
    CEDD_Descriptor.CEDD cedd = new CEDD_Descriptor.CEDD();

    int goodMatchDistance = 35;
    if (argument is Int32)
    {
        goodMatchDistance = (int)argument;
    }

    double[] queryCeddDiscriptor;
    // The Image returned by FromFile is disposable as well; dispose it together with
    // the Bitmap copy so the file handle is released.
    using (Image source = Image.FromFile(queryImagePath))
    using (Bitmap bmp = new Bitmap(source))
    {
        queryCeddDiscriptor = cedd.Apply(bmp);
    }

    // Use the cached tree when present; otherwise load it from the repository and cache it.
    Stopwatch sw = Stopwatch.StartNew();
    BKTree<CEDDTreeNode> ceddTree = null;
    if (!CacheHelper.Get<BKTree<CEDDTreeNode>>("CeddIndexTree", out ceddTree))
    {
        CEDDRepository<BKTree<CEDDTreeNode>> repo = new CEDDRepository<BKTree<CEDDTreeNode>>();
        ceddTree = repo.Load();
        if (ceddTree == null)
        {
            throw new InvalidOperationException("Please index CEDD with BK-Tree before querying the Image");
        }
        CacheHelper.Add<BKTree<CEDDTreeNode>>(ceddTree, "CeddIndexTree");
    }
    sw.Stop();
    Debug.WriteLine("Load tooked {0} ms", sw.ElapsedMilliseconds);

    CEDDTreeNode queryNode = new CEDDTreeNode
    {
        Id = 0,
        ImagePath = queryImagePath,
        CEDDDiscriptor = queryCeddDiscriptor
    };

    sw.Reset();
    sw.Start();
    Dictionary<CEDDTreeNode, Int32> result = ceddTree.query(queryNode, goodMatchDistance);
    sw.Stop();
    Debug.WriteLine("Query tooked {0} ms", sw.ElapsedMilliseconds);

    List<ImageRecord> rtnImageList = new List<ImageRecord>();
    foreach (KeyValuePair<CEDDTreeNode, Int32> ceddNode in result)
    {
        ImageRecord rec = new ImageRecord
        {
            Id = ceddNode.Key.Id,
            ImageName = ceddNode.Key.ImageName,
            ImagePath = ceddNode.Key.ImagePath,
            Distance = ceddNode.Value
        };
        rtnImageList.Add(rec);
    }

    return rtnImageList.OrderBy(x => x.Distance).ToList();
}
/// <summary>
/// Reads a comma-separated street file, builds one StreetName per distinct street with its
/// zip codes and cities, writes several lookup artifacts to disk, and returns the street list.
/// </summary>
/// <param name="filePath">Path of the comma-separated input file. Its first line is skipped.</param>
/// <returns>One StreetName per key collected in zipCodes, carrying de-duplicated zips and cities.</returns>
/// <remarks>
/// Per line (processed with Parallel.ForEach):
///  - fields are split on ',' and trimmed; columns are read by fixed index (8 pre-direction, 9 pre-type,
///    11 street name, 12 suffix, 33/34 left/right zip, 35/36 left/right city, 37/38 alternate cities
///    used only when the primary city is empty);
///  - rows whose upper-cased street name is "DRIVEWAY" or contains "UNNAMED" are ignored;
///  - a suffix equal to an entry of data.Suffixes.LongSuffixes is replaced by the ShortSuffixes entry
///    at the same index (the loop does not stop at the first match);
///  - pre-directions E/W/N/S are expanded to EAST/WEST/NORTH/SOUTH;
///  - TH/ST/ND/RD directly following digits is stripped from the street name and from FullStreetName;
///  - a zip that fails int.TryParse stays 0 and is treated as missing.
/// Locking: both streetNameCity2Zips and streetNameZip2Cities are mutated under the lock on
/// streetNameCity2Zips; zipCodes and cities are each mutated under a lock on themselves.
/// Output files (hard-coded under c:/users/brush/desktop): streetNames.dat, knownCities.csv,
/// knownStreets.csv, citiesBKTree.dat, streetsBKTree.dat, streetNameCity2Zips.dat,
/// streetNameZip2Cities.dat.
/// </remarks>
// NOTE(review): the dictionaries are keyed on StreetName / StreetNameAndCity / StreetNameAndZip, so this
// presumably relies on those types implementing value equality -- confirm against their definitions.
// NOTE(review): the skipped first line is presumably a header row -- confirm against the input file.
// NOTE(review): BinaryFormatter is used for the .dat outputs; it is unsafe on untrusted data and has been
// removed from recent .NET versions. Changing it would change the on-disk format, so it is only flagged here.
// NOTE(review): a line with fewer than 39 comma-separated fields will throw from the column indexers.
public static List <StreetName> Generate(string filePath) { Data data = DataLoader.LoadJustSuffixes(); Dictionary <StreetName, List <int> > zipCodes = new Dictionary <StreetName, List <int> >(); Dictionary <StreetName, List <string> > cities = new Dictionary <StreetName, List <string> >(); Dictionary <StreetNameAndCity, List <int> > streetNameCity2Zips = new Dictionary <StreetNameAndCity, List <int> >(); Dictionary <StreetNameAndZip, List <string> > streetNameZip2Cities = new Dictionary <StreetNameAndZip, List <string> >(); const int PreTypeColumn = 9; const int StreetNameColumn = 11; const int StreetSuffixColumn = 12; const int ZipLeftColumn = 33; const int ZipRightColumn = 34; const int CityLeftColumn = 35; const int CityRightColumn = 36; const int CityLeftAlternate = 37; const int CityRightAlternate = 38; const int PreDirectionColumn = 8; string[] allLines = File.ReadAllLines(filePath).Skip(1).ToArray(); Parallel.ForEach(allLines, line => { string[] lineBits = line.Split(',').Select(n => n.Trim()).ToArray(); string preType = lineBits[PreTypeColumn].ToUpper(); string streetName = lineBits[StreetNameColumn].ToUpper(); if (streetName != "DRIVEWAY" && !streetName.Contains("UNNAMED")) { string streetSuffix = lineBits[StreetSuffixColumn].ToUpper(); for (int c = 0; c < data.Suffixes.LongSuffixes.Length; c++) { if (data.Suffixes.LongSuffixes[c] == streetSuffix) { streetSuffix = data.Suffixes.ShortSuffixes[c]; } } int zipLeft = 0, zipRight = 0; int.TryParse(lineBits[ZipLeftColumn], out zipLeft); int.TryParse(lineBits[ZipRightColumn], out zipRight); string cityLeft = lineBits[CityLeftColumn].ToUpper(); string cityRight = lineBits[CityRightColumn].ToUpper(); if (string.IsNullOrEmpty(cityLeft)) { cityLeft = lineBits[CityLeftAlternate].ToUpper(); } if (string.IsNullOrEmpty(cityRight)) { cityRight = lineBits[CityRightAlternate].ToUpper(); } string preDirection = lineBits[PreDirectionColumn].ToUpper(); if (preDirection == "E") { preDirection = "EAST"; } else if (preDirection 
== "W") { preDirection = "WEST"; } else if (preDirection == "N") { preDirection = "NORTH"; } else if (preDirection == "S") { preDirection = "SOUTH"; } string cleanedName = streetName; cleanedName = Regex.Replace(cleanedName, @"(\d+)(TH|ST|ND|RD)", "$1"); StreetName name = new StreetName(preDirection, preType, cleanedName, streetSuffix, null, null); List <int> localZips = new List <int>(); if (zipLeft != 0) { localZips.Add(zipLeft); } if (zipRight != 0) { localZips.Add(zipRight); } List <string> localCities = new List <string>(); if (!string.IsNullOrEmpty(cityLeft)) { localCities.Add(cityLeft); } if (!string.IsNullOrEmpty(cityRight)) { localCities.Add(cityRight); } lock (streetNameCity2Zips) { string fullStreetName = Regex.Replace(name.FullStreetName, @"(\d+)(TH|ST|ND|RD)", "$1"); if (zipLeft != 0 && !string.IsNullOrEmpty(cityLeft)) { StreetNameAndCity key1 = new StreetNameAndCity { City = cityLeft, FullStreetName = fullStreetName, }; if (!streetNameCity2Zips.ContainsKey(key1)) { streetNameCity2Zips.Add(key1, new List <int>()); } streetNameCity2Zips[key1].Add(zipLeft); streetNameCity2Zips[key1] = streetNameCity2Zips[key1].Distinct().ToList(); StreetNameAndZip key2 = new StreetNameAndZip { FullStreetName = fullStreetName, Zip = zipLeft, }; if (!streetNameZip2Cities.ContainsKey(key2)) { streetNameZip2Cities.Add(key2, new List <string>()); } streetNameZip2Cities[key2].Add(cityLeft); streetNameZip2Cities[key2] = streetNameZip2Cities[key2].Distinct().ToList(); } if (zipRight != 0 && !string.IsNullOrEmpty(cityRight)) { StreetNameAndCity key1 = new StreetNameAndCity { City = cityRight, FullStreetName = fullStreetName, }; if (!streetNameCity2Zips.ContainsKey(key1)) { streetNameCity2Zips.Add(key1, new List <int>()); } streetNameCity2Zips[key1].Add(zipRight); streetNameCity2Zips[key1] = streetNameCity2Zips[key1].Distinct().ToList(); StreetNameAndZip key2 = new StreetNameAndZip { FullStreetName = fullStreetName, Zip = zipRight, }; if (!streetNameZip2Cities.ContainsKey(key2)) { 
streetNameZip2Cities.Add(key2, new List <string>()); } streetNameZip2Cities[key2].Add(cityRight); streetNameZip2Cities[key2] = streetNameZip2Cities[key2].Distinct().ToList(); } } lock (zipCodes) { if (!zipCodes.ContainsKey(name)) { zipCodes.Add(name, new List <int>()); } if (zipLeft != 0) { zipCodes[name].Add(zipLeft); } if (zipRight != 0 && zipLeft != zipRight) { zipCodes[name].Add(zipRight); } } lock (cities) { if (!cities.ContainsKey(name)) { cities.Add(name, new List <string>()); } if (!string.IsNullOrEmpty(cityLeft)) { cities[name].Add(cityLeft); } if (!string.IsNullOrEmpty(cityRight) && cityRight != cityLeft) { cities[name].Add(cityRight); } } } }); List <StreetName> allStreetNames = new List <StreetName>(); StreetName[] keys = zipCodes.Keys.ToArray(); foreach (StreetName key in keys) { StreetName newStreetName = new StreetName(key.PreDirection, key.PreType, key.Name, key.Suffix, zipCodes[key].Distinct().ToList(), cities[key].Distinct().ToList()); allStreetNames.Add(newStreetName); } BinaryFormatter bf = new BinaryFormatter(); using (FileStream sw = File.Create("c:/users/brush/desktop/streetNames.dat")) { bf.Serialize(sw, allStreetNames); } string[] uniqueCities = allStreetNames.SelectMany(n => n.Cities).Distinct().ToArray(); File.WriteAllLines("C:/users/brush/desktop/knownCities.csv", uniqueCities); string[] uniqueStreets = allStreetNames.Select(n => n.Name).Distinct().ToArray(); File.WriteAllLines("C:/users/brush/desktop/knownStreets.csv", uniqueStreets); BKTree citiesTree = BKTreeEngine.CreateBKTree(uniqueCities.ToList()); BKTreeSerializer.SerializeTo(citiesTree, "c:/users/brush/desktop/citiesBKTree.dat"); BKTree streetsTree = BKTreeEngine.CreateBKTree(uniqueStreets.ToList()); BKTreeSerializer.SerializeTo(streetsTree, "c:/users/brush/desktop/streetsBKTree.dat"); bf = new BinaryFormatter(); using (FileStream fw = File.Create("C:/users/brush/desktop/streetNameCity2Zips.dat")) { bf.Serialize(fw, streetNameCity2Zips); } bf = new BinaryFormatter(); using 
(FileStream fw = File.Create("C:/users/brush/desktop/streetNameZip2Cities.dat")) { bf.Serialize(fw, streetNameZip2Cities); } return(allStreetNames); }
/// <summary>
/// Checks that adding <c>null</c> to a tree that already holds four nodes
/// raises a <see cref="NullReferenceException"/>.
/// </summary>
public void BKTree_should_ThrowUponAddingNullNode()
{
    BKTree<TestNode> tree = new BKTree<TestNode>();

    // Seed the tree so the null insert runs against a populated tree.
    TestNode[] seedNodes =
    {
        new TestNode(1, new int[] { 100, 200, 300 }),
        new TestNode(2, new int[] { 110, 210, 310 }),
        new TestNode(3, new int[] { 130, 230, 330 }),
        new TestNode(4, new int[] { 140, 240, 340 }),
    };
    foreach (TestNode seed in seedNodes)
    {
        tree.add(seed);
    }

    Assert.ThrowsDelegate addNull = () => { tree.add(null); };

    Assert.Throws<NullReferenceException>(addNull);
}
/// <summary>
/// Exercises the image BK-tree built on the Average hash: registers four hash entries under
/// three names, adds the three ids to the tree, then checks file count, stored hashes,
/// similarity, search result count and the reported algorithm name.
/// </summary>
public void BKTreeImage_Search()
{
    var index = new Images.ImageIndex.Index();
    var algorithm = Images.ImageHashAlgorithm.Average;
    ulong hashA = 181;
    ulong hashB = 171;

    // "Test" is registered twice (first and last call) with the same hash list contents.
    index.Add("Test", new List<String> { hashA.ToString(), "1111111111" });
    index.Add("Test2", new List<String> { hashB.ToString(), "1111111110" });
    index.Add("Test3", new List<String> { hashA.ToString(), "1111111111" });
    index.Add("Test", new List<String> { hashA.ToString(), "1111111111" });

    var tree = new BKTree(algorithm, index);
    tree.Add(index.Id("Test"));
    tree.Add(index.Id("Test2"));
    tree.Add(index.Id("Test3"));

    // index = test, test2, test3
    Assert.AreEqual(3, index.FileCount());
    Assert.AreEqual(hashA.ToString(), tree.GetImageHash("Test"));
    Assert.AreEqual(hashB.ToString(), tree.GetImageHash("Test2"));
    Assert.AreEqual(100.0, tree.GetImageSimilarity("Test", "Test"));
    Assert.AreNotEqual(100.0, tree.GetImageSimilarity("Test", "Test2"));

    // Searching for "Test" is expected to return two entries: test and test3.
    Assert.AreEqual(2, tree.Search("Test", 100).Count);

    // test2 hash = 171
    Assert.AreEqual(hashB.ToString(), tree.GetImageHash("Test2"));

    // The tree reports the Average algorithm.
    Assert.AreEqual("Average", tree.ImageHashAlgo());
}
/// <summary>
/// Checks that <c>findBestNodeWithDistance</c> returns exactly one entry for the probe
/// 365,422,399: the node with Id 4 (data 400,400,400) paired with distance 58.
/// </summary>
public void BKTree_should_FindBestNodeWithDistance()
{
    BKTree<TestNode> tree = new BKTree<TestNode>();
    TestNode probe = new TestNode(new int[] { 365, 422, 399 });
    TestNode expected = new TestNode(4, new int[] { 400, 400, 400 });

    // Insertion order is kept exactly as written: 1, 2, 3, expected (4), 5.
    TestNode[] insertionOrder =
    {
        new TestNode(1, new int[] { 100, 100, 100 }),
        new TestNode(2, new int[] { 200, 200, 200 }),
        new TestNode(3, new int[] { 300, 300, 300 }),
        expected,
        new TestNode(5, new int[] { 500, 500, 500 }),
    };
    foreach (TestNode node in insertionOrder)
    {
        tree.add(node);
    }

    Dictionary<TestNode, Int32> matches = tree.findBestNodeWithDistance(probe);

    Assert.Equal(1, matches.Count);
    Assert.Equal(58, DistanceMetric.calculateLeeDistance(probe.Data, expected.Data));

    // The single entry carries both the matched node and its distance.
    KeyValuePair<TestNode, Int32> top = matches.ElementAt(0);
    Assert.Equal(58, top.Value);
    Assert.Equal(4, top.Key.Id);
    Assert.Equal(expected.Data, top.Key.Data);
}
/// <summary>
/// Checks that <c>query</c> returns exactly the nodes within the given distance of the probe
/// 399,400,400 for thresholds 1, 10 and 20, including a node added after the first query.
/// </summary>
public void BKTree_should_QueryBestMatchesBelowGivenThreshold()
{
    BKTree<TestNode> tree = new BKTree<TestNode>();
    TestNode probe = new TestNode(new int[] { 399, 400, 400 });
    TestNode near = new TestNode(41, new int[] { 400, 400, 400 });
    TestNode mid = new TestNode(42, new int[] { 403, 403, 403 });
    TestNode far = new TestNode(43, new int[] { 406, 406, 406 });

    // Initial population, in the original insertion order; "far" is added later.
    TestNode[] initialNodes =
    {
        new TestNode(1, new int[] { 100, 100, 100 }),
        new TestNode(2, new int[] { 200, 200, 200 }),
        new TestNode(3, new int[] { 300, 300, 300 }),
        near,
        mid,
        new TestNode(5, new int[] { 500, 500, 500 }),
    };
    foreach (TestNode node in initialNodes)
    {
        tree.add(node);
    }

    // Threshold 1: only "near" is expected.
    Dictionary<TestNode, Int32> withinOne = tree.query(probe, 1);
    Assert.Equal(1, withinOne.Count);
    Assert.Equal(1, DistanceMetric.calculateLeeDistance(probe.Data, near.Data));
    Assert.Equal(1, withinOne.Values.ElementAt(0));
    Assert.Equal(41, withinOne.Keys.ElementAt(0).Id);
    Assert.Equal(near.Data, withinOne.Keys.ElementAt(0).Data);

    // Threshold 10: "near" and "mid" are expected. Adding "far" first exercises
    // inserting into a tree that has already been queried.
    tree.add(far);
    Dictionary<TestNode, Int32> withinTen = tree.query(probe, 10);
    Assert.Equal(2, withinTen.Count);
    Assert.Equal(1, DistanceMetric.calculateLeeDistance(probe.Data, near.Data));
    Assert.Equal(10, DistanceMetric.calculateLeeDistance(probe.Data, mid.Data));
    Assert.True(withinTen.Contains(new KeyValuePair<TestNode, int>(near, 1)));
    Assert.True(withinTen.Contains(new KeyValuePair<TestNode, int>(mid, 10)));

    // Threshold 20: all three candidates are expected.
    Dictionary<TestNode, Int32> withinTwenty = tree.query(probe, 20);
    Assert.Equal(3, withinTwenty.Count);
    Assert.Equal(1, DistanceMetric.calculateLeeDistance(probe.Data, near.Data));
    Assert.Equal(10, DistanceMetric.calculateLeeDistance(probe.Data, mid.Data));
    Assert.Equal(19, DistanceMetric.calculateLeeDistance(probe.Data, far.Data));
    Assert.True(withinTwenty.Contains(new KeyValuePair<TestNode, int>(near, 1)));
    Assert.True(withinTwenty.Contains(new KeyValuePair<TestNode, int>(mid, 10)));
    Assert.True(withinTwenty.Contains(new KeyValuePair<TestNode, int>(far, 19)));
}