/// <summary>
/// Turns this node into an internal node by partitioning its books around the median
/// count of the splitting word selected by <paramref name="splitIndex"/>.
/// This should be the only function setting any internal variables other than parent.
/// </summary>
/// <param name="books">All books in the tree; <paramref name="splittingBookIndices"/> indexes into this list.</param>
/// <param name="splittingWords">Words available to split on, one per tree level.</param>
/// <param name="splittingBookIndices">Indices (into <paramref name="books"/>) of the books held by this node.</param>
/// <param name="splitIndex">Index into <paramref name="splittingWords"/> of the word this node splits on.</param>
/// <param name="maxBooksPerLeaf">Leaf capacity, forwarded to the child node constructors.</param>
private void Split(List<BookData> books, List<string> splittingWords, List<int> splittingBookIndices, int splitIndex, int maxBooksPerLeaf)
{
    // Nothing to partition, or no splitting word left for this depth: keep the
    // books here as a leaf instead of indexing past the end of a list.
    if (splittingBookIndices.Count == 0 || splitIndex >= splittingWords.Count)
    {
        _containedBookIndices = splittingBookIndices;
        return;
    }

    string word = splittingWords[splitIndex];

    // The median must come from this node's own books, in sorted order.
    // A copy of the counts is sorted; reordering `books` itself would
    // invalidate every index stored in splittingBookIndices.
    var sortedCounts = new List<int>(splittingBookIndices.Count);
    foreach (int bookIndex in splittingBookIndices)
    {
        sortedCounts.Add(books[bookIndex].WordCounts[word]);
    }
    sortedCounts.Sort();

    int median = sortedCounts[sortedCounts.Count / 2];
    if (sortedCounts.Count % 2 == 0)
    {
        // Even number of values: average the two middle ones (integer division).
        median = (median + sortedCounts[(sortedCounts.Count - 1) / 2]) / 2;
    }

    _splittingWord = word;
    _splittingValue = median;

    // Books with a count at or below the median go left, the rest go right.
    var leftIndices = new List<int>();
    var rightIndices = new List<int>();
    foreach (int bookIndex in splittingBookIndices)
    {
        if (books[bookIndex].WordCounts[word] <= median)
        {
            leftIndices.Add(bookIndex);
        }
        else
        {
            rightIndices.Add(bookIndex);
        }
    }

    // Children receive this node's split index; their constructor advances it by one.
    _leftChild = new KDTreeNode(this, books, splittingWords, leftIndices, splitIndex, maxBooksPerLeaf);
    _rightChild = new KDTreeNode(this, books, splittingWords, rightIndices, splitIndex, maxBooksPerLeaf);
}
/// <summary>
/// Builds a node below <paramref name="parent"/>. The node becomes a leaf holding
/// <paramref name="splittingBookIndices"/> when they fit within
/// <paramref name="maxBooksPerLeaf"/>; otherwise it is split further.
/// </summary>
/// <param name="parent">Node this one hangs under.</param>
/// <param name="books">All books in the tree.</param>
/// <param name="splittingWords">Words available to split on.</param>
/// <param name="splittingBookIndices">Indices of the books assigned to this node.</param>
/// <param name="lastSplitIndex">Split index used by the parent; this node uses the next one.</param>
/// <param name="maxBooksPerLeaf">Largest number of books a leaf may hold.</param>
private KDTreeNode(KDTreeNode parent, List<BookData> books, List<string> splittingWords, List<int> splittingBookIndices, int lastSplitIndex, int maxBooksPerLeaf)
{
    _parent = parent;

    bool fitsInLeaf = splittingBookIndices.Count <= maxBooksPerLeaf;
    if (fitsInLeaf)
    {
        // Every remaining index belongs to this leaf.
        _containedBookIndices = splittingBookIndices;
        return;
    }

    // The current split index is one past the index the parent used.
    int currentSplitIndex = lastSplitIndex + 1;
    Split(books, splittingWords, splittingBookIndices, currentSplitIndex, maxBooksPerLeaf);
}
/// <summary>
/// Creates a KDTree to speed up the K Nearest Neighbor algorithm.
/// </summary>
/// <param name="books">Books contained in the tree.</param>
/// <param name="wordsToSplitOn">Words (most common words) used in the algorithm.</param>
/// <param name="maxBooksPerLeaf">Largest number of books a leaf node may hold; defaults to 4.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="books"/> or <paramref name="wordsToSplitOn"/> is null.
/// </exception>
public KDTree(List<BookData> books, List<string> wordsToSplitOn, int maxBooksPerLeaf = 4)
{
    // Fail fast at the public boundary rather than with a NullReferenceException
    // somewhere inside tree construction.
    if (books == null)
    {
        throw new System.ArgumentNullException(nameof(books));
    }

    if (wordsToSplitOn == null)
    {
        throw new System.ArgumentNullException(nameof(wordsToSplitOn));
    }

    _books = books;
    _wordsToSplitOn = wordsToSplitOn;
    _root = new KDTreeNode(books, wordsToSplitOn, maxBooksPerLeaf);
}