/// <summary>
        /// Turns this node into an interior node. Picks the splitting word at
        /// <paramref name="splitIndex"/>, uses the median count of that word among the books
        /// referenced by <paramref name="splittingBookIndices"/> as the splitting value, and
        /// builds a left child (count &lt;= splitting value) and a right child (count &gt; splitting value).
        /// This should be the only function setting any internal variables other than parent.
        /// </summary>
        /// <param name="books">All books; indexed by the values in <paramref name="splittingBookIndices"/>. Not modified.</param>
        /// <param name="splittingWords">Words to split on; the word at <paramref name="splitIndex"/> is used here.</param>
        /// <param name="splittingBookIndices">Indices into <paramref name="books"/> of the books that belong to this node.</param>
        /// <param name="splitIndex">Index into <paramref name="splittingWords"/> of the word used for this split.</param>
        /// <param name="maxBooksPerLeaf">Forwarded unchanged to both child nodes.</param>
        private void Split(List <BookData> books, List <string> splittingWords, List <int> splittingBookIndices, int splitIndex, int maxBooksPerLeaf)
        {
            _splittingWord = splittingWords[splitIndex];

            // Sort only the counts of the books that belong to this node. OrderBy returns a new
            // sequence and never reorders its source, so the result must be captured; "books"
            // itself has to keep its order because it is addressed by index below.
            var sortedCounts = splittingBookIndices
                .Select(bookIndex => books[bookIndex].WordCounts[SplittingWord])
                .OrderBy(count => count)
                .ToList();

            int median = sortedCounts[sortedCounts.Count / 2];

            if (sortedCounts.Count % 2 == 0)
            {
                // Even number of values: average the two middle ones (integer division).
                median = (median + sortedCounts[sortedCounts.Count / 2 - 1]) / 2;
            }
            _splittingValue = median;

            var leftIndices  = new List <int>();
            var rightIndices = new List <int>();

            foreach (int i in splittingBookIndices)
            {
                if (books[i].WordCounts[SplittingWord] <= SplittingValue)
                {
                    leftIndices.Add(i);
                }
                else
                {
                    rightIndices.Add(i);
                }
            }

            // NOTE(review): when every book in this node has the same count for the splitting word,
            // all indices land in leftIndices and the left child splits again on the next word;
            // splitIndex can then run past the end of splittingWords. Confirm callers supply enough words.
            _leftChild  = new KDTreeNode(this, books, splittingWords, leftIndices, splitIndex, maxBooksPerLeaf);
            _rightChild = new KDTreeNode(this, books, splittingWords, rightIndices, splitIndex, maxBooksPerLeaf);
        }
        /// <summary>
        /// Creates a node below <paramref name="parent"/>. When the node holds no more than
        /// <paramref name="maxBooksPerLeaf"/> book indices it becomes a leaf that stores them;
        /// otherwise it is split on the word following <paramref name="lastSplitIndex"/>.
        /// </summary>
        /// <param name="parent">Node that created this one.</param>
        /// <param name="books">All books; passed through to <c>Split</c>.</param>
        /// <param name="splittingWords">Words to split on; passed through to <c>Split</c>.</param>
        /// <param name="splittingBookIndices">Indices of the books that belong to this node.</param>
        /// <param name="lastSplitIndex">Split index used by the parent; this node uses the next one.</param>
        /// <param name="maxBooksPerLeaf">Largest number of book indices a leaf may hold.</param>
        private KDTreeNode(KDTreeNode parent, List <BookData> books, List <string> splittingWords, List <int> splittingBookIndices, int lastSplitIndex, int maxBooksPerLeaf)
        {
            _parent = parent;

            bool fitsInLeaf = splittingBookIndices.Count <= maxBooksPerLeaf;
            if (fitsInLeaf)
            {
                // Small enough: this node is a leaf and keeps the remaining indices.
                _containedBookIndices = splittingBookIndices;
                return;
            }

            // Too many books for one leaf: split on the word after the parent's.
            int currentSplitIndex = lastSplitIndex + 1;
            Split(books, splittingWords, splittingBookIndices, currentSplitIndex, maxBooksPerLeaf);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Builds a KDTree over the given books to speed up the K Nearest Neighbor algorithm.
 /// </summary>
 /// <param name="books">Books contained in the tree.</param>
 /// <param name="wordsToSplitOn">Words (most common words) used in the algorithm.</param>
 /// <param name="maxBooksPerLeaf">Leaf-size limit forwarded to the root node; defaults to 4.</param>
 public KDTree(List <BookData> books, List <string> wordsToSplitOn, int maxBooksPerLeaf = 4)
 {
     // Build the root from the arguments first, then record the inputs alongside it.
     var rootNode = new KDTreeNode(books, wordsToSplitOn, maxBooksPerLeaf);

     _books = books;
     _wordsToSplitOn = wordsToSplitOn;
     _root = rootNode;
 }