Пример #1
0
        /// <summary>
        /// Fills <paramref name="node"/> and, recursively, its children from
        /// <paramref name="in"/> (pre-order traversal). Per node the input is consumed
        /// as: a string whose first character becomes the split character, one flag
        /// byte, the token (if flagged), the value (if flagged), then the low, equal
        /// and high subtrees in that order.
        /// </summary>
        /// <param name="in"> input the serialized nodes are read from. </param>
        /// <param name="node"> already-allocated node to populate. </param>
        private void ReadRecursively(DataInput @in, TernaryTreeNode node)
        {
            string splitText = @in.ReadString();
            node.splitchar = splitText.First();

            // Decode the flag byte once; each bit says which optional part follows.
            sbyte flags = (sbyte)@in.ReadByte();
            bool hasToken = (flags & HAS_TOKEN) != 0;
            bool hasValue = (flags & HAS_VALUE) != 0;
            bool hasLower = (flags & LO_KID) != 0;
            bool hasEqual = (flags & EQ_KID) != 0;
            bool hasHigher = (flags & HI_KID) != 0;

            if (hasToken)
            {
                node.token = @in.ReadString();
            }
            if (hasValue)
            {
                long stored = Convert.ToInt64(@in.ReadInt64());
                node.val = stored;
            }

            // Children are attached before descending, in low / equal / high order.
            if (hasLower)
            {
                TernaryTreeNode lower = new TernaryTreeNode();
                node.loKid = lower;
                ReadRecursively(@in, lower);
            }
            if (hasEqual)
            {
                TernaryTreeNode equal = new TernaryTreeNode();
                node.eqKid = equal;
                ReadRecursively(@in, equal);
            }
            if (hasHigher)
            {
                TernaryTreeNode higher = new TernaryTreeNode();
                node.hiKid = higher;
                ReadRecursively(@in, higher);
            }
        }
Пример #2
0
        /// <summary>
        /// Rebuilds the tree from the entries produced by <paramref name="tfit"/>.
        /// All keys (converted from UTF-8 to UTF-16) and their weights are buffered
        /// first and then handed to <c>autocomplete.BalancedTree</c> in one call.
        /// </summary>
        /// <param name="tfit"> source of keys and weights; wrapped in a
        /// <c>SortedInputIterator</c> unless it already uses the UTF-16 sort-order comparer. </param>
        /// <exception cref="System.ArgumentException"> if the input reports payloads or contexts. </exception>
        public override void Build(IInputIterator tfit)
        {
            if (tfit.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (tfit.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            root = new TernaryTreeNode();

            // The tree builder expects its input sorted in UTF-16 order.
#pragma warning disable 612, 618
            if (tfit.Comparer != BytesRef.UTF8SortedAsUTF16Comparer)
            {
                tfit = new SortedInputIterator(tfit, BytesRef.UTF8SortedAsUTF16Comparer);
            }
#pragma warning restore 612, 618

            var keys = new List<string>();
            var weights = new List<object>();
            var utf16Scratch = new CharsRef();

            // Drain the iterator, buffering every key together with its weight.
            for (BytesRef term = tfit.Next(); term != null; term = tfit.Next())
            {
                utf16Scratch.Grow(term.Length);
                UnicodeUtil.UTF8toUTF16(term.Bytes, term.Offset, term.Length, utf16Scratch);
                keys.Add(utf16Scratch.ToString());
                weights.Add(Convert.ToInt64(tfit.Weight));
            }

            autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, keys.Count - 1, root);
        }
Пример #3
0
 /// <summary>
 /// Replaces the current state with data read from <paramref name="input"/>:
 /// first <c>count</c> (via <c>ReadVInt64</c>), then a fresh root that is filled
 /// by <c>ReadRecursively</c>. The reads run while holding the lock on this instance.
 /// </summary>
 /// <param name="input"> input to read the stored data from. </param>
 /// <returns> always <c>true</c>; failures surface as exceptions from the reads. </returns>
 public override bool Load(DataInput input)
 {
     lock (this)
     {
         count = input.ReadVInt64();
         root = new TernaryTreeNode();
         ReadRecursively(input, root);
     }

     return true;
 }
Пример #4
0
 /// <summary>
 /// Inserting keys in TST in the order middle,small,big (lexicographic measure)
 /// recursively creates a balanced tree which reduces insertion and search
 /// times significantly.
 /// </summary>
 /// <remarks>
 /// <paramref name="root"/> is passed by value: the reassignment from
 /// <c>Insert</c> is not visible to the caller, so callers should pass an
 /// already-allocated root node.
 /// </remarks>
 /// <param name="tokens">
 ///          Sorted list of keys to be inserted in TST; each element is cast to <see cref="string"/>. </param>
 /// <param name="vals">
 ///          values stored with the keys; <c>vals[i]</c> belongs to <c>tokens[i]</c>. </param>
 /// <param name="lo">
 ///          stores the lower index of current list. </param>
 /// <param name="hi">
 ///          stores the higher index of current list. </param>
 /// <param name="root">
 ///          a reference object to root of TST. </param>
 public virtual void BalancedTree(object[] tokens, object[] vals, int lo, int hi, TernaryTreeNode root)
 {
     if (lo > hi)
     {
         return;
     }

     // lo + (hi - lo) / 2 gives the same midpoint as (lo + hi) / 2 for valid
     // index ranges, but cannot overflow int when both bounds are large.
     int mid = lo + (hi - lo) / 2;

     root = Insert(root, (string)tokens[mid], vals[mid], 0);
     BalancedTree(tokens, vals, lo, mid - 1, root);
     BalancedTree(tokens, vals, mid + 1, hi, root);
 }
Пример #5
0
        /// <summary>
        /// Serializes <paramref name="node"/> and its subtrees to <paramref name="out"/>
        /// (pre-order traversal). Per node it writes: the split character as a
        /// one-character string, a flag byte describing which optional parts and
        /// children exist, the token (if any), the value (if any), then the low,
        /// equal and high subtrees in that order.
        /// </summary>
        /// <param name="out"> output the node data is written to. </param>
        /// <param name="node"> node to serialize. </param>
        private void WriteRecursively(DataOutput @out, TernaryTreeNode node)
        {
            @out.WriteString(new string(new char[] { node.splitchar }, 0, 1));

            // Work out once which optional parts this node carries.
            bool hasToken = node.token != null;
            bool hasValue = node.val != null;
            bool hasLower = node.loKid != null;
            bool hasEqual = node.eqKid != null;
            bool hasHigher = node.hiKid != null;

            sbyte flags = 0;
            if (hasLower)
            {
                flags |= LO_KID;
            }
            if (hasEqual)
            {
                flags |= EQ_KID;
            }
            if (hasHigher)
            {
                flags |= HI_KID;
            }
            if (hasToken)
            {
                flags |= HAS_TOKEN;
            }
            if (hasValue)
            {
                flags |= HAS_VALUE;
            }
            @out.WriteByte((byte)flags);

            if (hasToken)
            {
                @out.WriteString(node.token);
            }
            if (hasValue)
            {
                @out.WriteInt64((long)node.val);
            }

            // Children follow the node itself: low, then equal, then high.
            if (hasLower)
            {
                WriteRecursively(@out, node.loKid);
            }
            if (hasEqual)
            {
                WriteRecursively(@out, node.eqKid);
            }
            if (hasHigher)
            {
                WriteRecursively(@out, node.hiKid);
            }
        }
Пример #6
0
 /// <summary>
 /// Replaces the current state with data read from <paramref name="input"/>:
 /// first <c>count</c> (via <c>ReadVInt64</c>), then a fresh root that is filled
 /// by <c>ReadRecursively</c>. The reads are bracketed by
 /// <c>UninterruptableMonitor.Enter</c>/<c>Exit</c> on this instance.
 /// </summary>
 /// <param name="input"> input to read the stored data from. </param>
 /// <returns> always <c>true</c>; failures surface as exceptions from the reads. </returns>
 public override bool Load(DataInput input)
 {
     UninterruptableMonitor.Enter(this);
     try
     {
         count = input.ReadVInt64();
         root = new TernaryTreeNode();
         ReadRecursively(input, root);
     }
     finally
     {
         // Always release the monitor, even when a read throws.
         UninterruptableMonitor.Exit(this);
     }

     return true;
 }
Пример #7
0
 /// <summary>
 /// Inserts a key in TST creating a series of Binary Search Trees at each node.
 /// The key is actually stored across the eqKid of each node in a successive
 /// manner.
 /// </summary>
 /// <param name="currentNode">
 ///          a reference node where the insertion will take currently;
 ///          <c>null</c> causes a new node to be created for the current character. </param>
 /// <param name="s">
 ///          key to be inserted in TST. </param>
 /// <param name="val">
 ///          value stored on the node that terminates the key. </param>
 /// <param name="x">
 ///          index of character in key to be inserted currently. </param>
 /// <returns> The new reference to root node of TST; <paramref name="currentNode"/>
 ///          is returned unchanged when <paramref name="s"/> is <c>null</c> or
 ///          <paramref name="x"/> is at or past its end. </returns>
 public virtual TernaryTreeNode Insert(TernaryTreeNode currentNode, string s, object val, int x)
 {
     if (s == null || s.Length <= x)
     {
         return currentNode;
     }

     // Use the string indexer; System.String has no charAt member (Java leftover).
     char ch = s[x];

     if (currentNode == null)
     {
         // A freshly created node matches ch, so the "equal" branch below finishes the job.
         currentNode = new TernaryTreeNode();
         currentNode.splitchar = ch;
     }

     if (currentNode.splitchar > ch)
     {
         currentNode.loKid = Insert(currentNode.loKid, s, val, x);
     }
     else if (currentNode.splitchar == ch)
     {
         if (x < s.Length - 1)
         {
             // More characters remain: continue down the equal branch.
             currentNode.eqKid = Insert(currentNode.eqKid, s, val, x + 1);
         }
         else
         {
             // Last character: this node terminates the key.
             currentNode.token = s;
             currentNode.val = val;
         }
     }
     else
     {
         currentNode.hiKid = Insert(currentNode.hiKid, s, val, x);
     }
     return currentNode;
 }
Пример #8
0
 /// <summary>
 /// Adds the key <paramref name="s"/> to the ternary search tree, starting at
 /// character index <paramref name="x"/>. Each node holds one character; smaller
 /// and larger characters go to the low/high children, and the next character of
 /// the key continues along the equal child.
 /// </summary>
 /// <param name="currentNode">
 ///          node the insertion starts from; <c>null</c> makes the method create one. </param>
 /// <param name="s">
 ///          key to be inserted in TST. </param>
 /// <param name="val">
 ///          value stored on the node that terminates the key. </param>
 /// <param name="x">
 ///          index of character in key to be inserted currently. </param>
 /// <returns> the (possibly newly created) node for this position; the input node
 ///          unchanged when the key is <c>null</c> or already exhausted. </returns>
 public virtual TernaryTreeNode Insert(TernaryTreeNode currentNode, string s, object val, int x)
 {
     // Nothing to insert once the key is missing or used up.
     if (s is null || x >= s.Length)
     {
         return currentNode;
     }

     char ch = s[x];

     if (currentNode is null)
     {
         // A fresh node matches ch, so it is completed by the "equal" case below.
         currentNode = new TernaryTreeNode();
         currentNode.splitchar = ch;
     }

     if (currentNode.splitchar > ch)
     {
         currentNode.loKid = Insert(currentNode.loKid, s, val, x);
     }
     else if (currentNode.splitchar == ch)
     {
         bool moreCharsRemain = x < s.Length - 1;
         if (moreCharsRemain)
         {
             currentNode.eqKid = Insert(currentNode.eqKid, s, val, x + 1);
         }
         else
         {
             // Final character: mark this node as the end of the key.
             currentNode.token = s;
             currentNode.val = val;
         }
     }
     else
     {
         currentNode.hiKid = Insert(currentNode.hiKid, s, val, x);
     }

     return currentNode;
 }
Пример #9
0
        /// <summary>
        /// Rebuilds the tree from the entries produced by <paramref name="enumerator"/>.
        /// All keys (converted from UTF-8 to UTF-16) and their weights are buffered
        /// first and then handed to <c>autocomplete.BalancedTree</c> in one call.
        /// </summary>
        /// <param name="enumerator"> source of keys and weights; wrapped in a
        /// <c>SortedInputEnumerator</c> unless it already uses the UTF-16 sort-order comparer. </param>
        /// <exception cref="ArgumentNullException"> if <paramref name="enumerator"/> is <c>null</c>. </exception>
        /// <exception cref="ArgumentException"> if the input reports payloads or contexts. </exception>
        public override void Build(IInputEnumerator enumerator)
        {
            // LUCENENET: Added guard clause for null
            if (enumerator is null)
            {
                throw new ArgumentNullException(nameof(enumerator));
            }
            if (enumerator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            root = new TernaryTreeNode();

            // The tree builder expects its input sorted in UTF-16 order.
#pragma warning disable 612, 618
            if (enumerator.Comparer != BytesRef.UTF8SortedAsUTF16Comparer)
            {
                enumerator = new SortedInputEnumerator(enumerator, BytesRef.UTF8SortedAsUTF16Comparer);
            }
#pragma warning restore 612, 618

            var keys = new JCG.List<string>();
            var weights = new JCG.List<object>();
            var utf16Scratch = new CharsRef();

            // Buffer every key together with its weight.
            while (enumerator.MoveNext())
            {
                BytesRef term = enumerator.Current;
                utf16Scratch.Grow(term.Length);
                UnicodeUtil.UTF8toUTF16(term.Bytes, term.Offset, term.Length, utf16Scratch);
                keys.Add(utf16Scratch.ToString());
                weights.Add(enumerator.Weight);
            }

            autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, keys.Count - 1, root);
        }
Пример #10
0
        /// <summary>
        /// Returns suggestions for <paramref name="key"/> by asking
        /// <c>autocomplete.PrefixCompletion</c> for all matching nodes and converting
        /// them to <c>LookupResult</c>s.
        /// </summary>
        /// <param name="key"> prefix to complete. </param>
        /// <param name="contexts"> must be <c>null</c>; contexts are not supported. </param>
        /// <param name="onlyMorePopular"> when <c>true</c>, every match is pushed through a
        /// <c>LookupPriorityQueue</c> created with size <paramref name="num"/>; otherwise the first
        /// <c>Math.Min(num, matches)</c> matches are returned in completion order. </param>
        /// <param name="num"> maximum number of results. </param>
        /// <returns> the results; an empty list when nothing matches. </returns>
        /// <exception cref="System.ArgumentException"> if <paramref name="contexts"/> is not <c>null</c>. </exception>
        public override IList<LookupResult> DoLookup(string key, IEnumerable<BytesRef> contexts, bool onlyMorePopular, int num)
        {
            if (contexts != null)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            IList<TernaryTreeNode> matches = autocomplete.PrefixCompletion(root, key, 0);
            List<LookupResult> results = new List<LookupResult>();

            bool nothingFound = matches == null || matches.Count == 0;
            if (nothingFound)
            {
                return results;
            }

            if (!onlyMorePopular)
            {
                // Plain mode: take the leading matches as they come.
                int limit = Math.Min(num, matches.Count);
                for (int index = 0; index < limit; index++)
                {
                    TernaryTreeNode match = matches[index];
                    results.Add(new LookupResult(match.token, (long)match.val));
                }
                return results;
            }

            // Popularity mode: let the bounded queue pick what to keep.
            LookupPriorityQueue best = new LookupPriorityQueue(num);
            foreach (TernaryTreeNode match in matches)
            {
                best.InsertWithOverflow(new LookupResult(match.token, (long)match.val));
            }
            foreach (LookupResult kept in best.GetResults())
            {
                results.Add(kept);
            }
            return results;
        }
Пример #11
0
        /// <summary>
        /// Auto-completes a given prefix query using Depth-First Search with the end
        /// of prefix as source node each time finding a new leaf to get a complete key
        /// to be added in the suggest list.
        /// </summary>
        /// <param name="root">
        ///          a reference to root node of TST. </param>
        /// <param name="s">
        ///          prefix query to be auto-completed. </param>
        /// <param name="x">
        ///          index of current character to be searched while traversing through
        ///          the prefix in TST. </param>
        /// <returns> suggest list of auto-completed keys for the given prefix query;
        ///          empty when nothing matches, or when <paramref name="s"/> is <c>null</c>
        ///          or has no character at index <paramref name="x"/>. </returns>
        public virtual IList<TernaryTreeNode> PrefixCompletion(TernaryTreeNode root, string s, int x)
        {
            JCG.List<TernaryTreeNode> suggest = new JCG.List<TernaryTreeNode>();

            // Same guard as Insert: without a character at s[x] there is nothing to
            // match, and indexing below would throw for an empty or null prefix.
            if (s is null || s.Length <= x)
            {
                return suggest;
            }

            // Phase 1: walk down the tree until the last prefix character is matched.
            TernaryTreeNode p = root;
            while (p != null)
            {
                char ch = s[x];
                if (ch < p.splitchar)
                {
                    p = p.loKid;
                }
                else if (ch == p.splitchar)
                {
                    if (x == s.Length - 1)
                    {
                        break;
                    }
                    x++;
                    p = p.eqKid;
                }
                else
                {
                    p = p.hiKid;
                }
            }

            if (p is null)
            {
                // The prefix is not present in the tree.
                return suggest;
            }

            // The prefix itself may be a complete key.
            if (p.token != null)
            {
                suggest.Add(p);
            }
            if (p.eqKid is null)
            {
                // No longer keys continue from this prefix.
                return suggest;
            }

            // Phase 2: depth-first walk of everything below the prefix node.
            var pending = new Stack<TernaryTreeNode>();
            pending.Push(p.eqKid);
            while (pending.Count > 0)
            {
                TernaryTreeNode top = pending.Pop();
                if (top.token != null)
                {
                    suggest.Add(top);
                }
                if (top.eqKid != null)
                {
                    pending.Push(top.eqKid);
                }
                if (top.loKid != null)
                {
                    pending.Push(top.loKid);
                }
                if (top.hiKid != null)
                {
                    pending.Push(top.hiKid);
                }
            }
            return suggest;
        }
Пример #12
0
 /// <summary>
 /// Writes <paramref name="node"/> followed by its subtrees to <paramref name="out"/>
 /// (pre-order traversal): split character as a one-character string, a flag byte,
 /// the optional token, the optional value, then the low, equal and high children.
 /// </summary>
 /// <param name="out"> output the node data is written to. </param>
 /// <param name="node"> node to serialize. </param>
 private void WriteRecursively(DataOutput @out, TernaryTreeNode node)
 {
     @out.WriteString(new string(new char[] { node.splitchar }, 0, 1));

     // Snapshot the children and presence checks once; they are reused below.
     TernaryTreeNode lower = node.loKid;
     TernaryTreeNode equal = node.eqKid;
     TernaryTreeNode higher = node.hiKid;
     bool tokenPresent = node.token != null;
     bool valuePresent = node.val != null;

     // The flag byte tells the reader which optional parts follow.
     sbyte flags = 0;
     if (tokenPresent)
     {
         flags |= HAS_TOKEN;
     }
     if (valuePresent)
     {
         flags |= HAS_VALUE;
     }
     if (lower != null)
     {
         flags |= LO_KID;
     }
     if (equal != null)
     {
         flags |= EQ_KID;
     }
     if (higher != null)
     {
         flags |= HI_KID;
     }
     @out.WriteByte((byte)flags);

     if (tokenPresent)
     {
         @out.WriteString(node.token);
     }
     if (valuePresent)
     {
         @out.WriteLong((long)node.val);
     }

     // Subtrees are emitted low, equal, high.
     if (lower != null)
     {
         WriteRecursively(@out, lower);
     }
     if (equal != null)
     {
         WriteRecursively(@out, equal);
     }
     if (higher != null)
     {
         WriteRecursively(@out, higher);
     }
 }
Пример #13
0
 /// <summary>
 /// Reads one node into <paramref name="node"/> and then its subtrees from
 /// <paramref name="in"/> (pre-order traversal): a string whose first character
 /// is the split character, a flag byte, the optional token, the optional value,
 /// then the low, equal and high children.
 /// </summary>
 /// <param name="in"> input the serialized nodes are read from. </param>
 /// <param name="node"> already-allocated node to populate. </param>
 private void ReadRecursively(DataInput @in, TernaryTreeNode node)
 {
     string splitText = @in.ReadString();
     node.splitchar = splitText.First();

     // One byte of flags announces which optional parts follow.
     sbyte flags = (sbyte)@in.ReadByte();
     bool tokenFollows = (flags & HAS_TOKEN) != 0;
     bool valueFollows = (flags & HAS_VALUE) != 0;
     bool lowerFollows = (flags & LO_KID) != 0;
     bool equalFollows = (flags & EQ_KID) != 0;
     bool higherFollows = (flags & HI_KID) != 0;

     if (tokenFollows)
     {
         node.token = @in.ReadString();
     }
     if (valueFollows)
     {
         long stored = Convert.ToInt64(@in.ReadLong());
         node.val = stored;
     }

     // Each child is attached first and then filled in, low / equal / high.
     if (lowerFollows)
     {
         var child = new TernaryTreeNode();
         node.loKid = child;
         ReadRecursively(@in, child);
     }
     if (equalFollows)
     {
         var child = new TernaryTreeNode();
         node.eqKid = child;
         ReadRecursively(@in, child);
     }
     if (higherFollows)
     {
         var child = new TernaryTreeNode();
         node.hiKid = child;
         ReadRecursively(@in, child);
     }
 }
Пример #14
0
 /// <summary>
 /// Replaces the current state with data read from <paramref name="input"/>:
 /// first <c>count</c> (via <c>ReadVLong</c>), then a fresh root that is filled
 /// by <c>ReadRecursively</c>. The reads run while holding the lock on this instance.
 /// </summary>
 /// <param name="input"> input to read the stored data from. </param>
 /// <returns> always <c>true</c>; failures surface as exceptions from the reads. </returns>
 public override bool Load(DataInput input)
 {
     lock (this)
     {
         count = input.ReadVLong();
         root = new TernaryTreeNode();
         ReadRecursively(input, root);
     }

     // Reaching this point means every read succeeded.
     return true;
 }
Пример #15
0
        /// <summary>
        /// Rebuilds the tree from the entries produced by <paramref name="tfit"/>.
        /// All keys (converted from UTF-8 to UTF-16) and their weights are buffered
        /// first and then handed to <c>autocomplete.BalancedTree</c> in one call.
        /// </summary>
        /// <param name="tfit"> source of keys and weights; wrapped in a
        /// <c>SortedInputIterator</c> unless it already uses the UTF-16 sort-order comparator. </param>
        /// <exception cref="System.ArgumentException"> if the input reports payloads or contexts. </exception>
        public override void Build(InputIterator tfit)
        {
            if (tfit.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (tfit.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            root = new TernaryTreeNode();
            // buffer first
            if (tfit.Comparator != BytesRef.UTF8SortedAsUTF16Comparator)
            {
                // make sure it's sorted and the comparator uses UTF16 sort order
                tfit = new SortedInputIterator(tfit, BytesRef.UTF8SortedAsUTF16Comparator);
            }

            List<string> tokens = new List<string>();
            // Weights are stored as boxed longs: BalancedTree takes object[] and the
            // value is later unboxed with (long). Java's Number has no .NET counterpart.
            List<object> vals = new List<object>();
            BytesRef spare;
            CharsRef charsSpare = new CharsRef();
            while ((spare = tfit.Next()) != null)
            {
                charsSpare.Grow(spare.Length);
                UnicodeUtil.UTF8toUTF16(spare.Bytes, spare.Offset, spare.Length, charsSpare);
                tokens.Add(charsSpare.ToString());
                vals.Add(Convert.ToInt64(tfit.Weight));
            }
            autocomplete.BalancedTree(tokens.ToArray(), vals.ToArray(), 0, tokens.Count - 1, root);
        }
Пример #16
0
        /// <summary>
        /// Inserting keys in TST in the order middle,small,big (lexicographic measure)
        /// recursively creates a balanced tree which reduces insertion and search
        /// times significantly.
        /// </summary>
        /// <remarks>
        /// <paramref name="root"/> is passed by value: the reassignment from
        /// <c>Insert</c> is not visible to the caller, so callers should pass an
        /// already-allocated root node.
        /// </remarks>
        /// <param name="tokens">
        ///          Sorted list of keys to be inserted in TST; each element is cast to <see cref="string"/>. </param>
        /// <param name="vals">
        ///          values stored with the keys; <c>vals[i]</c> belongs to <c>tokens[i]</c>. </param>
        /// <param name="lo">
        ///          stores the lower index of current list. </param>
        /// <param name="hi">
        ///          stores the higher index of current list. </param>
        /// <param name="root">
        ///          a reference object to root of TST. </param>
        public virtual void BalancedTree(object[] tokens, object[] vals, int lo, int hi, TernaryTreeNode root)
        {
            if (lo > hi)
            {
                return;
            }

            // Written as lo + (hi - lo) / 2 so the midpoint cannot overflow int,
            // which (lo + hi) / 2 does once the two bounds sum past int.MaxValue.
            int mid = lo + (hi - lo) / 2;

            // Middle key first, then the lower and upper halves, keeps the tree balanced.
            root = Insert(root, (string)tokens[mid], vals[mid], 0);
            BalancedTree(tokens, vals, lo, mid - 1, root);
            BalancedTree(tokens, vals, mid + 1, hi, root);
        }
Пример #17
0
        /// <summary>
        /// Rebuilds the tree from the entries produced by <paramref name="tfit"/>.
        /// All keys (converted from UTF-8 to UTF-16) and their weights are buffered
        /// first and then handed to <c>autocomplete.BalancedTree</c> in one call.
        /// </summary>
        /// <param name="tfit"> source of keys and weights; wrapped in a
        /// <c>SortedInputIterator</c> unless it already uses the UTF-16 sort-order comparer. </param>
        /// <exception cref="System.ArgumentException"> if the input reports payloads or contexts. </exception>
        public override void Build(IInputIterator tfit)
        {
            if (tfit.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (tfit.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            root = new TernaryTreeNode();

            // The tree builder expects its input sorted in UTF-16 order.
            #pragma warning disable 612, 618
            if (tfit.Comparator != BytesRef.UTF8SortedAsUTF16Comparer)
            {
                tfit = new SortedInputIterator(tfit, BytesRef.UTF8SortedAsUTF16Comparer);
            }
            #pragma warning restore 612, 618

            var bufferedKeys = new List<string>();
            // LUCENENET TODO: Should this be long? In Java it was Number, but we can probably do better than object
            var bufferedWeights = new List<object>();
            var scratch = new CharsRef();

            // Drain the iterator, buffering every key together with its weight.
            for (BytesRef utf8Key = tfit.Next(); utf8Key != null; utf8Key = tfit.Next())
            {
                scratch.Grow(utf8Key.Length);
                UnicodeUtil.UTF8toUTF16(utf8Key.Bytes, utf8Key.Offset, utf8Key.Length, scratch);
                bufferedKeys.Add(scratch.ToString());
                bufferedWeights.Add(Convert.ToInt64(tfit.Weight));
            }

            autocomplete.BalancedTree(bufferedKeys.ToArray(), bufferedWeights.ToArray(), 0, bufferedKeys.Count - 1, root);
        }
Пример #18
0
        /// <summary>
        /// Auto-completes a given prefix query using Depth-First Search with the end
        /// of prefix as source node each time finding a new leaf to get a complete key
        /// to be added in the suggest list.
        /// </summary>
        /// <param name="root">
        ///          a reference to root node of TST. </param>
        /// <param name="s">
        ///          prefix query to be auto-completed. </param>
        /// <param name="x">
        ///          index of current character to be searched while traversing through
        ///          the prefix in TST. </param>
        /// <returns> suggest list of auto-completed keys for the given prefix query;
        ///          empty when nothing matches, or when <paramref name="s"/> is <c>null</c>
        ///          or has no character at index <paramref name="x"/>. </returns>
        public virtual List<TernaryTreeNode> PrefixCompletion(TernaryTreeNode root, string s, int x)
        {
            List<TernaryTreeNode> suggest = new List<TernaryTreeNode>();

            // Without a character at s[x] there is nothing to match, and indexing
            // below would throw for an empty or null prefix.
            if (s == null || s.Length <= x)
            {
                return suggest;
            }

            // Phase 1: walk down the tree until the last prefix character is matched.
            TernaryTreeNode p = root;
            while (p != null)
            {
                // String indexer instead of charAt, which System.String does not have.
                char ch = s[x];
                if (ch < p.splitchar)
                {
                    p = p.loKid;
                }
                else if (ch == p.splitchar)
                {
                    if (x == s.Length - 1)
                    {
                        break;
                    }
                    x++;
                    p = p.eqKid;
                }
                else
                {
                    p = p.hiKid;
                }
            }

            if (p == null)
            {
                // The prefix is not present in the tree.
                return suggest;
            }

            // The prefix itself may be a complete key.
            if (p.token != null)
            {
                suggest.Add(p);
            }
            if (p.eqKid == null)
            {
                // No longer keys continue from this prefix.
                return suggest;
            }

            // Phase 2: depth-first walk of everything below the prefix node.
            var st = new Stack<TernaryTreeNode>();
            st.Push(p.eqKid);
            while (st.Count > 0)
            {
                TernaryTreeNode top = st.Pop();
                if (top.token != null)
                {
                    suggest.Add(top);
                }
                if (top.eqKid != null)
                {
                    st.Push(top.eqKid);
                }
                if (top.loKid != null)
                {
                    st.Push(top.loKid);
                }
                if (top.hiKid != null)
                {
                    st.Push(top.hiKid);
                }
            }
            return suggest;
        }