/// <summary>
/// Fills <paramref name="node"/> from <paramref name="in"/> and then rebuilds its
/// subtrees recursively. Nodes are consumed in pre-order: the node itself first,
/// followed by its low, equal and high children.
/// </summary>
/// <param name="in"> input the serialized nodes are read from. </param>
/// <param name="node"> node whose fields (and children) are populated. </param>
private void ReadRecursively(DataInput @in, TernaryTreeNode node)
{
    // Per-node record: split character (stored as a string), a flag byte, then
    // the optional token and value that the flag byte announces.
    node.splitchar = @in.ReadString().First();
    sbyte flags = (sbyte)@in.ReadByte();

    bool hasToken = (flags & HAS_TOKEN) != 0;
    bool hasValue = (flags & HAS_VALUE) != 0;
    bool hasLow = (flags & LO_KID) != 0;
    bool hasEqual = (flags & EQ_KID) != 0;
    bool hasHigh = (flags & HI_KID) != 0;

    if (hasToken)
    {
        node.token = @in.ReadString();
    }
    if (hasValue)
    {
        node.val = Convert.ToInt64(@in.ReadInt64());
    }

    // Children follow in the fixed order low, equal, high.
    if (hasLow)
    {
        TernaryTreeNode low = new TernaryTreeNode();
        node.loKid = low;
        ReadRecursively(@in, low);
    }
    if (hasEqual)
    {
        TernaryTreeNode equal = new TernaryTreeNode();
        node.eqKid = equal;
        ReadRecursively(@in, equal);
    }
    if (hasHigh)
    {
        TernaryTreeNode high = new TernaryTreeNode();
        node.hiKid = high;
        ReadRecursively(@in, high);
    }
}
/// <summary>
/// Replaces the current tree with one built from the entries of <paramref name="tfit"/>.
/// All entries are buffered in memory first and then handed to
/// <c>autocomplete.BalancedTree</c>.
/// </summary>
/// <param name="tfit"> source of keys and weights. </param>
/// <exception cref="System.ArgumentException"> if the input reports payloads or contexts. </exception>
public override void Build(IInputIterator tfit)
{
    if (tfit.HasPayloads)
    {
        throw new System.ArgumentException("this suggester doesn't support payloads");
    }
    if (tfit.HasContexts)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }

    root = new TernaryTreeNode();

#pragma warning disable 612, 618
    // Wrap the input so that it is sorted in UTF-16 order unless it already
    // advertises exactly that comparer.
    if (tfit.Comparer != BytesRef.UTF8SortedAsUTF16Comparer)
    {
        tfit = new SortedInputIterator(tfit, BytesRef.UTF8SortedAsUTF16Comparer);
    }
#pragma warning restore 612, 618

    // Buffer every key (converted to UTF-16) together with its weight.
    List<string> keys = new List<string>();
    List<object> weights = new List<object>();
    CharsRef utf16 = new CharsRef();
    for (BytesRef utf8 = tfit.Next(); utf8 != null; utf8 = tfit.Next())
    {
        utf16.Grow(utf8.Length);
        UnicodeUtil.UTF8toUTF16(utf8.Bytes, utf8.Offset, utf8.Length, utf16);
        keys.Add(utf16.ToString());
        weights.Add(Convert.ToInt64(tfit.Weight));
    }

    autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, keys.Count - 1, root);
}
/// <summary>
/// Reads the entry count and the whole tree from <paramref name="input"/>,
/// replacing the current root. Runs while holding the monitor of this instance.
/// </summary>
/// <param name="input"> input to read the serialized suggester from. </param>
/// <returns> always <c>true</c>. </returns>
public override bool Load(DataInput input)
{
    lock (this)
    {
        // The count comes first, followed by the nodes in pre-order.
        count = input.ReadVInt64();

        TernaryTreeNode freshRoot = new TernaryTreeNode();
        root = freshRoot;
        ReadRecursively(input, freshRoot);

        return true;
    }
}
/// <summary>
/// Inserting keys in TST in the order middle,small,big (lexicographic measure)
/// recursively creates a balanced tree which reduces insertion and search
/// times significantly.
/// </summary>
/// <param name="tokens">
///          Sorted list of keys to be inserted in TST. Each element is cast to
///          <see cref="string"/>. </param>
/// <param name="vals">
///          Values associated with the keys; <c>vals[i]</c> is stored with <c>tokens[i]</c>. </param>
/// <param name="lo">
///          stores the lower index of current list. </param>
/// <param name="hi">
///          stores the higher index of current list. </param>
/// <param name="root">
///          a reference object to root of TST. </param>
public virtual void BalancedTree(object[] tokens, object[] vals, int lo, int hi, TernaryTreeNode root)
{
    if (lo > hi)
    {
        return;
    }

    // Written as lo + (hi - lo) / 2 so that the midpoint cannot overflow int
    // for very large index ranges; (lo + hi) / 2 would wrap to a negative index.
    int mid = lo + (hi - lo) / 2;

    // Insert the middle key first, then recurse into the lower and upper halves.
    root = Insert(root, (string)tokens[mid], vals[mid], 0);
    BalancedTree(tokens, vals, lo, mid - 1, root);
    BalancedTree(tokens, vals, mid + 1, hi, root);
}
/// <summary>
/// Serializes <paramref name="node"/> and its subtrees to <paramref name="out"/>
/// in pre-order: the node itself, then the low, equal and high children.
/// </summary>
/// <param name="out"> output the nodes are written to. </param>
/// <param name="node"> node to serialize. </param>
private void WriteRecursively(DataOutput @out, TernaryTreeNode node)
{
    bool hasToken = node.token != null;
    bool hasValue = node.val != null;
    bool hasLow = node.loKid != null;
    bool hasEqual = node.eqKid != null;
    bool hasHigh = node.hiKid != null;

    // The split character is written as a one-character string.
    @out.WriteString(node.splitchar.ToString());

    // One flag byte announces which optional parts and children follow.
    sbyte mask = 0;
    if (hasToken)
    {
        mask |= HAS_TOKEN;
    }
    if (hasValue)
    {
        mask |= HAS_VALUE;
    }
    if (hasLow)
    {
        mask |= LO_KID;
    }
    if (hasEqual)
    {
        mask |= EQ_KID;
    }
    if (hasHigh)
    {
        mask |= HI_KID;
    }
    @out.WriteByte((byte)mask);

    if (hasToken)
    {
        @out.WriteString(node.token);
    }
    if (hasValue)
    {
        @out.WriteInt64((long)node.val);
    }

    // Children are emitted in the fixed order low, equal, high.
    if (hasLow)
    {
        WriteRecursively(@out, node.loKid);
    }
    if (hasEqual)
    {
        WriteRecursively(@out, node.eqKid);
    }
    if (hasHigh)
    {
        WriteRecursively(@out, node.hiKid);
    }
}
/// <summary>
/// Reads the entry count and the whole tree from <paramref name="input"/>,
/// replacing the current root. The work is bracketed by
/// <c>UninterruptableMonitor.Enter(this)</c> / <c>Exit(this)</c>.
/// </summary>
/// <param name="input"> input to read the serialized suggester from. </param>
/// <returns> always <c>true</c>. </returns>
public override bool Load(DataInput input)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        // The count comes first, followed by the nodes in pre-order.
        count = input.ReadVInt64();

        TernaryTreeNode freshRoot = new TernaryTreeNode();
        root = freshRoot;
        ReadRecursively(input, freshRoot);

        return true;
    }
    finally
    {
        // Always release the monitor, even when reading fails.
        UninterruptableMonitor.Exit(this);
    }
}
/// <summary>
/// Inserts a key in TST creating a series of Binary Search Trees at each node.
/// The key is actually stored across the eqKid of each node in a successive
/// manner.
/// </summary>
/// <param name="currentNode">
///          a reference node where the insertion will take currently. </param>
/// <param name="s">
///          key to be inserted in TST. </param>
/// <param name="val">
///          value stored on the node that terminates the key. </param>
/// <param name="x">
///          index of character in key to be inserted currently. </param>
/// <returns> The new reference to root node of TST; <paramref name="currentNode"/>
///         unchanged when <paramref name="s"/> is null or <paramref name="x"/> is
///         at or past its end. </returns>
public virtual TernaryTreeNode Insert(TernaryTreeNode currentNode, string s, object val, int x)
{
    if (s == null || s.Length <= x)
    {
        return currentNode;
    }

    // System.String has no charAt method (a Java leftover); use the indexer.
    char c = s[x];
    bool moreChars = x < s.Length - 1;

    if (currentNode == null)
    {
        // No node here yet: create one for the current character.
        TernaryTreeNode newNode = new TernaryTreeNode();
        newNode.splitchar = c;
        currentNode = newNode;
        if (moreChars)
        {
            currentNode.eqKid = Insert(currentNode.eqKid, s, val, x + 1);
        }
        else
        {
            // Last character: this node terminates the key.
            currentNode.token = s;
            currentNode.val = val;
            return currentNode;
        }
    }
    else if (currentNode.splitchar > c)
    {
        currentNode.loKid = Insert(currentNode.loKid, s, val, x);
    }
    else if (currentNode.splitchar == c)
    {
        if (moreChars)
        {
            currentNode.eqKid = Insert(currentNode.eqKid, s, val, x + 1);
        }
        else
        {
            // Key already ends here: overwrite token and value.
            currentNode.token = s;
            currentNode.val = val;
            return currentNode;
        }
    }
    else
    {
        currentNode.hiKid = Insert(currentNode.hiKid, s, val, x);
    }
    return currentNode;
}
/// <summary>
/// Adds the key <paramref name="s"/> to the ternary search tree rooted at
/// <paramref name="currentNode"/>, starting at character index <paramref name="x"/>.
/// Each character of the key occupies one node along a chain of eqKid links; the
/// node for the last character receives the token and the value.
/// </summary>
/// <param name="currentNode">
///          node at which the insertion currently takes place; may be null. </param>
/// <param name="s">
///          key to be inserted in TST. </param>
/// <param name="val">
///          value stored on the node that terminates the key. </param>
/// <param name="x">
///          index of character in key to be inserted currently. </param>
/// <returns> The new reference to root node of TST </returns>
public virtual TernaryTreeNode Insert(TernaryTreeNode currentNode, string s, object val, int x)
{
    if (s is null || x >= s.Length)
    {
        return currentNode;
    }

    char ch = s[x];

    if (currentNode is null)
    {
        // Nothing here yet: create the node for this character. It then matches
        // ch by construction and is handled by the "equal" case below.
        currentNode = new TernaryTreeNode();
        currentNode.splitchar = ch;
    }

    if (ch == currentNode.splitchar)
    {
        bool isLastChar = !(x < s.Length - 1);
        if (isLastChar)
        {
            // The key ends on this node.
            currentNode.token = s;
            currentNode.val = val;
        }
        else
        {
            currentNode.eqKid = Insert(currentNode.eqKid, s, val, x + 1);
        }
    }
    else if (ch < currentNode.splitchar)
    {
        currentNode.loKid = Insert(currentNode.loKid, s, val, x);
    }
    else
    {
        currentNode.hiKid = Insert(currentNode.hiKid, s, val, x);
    }

    return currentNode;
}
/// <summary>
/// Replaces the current tree with one built from the entries of
/// <paramref name="enumerator"/>. All entries are buffered in memory first and
/// then handed to <c>autocomplete.BalancedTree</c>.
/// </summary>
/// <param name="enumerator"> source of keys and weights. </param>
/// <exception cref="ArgumentNullException"> if <paramref name="enumerator"/> is null. </exception>
/// <exception cref="ArgumentException"> if the input reports payloads or contexts. </exception>
public override void Build(IInputEnumerator enumerator)
{
    // LUCENENET: Added guard clause for null
    if (enumerator is null)
    {
        throw new ArgumentNullException(nameof(enumerator));
    }
    if (enumerator.HasPayloads)
    {
        throw new ArgumentException("this suggester doesn't support payloads");
    }
    if (enumerator.HasContexts)
    {
        throw new ArgumentException("this suggester doesn't support contexts");
    }

    root = new TernaryTreeNode();

#pragma warning disable 612, 618
    // Wrap the input so that it is sorted in UTF-16 order unless it already
    // advertises exactly that comparer.
    if (enumerator.Comparer != BytesRef.UTF8SortedAsUTF16Comparer)
    {
        enumerator = new SortedInputEnumerator(enumerator, BytesRef.UTF8SortedAsUTF16Comparer);
    }
#pragma warning restore 612, 618

    // Buffer every key (converted to UTF-16) together with its weight.
    JCG.List<string> keys = new JCG.List<string>();
    JCG.List<object> weights = new JCG.List<object>();
    CharsRef utf16 = new CharsRef();
    while (enumerator.MoveNext())
    {
        BytesRef utf8 = enumerator.Current;
        utf16.Grow(utf8.Length);
        UnicodeUtil.UTF8toUTF16(utf8.Bytes, utf8.Offset, utf8.Length, utf16);
        keys.Add(utf16.ToString());
        weights.Add(enumerator.Weight);
    }

    autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, keys.Count - 1, root);
}
/// <summary>
/// Looks up completions for <paramref name="key"/> via
/// <c>autocomplete.PrefixCompletion</c> and converts the matching nodes into
/// <see cref="LookupResult"/>s.
/// </summary>
/// <param name="key"> prefix to complete. </param>
/// <param name="contexts"> must be null; contexts are not supported. </param>
/// <param name="onlyMorePopular"> when true, results are collected through a
///          <c>LookupPriorityQueue</c> of size <paramref name="num"/>; otherwise the
///          first matches are returned in traversal order. </param>
/// <param name="num"> maximum number of results. </param>
/// <returns> the results; an empty list when nothing matches. </returns>
/// <exception cref="System.ArgumentException"> if <paramref name="contexts"/> is not null. </exception>
public override IList<LookupResult> DoLookup(string key, IEnumerable<BytesRef> contexts, bool onlyMorePopular, int num)
{
    if (contexts != null)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }

    IList<TernaryTreeNode> matches = autocomplete.PrefixCompletion(root, key, 0);
    List<LookupResult> results = new List<LookupResult>();
    if (matches == null || matches.Count == 0)
    {
        return results;
    }

    if (!onlyMorePopular)
    {
        // Take the first matches as they come, capped at num.
        int limit = Math.Min(num, matches.Count);
        for (int idx = 0; idx < limit; idx++)
        {
            TernaryTreeNode hit = matches[idx];
            results.Add(new LookupResult(hit.token, (long)hit.val));
        }
        return results;
    }

    // Feed every match through the bounded queue and copy out what it retains.
    LookupPriorityQueue best = new LookupPriorityQueue(num);
    foreach (TernaryTreeNode hit in matches)
    {
        best.InsertWithOverflow(new LookupResult(hit.token, (long)hit.val));
    }
    foreach (LookupResult kept in best.GetResults())
    {
        results.Add(kept);
    }
    return results;
}
/// <summary>
/// Auto-completes a given prefix query using Depth-First Search with the end
/// of prefix as source node each time finding a new leaf to get a complete key
/// to be added in the suggest list.
/// </summary>
/// <param name="root">
///          a reference to root node of TST. </param>
/// <param name="s">
///          prefix query to be auto-completed. A null or empty prefix yields an
///          empty result. </param>
/// <param name="x">
///          index of current character to be searched while traversing through
///          the prefix in TST. </param>
/// <returns> suggest list of auto-completed keys for the given prefix query;
///         never null. </returns>
public virtual IList<TernaryTreeNode> PrefixCompletion(TernaryTreeNode root, string s, int x)
{
    JCG.List<TernaryTreeNode> suggest = new JCG.List<TernaryTreeNode>();

    // Without a character at index x there is nothing to match. Indexing s[x]
    // below would otherwise throw for an empty (or null) prefix.
    if (string.IsNullOrEmpty(s) || x >= s.Length)
    {
        return suggest;
    }

    // Phase 1: walk down the tree until the node matching the last prefix
    // character is found (or the tree runs out).
    TernaryTreeNode p = root;
    while (p != null)
    {
        char c = s[x];
        if (c < p.splitchar)
        {
            p = p.loKid;
        }
        else if (c == p.splitchar)
        {
            if (x == s.Length - 1)
            {
                break;
            }
            x++;
            p = p.eqKid;
        }
        else
        {
            p = p.hiKid;
        }
    }

    if (p is null)
    {
        return suggest;
    }

    // The prefix itself may be a complete key.
    if (p.token != null)
    {
        suggest.Add(p);
    }
    if (p.eqKid is null)
    {
        return suggest;
    }

    // Phase 2: depth-first walk of everything below the prefix node. Children
    // are pushed in the order eq, lo, hi, so they are visited hi, lo, eq.
    var st = new Stack<TernaryTreeNode>();
    st.Push(p.eqKid);
    while (st.Count > 0)
    {
        TernaryTreeNode top = st.Pop();
        if (top.token != null)
        {
            suggest.Add(top);
        }
        if (top.eqKid != null)
        {
            st.Push(top.eqKid);
        }
        if (top.loKid != null)
        {
            st.Push(top.loKid);
        }
        if (top.hiKid != null)
        {
            st.Push(top.hiKid);
        }
    }
    return suggest;
}
/// <summary>
/// Writes <paramref name="node"/> followed by its subtrees to
/// <paramref name="out"/>. The traversal is pre-order, visiting the low, equal
/// and high children in that order.
/// </summary>
/// <param name="out"> output the nodes are written to. </param>
/// <param name="node"> node to serialize. </param>
private void WriteRecursively(DataOutput @out, TernaryTreeNode node)
{
    TernaryTreeNode low = node.loKid;
    TernaryTreeNode equal = node.eqKid;
    TernaryTreeNode high = node.hiKid;
    bool tokenPresent = node.token != null;
    bool valuePresent = node.val != null;

    // Node header: the split character as a one-character string ...
    @out.WriteString(new string(node.splitchar, 1));

    // ... then a flag byte describing which optional parts and kids follow.
    sbyte mask = 0;
    if (low != null)
    {
        mask |= LO_KID;
    }
    if (equal != null)
    {
        mask |= EQ_KID;
    }
    if (high != null)
    {
        mask |= HI_KID;
    }
    if (tokenPresent)
    {
        mask |= HAS_TOKEN;
    }
    if (valuePresent)
    {
        mask |= HAS_VALUE;
    }
    @out.WriteByte((byte)mask);

    if (tokenPresent)
    {
        @out.WriteString(node.token);
    }
    if (valuePresent)
    {
        @out.WriteLong((long)node.val);
    }

    // Recurse into the kids that exist.
    if (low != null)
    {
        WriteRecursively(@out, low);
    }
    if (equal != null)
    {
        WriteRecursively(@out, equal);
    }
    if (high != null)
    {
        WriteRecursively(@out, high);
    }
}
/// <summary>
/// Reads one node record from <paramref name="in"/> into <paramref name="node"/>
/// and then recursively reads whichever children the record announces. The
/// traversal is pre-order, visiting low, equal and high children in that order.
/// </summary>
/// <param name="in"> input the serialized nodes are read from. </param>
/// <param name="node"> node whose fields (and children) are populated. </param>
private void ReadRecursively(DataInput @in, TernaryTreeNode node)
{
    // Header: split character (stored as a string) followed by the flag byte.
    string splitText = @in.ReadString();
    node.splitchar = splitText.First();
    sbyte mask = (sbyte)@in.ReadByte();

    // Optional payload announced by the flags.
    if (0 != (mask & HAS_TOKEN))
    {
        node.token = @in.ReadString();
    }
    if (0 != (mask & HAS_VALUE))
    {
        node.val = Convert.ToInt64(@in.ReadLong());
    }

    // Children, each allocated and attached before it is filled in.
    if (0 != (mask & LO_KID))
    {
        TernaryTreeNode child = new TernaryTreeNode();
        node.loKid = child;
        ReadRecursively(@in, child);
    }
    if (0 != (mask & EQ_KID))
    {
        TernaryTreeNode child = new TernaryTreeNode();
        node.eqKid = child;
        ReadRecursively(@in, child);
    }
    if (0 != (mask & HI_KID))
    {
        TernaryTreeNode child = new TernaryTreeNode();
        node.hiKid = child;
        ReadRecursively(@in, child);
    }
}
/// <summary>
/// Restores the suggester from <paramref name="input"/>: first the entry count,
/// then the tree, which replaces the current root. Runs while holding the
/// monitor of this instance.
/// </summary>
/// <param name="input"> input to read the serialized suggester from. </param>
/// <returns> always <c>true</c>. </returns>
public override bool Load(DataInput input)
{
    lock (this)
    {
        count = input.ReadVLong();

        // Install the new root before filling it in, then read the nodes in pre-order.
        TernaryTreeNode loadedRoot = new TernaryTreeNode();
        root = loadedRoot;
        ReadRecursively(input, loadedRoot);

        return true;
    }
}
/// <summary>
/// Replaces the current tree with one built from the entries of <paramref name="tfit"/>.
/// All entries are buffered in memory first and then handed to
/// <c>autocomplete.BalancedTree</c>.
/// </summary>
/// <param name="tfit"> source of keys and weights. </param>
/// <exception cref="System.ArgumentException"> if the input reports payloads or contexts. </exception>
public override void Build(InputIterator tfit)
{
    if (tfit.HasPayloads)
    {
        throw new System.ArgumentException("this suggester doesn't support payloads");
    }
    if (tfit.HasContexts)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }
    root = new TernaryTreeNode();

    // buffer first
    if (tfit.Comparator != BytesRef.UTF8SortedAsUTF16Comparator)
    {
        // make sure it's sorted and the comparator uses UTF16 sort order
        tfit = new SortedInputIterator(tfit, BytesRef.UTF8SortedAsUTF16Comparator);
    }

    List<string> tokens = new List<string>();
    // java.lang.Number has no .NET counterpart; the weights are boxed longs and
    // BalancedTree takes object[], so the buffer is a List<object>.
    List<object> vals = new List<object>();
    BytesRef spare;
    CharsRef charsSpare = new CharsRef();
    while ((spare = tfit.Next()) != null)
    {
        charsSpare.Grow(spare.Length);
        UnicodeUtil.UTF8toUTF16(spare.Bytes, spare.Offset, spare.Length, charsSpare);
        tokens.Add(charsSpare.ToString());
        vals.Add(Convert.ToInt64(tfit.Weight));
    }
    autocomplete.BalancedTree(tokens.ToArray(), vals.ToArray(), 0, tokens.Count - 1, root);
}
/// <summary>
/// Discards the current tree and builds a new one from <paramref name="tfit"/>.
/// Keys and weights are collected into lists first; the lists are then passed
/// to <c>autocomplete.BalancedTree</c>.
/// </summary>
/// <param name="tfit"> source of keys and weights. </param>
/// <exception cref="System.ArgumentException"> if the input reports payloads or contexts. </exception>
public override void Build(IInputIterator tfit)
{
    if (tfit.HasPayloads)
    {
        throw new System.ArgumentException("this suggester doesn't support payloads");
    }
    if (tfit.HasContexts)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }

    root = new TernaryTreeNode();

#pragma warning disable 612, 618
    // Re-sort through SortedInputIterator unless the input already reports the
    // UTF-16-order comparer.
    if (tfit.Comparator != BytesRef.UTF8SortedAsUTF16Comparer)
    {
        tfit = new SortedInputIterator(tfit, BytesRef.UTF8SortedAsUTF16Comparer);
    }
#pragma warning restore 612, 618

    List<string> keys = new List<string>();
    // LUCENENET TODO: Should this be long? in Java it was Number, but we can probably do better than object
    List<object> weights = new List<object>();
    CharsRef scratch = new CharsRef();

    while (true)
    {
        BytesRef term = tfit.Next();
        if (term == null)
        {
            break;
        }

        // Decode the UTF-8 bytes into the reusable UTF-16 scratch buffer.
        scratch.Grow(term.Length);
        UnicodeUtil.UTF8toUTF16(term.Bytes, term.Offset, term.Length, scratch);
        keys.Add(scratch.ToString());
        weights.Add(Convert.ToInt64(tfit.Weight));
    }

    autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, keys.Count - 1, root);
}
/// <summary>
/// Auto-completes a given prefix query using Depth-First Search with the end
/// of prefix as source node each time finding a new leaf to get a complete key
/// to be added in the suggest list.
/// </summary>
/// <param name="root">
///          a reference to root node of TST. </param>
/// <param name="s">
///          prefix query to be auto-completed. A null or empty prefix yields an
///          empty result. </param>
/// <param name="x">
///          index of current character to be searched while traversing through
///          the prefix in TST. </param>
/// <returns> suggest list of auto-completed keys for the given prefix query;
///         never null. </returns>
public virtual List<TernaryTreeNode> PrefixCompletion(TernaryTreeNode root, string s, int x)
{
    List<TernaryTreeNode> suggest = new List<TernaryTreeNode>();

    // Without a character at index x there is nothing to match. Indexing s[x]
    // below would otherwise throw for an empty (or null) prefix.
    if (string.IsNullOrEmpty(s) || x >= s.Length)
    {
        return suggest;
    }

    // Phase 1: walk down the tree until the node matching the last prefix
    // character is found (or the tree runs out).
    TernaryTreeNode p = root;
    while (p != null)
    {
        // System.String has no charAt method (a Java leftover); use the indexer.
        char c = s[x];
        if (c < p.splitchar)
        {
            p = p.loKid;
        }
        else if (c == p.splitchar)
        {
            if (x == s.Length - 1)
            {
                break;
            }
            x++;
            p = p.eqKid;
        }
        else
        {
            p = p.hiKid;
        }
    }

    if (p == null)
    {
        return suggest;
    }

    // The prefix itself may be a complete key.
    if (p.token != null)
    {
        suggest.Add(p);
    }
    if (p.eqKid == null)
    {
        return suggest;
    }

    // Phase 2: depth-first walk of everything below the prefix node. Children
    // are pushed in the order eq, lo, hi, so they are visited hi, lo, eq.
    var st = new Stack<TernaryTreeNode>();
    st.Push(p.eqKid);
    while (st.Count > 0)
    {
        TernaryTreeNode top = st.Pop();
        if (top.token != null)
        {
            suggest.Add(top);
        }
        if (top.eqKid != null)
        {
            st.Push(top.eqKid);
        }
        if (top.loKid != null)
        {
            st.Push(top.loKid);
        }
        if (top.hiKid != null)
        {
            st.Push(top.hiKid);
        }
    }
    return suggest;
}