/// <summary>
/// Classifies <paramref name="prefix"/> against the trie, starting the lookup at
/// <paramref name="start"/>.
/// </summary>
/// <param name="prefix">The character sequence to look up.</param>
/// <param name="start">The node at which the lookup begins.</param>
/// <returns>
/// <c>Prefix.MisMatch</c> when the lookup finds no node; <c>Prefix.Match</c> when the
/// node found carries a data value other than -1; <c>Prefix.MatchPrefix</c> otherwise.
/// </returns>
public Prefix checkPrefix(String prefix, TSTNode start)
{
    TSTNode found = getNode(prefix, start);
    if (found != null)
    {
        // -1 is the data value that marks a node which does not end a stored entry.
        return found.data == -1 ? Prefix.MatchPrefix : Prefix.Match;
    }
    return Prefix.MisMatch;
}
/// <summary>
/// Builds the trie: every entry of <c>_specialchars</c> is inserted and the node that
/// ends the entry is given a data value of 1.
/// </summary>
/// <remarks>
/// Null or empty entries are skipped. Previously the first character of an entry was
/// read before its length was checked, so an empty entry threw while the root was
/// still unset.
/// </remarks>
private TSTree()
{
    for (int i = 0; i < _specialchars.Length; i++)
    {
        String word = _specialchars[i];
        if (String.IsNullOrEmpty(word))
        {
            // Nothing to insert: there is no first character to index.
            continue;
        }

        if (rootNode == null)
        {
            // Seed the root from the first usable entry.
            rootNode = new TSTNode(word[0]);
        }

        TSTNode terminal = getOrCreateNode(word);
        terminal.data = 1;
    }
}
// Used by checkPrefix.
/// <summary>
/// Walks the trie from <paramref name="startNode"/> and returns the node reached after
/// matching every character of <paramref name="key"/>.
/// </summary>
/// <param name="key">The character sequence to match.</param>
/// <param name="startNode">The node at which the walk begins.</param>
/// <returns>
/// The node that matches the last character of <paramref name="key"/>, or <c>null</c>
/// when the key is null or empty or the walk runs off the trie.
/// </returns>
private TSTNode getNode(String key, TSTNode startNode)
{
    if (key == null || key.Length == 0)
    {
        return null;
    }

    TSTNode node = startNode;
    int matched = 0;
    while (node != null)
    {
        int diff = key[matched] - node.splitchar;
        if (diff < 0)
        {
            node = node.LOKID;
        }
        else if (diff > 0)
        {
            node = node.HIKID;
        }
        else
        {
            matched++;
            if (matched == key.Length)
            {
                return node;
            }
            node = node.EQKID;
        }
    }

    // Fell off the trie before the whole key was matched.
    return null;
}
/// <summary>
/// Creates a node that holds <paramref name="splitchar"/> and is linked to
/// <paramref name="parent"/>.
/// </summary>
/// <param name="splitchar">The character stored in this node.</param>
/// <param name="parent">The node stored in the <c>PARENT</c> slot of <c>relatives</c>.</param>
internal TSTNode(char splitchar, TSTNode parent)
{
    relatives[PARENT] = parent;
    this.splitchar = splitchar;
}
/// <summary>
/// Creates a node that belongs to <paramref name="outerInstance"/>, holds
/// <paramref name="splitchar"/> and is linked to <paramref name="parent"/>.
/// </summary>
/// <param name="outerInstance">The containing <see cref="JaspellTernarySearchTrie"/>.</param>
/// <param name="splitchar">The character stored in this node.</param>
/// <param name="parent">The node stored in the <c>PARENT</c> slot of <c>relatives</c>.</param>
internal TSTNode(JaspellTernarySearchTrie outerInstance, char splitchar, TSTNode parent)
{
    relatives[PARENT] = parent;
    this.splitchar = splitchar;
    this.outerInstance = outerInstance;
}
/// <summary>
/// Collects the keys of the subtrie rooted at <paramref name="startNode"/> by delegating
/// to <c>SortKeysRecursion</c> with a fresh, empty result list.
/// <para>
/// At most <paramref name="numReturnValues"/> keys are requested; any negative value is
/// passed on as -1, which asks for an unlimited list.
/// </para>
/// </summary>
/// <param name="startNode">The top node defining the subtrie to be searched.</param>
/// <param name="numReturnValues">The maximum number of values to return, or a negative number for no limit.</param>
/// <returns>A <see cref="IList{String}"/> with the results.</returns>
protected virtual IList<string> SortKeys(TSTNode startNode, int numReturnValues)
{
    // Normalize every negative limit to the single "unlimited" sentinel.
    int limit = numReturnValues;
    if (limit < 0)
    {
        limit = -1;
    }

    List<string> keys = new List<string>();
    return SortKeysRecursion(startNode, limit, keys);
}
/// <summary>
/// Performs one step of node deletion and returns the next node to process.
///
/// To delete a node, first set its data to null, then pass it into this
/// method, then pass the node returned by this method into this method (make
/// sure you don't delete the data of any of the nodes returned from this
/// method!) and continue in this fashion until the node returned by this
/// method is <c>null</c>.
///
/// The TSTNode instance returned by this method will be the next node to be
/// operated on by <see cref="DeleteNodeRecursion(TSTNode)"/> (this emulates a recursive
/// method call while avoiding the overhead normally associated with a
/// recursive method).
/// </summary>
/// <remarks>
/// Fixes over the previous version:
/// a parentless (root) node is handled instead of dereferencing its null parent;
/// when the node has both a low and a high child, the whole surviving subtree takes
/// the node's place (previously only the deepest node of the descent did, orphaning
/// the nodes above it); and the re-attached subtree's parent link is updated.
/// </remarks>
/// <param name="currentNode"> The node to delete. </param>
/// <returns> The next node to be called in deleteNodeRecursion, or <c>null</c> when done. </returns>
private TSTNode DeleteNodeRecursion(TSTNode currentNode)
{
    if (currentNode == null)
    {
        return null;
    }

    // A node that still has an EQ child or carries data cannot be deleted.
    if (currentNode.relatives[TSTNode.EQKID] != null || currentNode.data != null)
    {
        return null;
    }

    TSTNode currentParent = currentNode.relatives[TSTNode.PARENT];
    TSTNode loKid = currentNode.relatives[TSTNode.LOKID];
    TSTNode hiKid = currentNode.relatives[TSTNode.HIKID];

    // Find the slot that refers to currentNode; it stays unused when the node is the root.
    int childType = TSTNode.EQKID;
    if (currentParent == null)
    {
        if (currentNode != rootNode)
        {
            // Detached node: it is not part of this trie, so there is nothing to unlink.
            return null;
        }
    }
    else if (currentParent.relatives[TSTNode.LOKID] == currentNode)
    {
        childType = TSTNode.LOKID;
    }
    else if (currentParent.relatives[TSTNode.EQKID] == currentNode)
    {
        childType = TSTNode.EQKID;
    }
    else if (currentParent.relatives[TSTNode.HIKID] == currentNode)
    {
        childType = TSTNode.HIKID;
    }
    else
    {
        // The parent does not reference this node: links are inconsistent, reset the trie.
        rootNode = null;
        return null;
    }

    TSTNode replacement;
    if (loKid == null || hiKid == null)
    {
        // Zero or one side subtree: it (or nothing) simply takes the node's place.
        replacement = loKid ?? hiKid;
    }
    else
    {
        int deltaHi = hiKid.splitchar - currentNode.splitchar;
        int deltaLo = currentNode.splitchar - loKid.splitchar;
        if (deltaHi == deltaLo)
        {
            // Tie-break. The seed is fixed, so the same choice is made on every call.
            if (new Random(1).NextDouble() < 0.5)
            {
                deltaHi++;
            }
            else
            {
                deltaLo++;
            }
        }

        // The child farther from the deleted character is moved under the nearer one.
        int movingKid = (deltaHi > deltaLo) ? TSTNode.HIKID : TSTNode.LOKID;
        TSTNode movedSubtree = currentNode.relatives[movingKid];
        replacement = (movingKid == TSTNode.HIKID) ? loKid : hiKid;

        // Descend to the first free slot on the moving side of the surviving subtree.
        TSTNode attachPoint = replacement;
        while (attachPoint.relatives[movingKid] != null)
        {
            attachPoint = attachPoint.relatives[movingKid];
        }
        attachPoint.relatives[movingKid] = movedSubtree;
        movedSubtree.relatives[TSTNode.PARENT] = attachPoint;
    }

    // Put the replacement where the deleted node used to be.
    if (currentParent == null)
    {
        rootNode = replacement;
    }
    else
    {
        currentParent.relatives[childType] = replacement;
    }
    if (replacement != null)
    {
        replacement.relatives[TSTNode.PARENT] = currentParent;
    }

    // Drop the deleted node's outgoing links so it no longer references the trie.
    currentNode.relatives[TSTNode.LOKID] = null;
    currentNode.relatives[TSTNode.HIKID] = null;

    return currentParent;
}
/// <summary>
/// Counts the nodes of the subtrie rooted at <paramref name="startingNode"/>, the
/// starting node included. Nodes are counted whether or not they have data.
/// </summary>
/// <param name="startingNode">The top node of the subtrie; the node that defines the subtrie.</param>
/// <returns>The total number of nodes in the subtrie.</returns>
protected internal virtual int NumNodes(TSTNode startingNode)
{
    // Data is not checked, and the running count starts at zero.
    const bool checkData = false;
    int total = RecursiveNodeCalculator(startingNode, checkData, 0);
    return total;
}
/// <summary>
/// Returns how many nodes the subtrie contains, counting
/// <paramref name="startingNode"/> itself and every node below it, with or without data.
/// </summary>
/// <param name="startingNode">The node that defines (is the top of) the subtrie.</param>
/// <returns>The total number of nodes in the subtrie.</returns>
protected internal virtual int NumNodes(TSTNode startingNode)
{
    int countSoFar = 0;
    // false: do not restrict the count to nodes that carry data.
    return RecursiveNodeCalculator(startingNode, false, countSoFar);
}
/// <summary>
/// Constructs a Ternary Search Trie and loads data from a <see cref="FileInfo"/>
/// into the Trie. The file is a normal text document, where each line is of
/// the form "word TAB float". A line without a TAB counts as one occurrence.
///
/// <para>Uses the supplied culture to lowercase words before comparing.</para>
/// </summary>
/// <remarks>
/// Each word is trimmed and lowercased once, and that same key is used both to look up
/// the occurrences already stored and to store the new total. Lines whose word is empty
/// after trimming are skipped. The file is opened for reading only.
/// </remarks>
/// <param name="file">
///          The <see cref="FileInfo"/> with the data to load into the Trie. </param>
/// <param name="compression">
///          If true, the file is compressed with the GZIP algorithm, and if
///          false, the file is a normal text document. </param>
/// <param name="culture">The culture used for lowercasing.</param>
/// <exception cref="System.IO.IOException">
///              A problem occurred while reading the data. </exception>
public JaspellTernarySearchTrie(FileInfo file, bool compression, CultureInfo culture)
    : this(culture)
{
    // Nested usings release the file handle even if a wrapper fails to construct.
    using (Stream fileStream = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
    using (Stream input = compression ? new GZipStream(fileStream, CompressionMode.Decompress) : fileStream)
    using (TextReader @in = IOUtils.GetDecodingReader(input, Encoding.UTF8))
    {
        string line;
        while ((line = @in.ReadLine()) != null)
        {
            string word = line;
            float? occur = new float?(1);
            int pos = line.IndexOf('\t');
            if (pos != -1)
            {
                occur = Convert.ToSingle(line.Substring(pos + 1).Trim(), CultureInfo.InvariantCulture);
                word = line.Substring(0, pos);
            }

            string key = culture.TextInfo.ToLower(word.Trim());
            if (key.Length == 0)
            {
                // Blank word: there is no character to index the trie with.
                continue;
            }

            // Look for an existing node so repeated words accumulate their counts.
            TSTNode node = null;
            TSTNode currentNode = rootNode;
            int charIndex = 0;
            while (currentNode != null)
            {
                int charComp = CompareCharsAlphabetically(key[charIndex], currentNode.splitchar, culture);
                if (charComp == 0)
                {
                    charIndex++;
                    if (charIndex == key.Length)
                    {
                        node = currentNode;
                        break;
                    }
                    currentNode = currentNode.relatives[TSTNode.EQKID];
                }
                else if (charComp < 0)
                {
                    currentNode = currentNode.relatives[TSTNode.LOKID];
                }
                else
                {
                    currentNode = currentNode.relatives[TSTNode.HIKID];
                }
            }

            float? occur2 = null;
            if (node != null)
            {
                occur2 = ((float?)(node.data));
            }
            if (occur2 != null)
            {
                occur += (float)occur2;
            }

            currentNode = GetOrCreateNode(key);
            currentNode.data = occur;
        }
    }
}
/// <summary>
/// Returns the node indexed by key, or <code>null</code> if that node doesn't
/// exist. The search begins at <paramref name="startNode"/>.
/// </summary>
/// <param name="key">
///          A <code>String</code> that indexes the node that is returned. </param>
/// <param name="startNode">
///          The top node defining the subtrie to be searched. </param>
/// <returns> The node object indexed by key, or <code>null</code> when the key or the
///         start node is null, the key is empty, or no such node exists. This object
///         is an instance of an inner class named <code>TernarySearchTrie.TSTNode</code>. </returns>
protected internal virtual TSTNode GetNode(string key, TSTNode startNode)
{
    if (key == null || startNode == null || key.Length == 0)
    {
        return null;
    }

    TSTNode currentNode = startNode;
    int charIndex = 0;
    while (currentNode != null)
    {
        // C# strings are indexed directly; there is no charAt method.
        int charComp = compareCharsAlphabetically(key[charIndex], currentNode.splitchar);
        if (charComp == 0)
        {
            charIndex++;
            if (charIndex == key.Length)
            {
                return currentNode;
            }
            currentNode = currentNode.relatives[TSTNode.EQKID];
        }
        else if (charComp < 0)
        {
            currentNode = currentNode.relatives[TSTNode.LOKID];
        }
        else
        {
            currentNode = currentNode.relatives[TSTNode.HIKID];
        }
    }

    // Ran off the trie before the whole key was matched.
    return null;
}
/// <summary>
/// Rebuilds the key that indexes <paramref name="node"/> by walking its parent links.
/// </summary>
/// <param name="node">
///          The node whose index is to be calculated. </param>
/// <returns> The <code>String</code> that indexes the node argument. </returns>
protected internal virtual string getKey(TSTNode node)
{
    // Characters are gathered from the node upwards, so the buffer holds the key backwards.
    StringBuilder reversedKey = new StringBuilder();
    reversedKey.Append(node.splitchar);

    TSTNode child = node;
    for (TSTNode ancestor = node.relatives[TSTNode.PARENT];
         ancestor != null;
         ancestor = ancestor.relatives[TSTNode.PARENT])
    {
        // Only an ancestor reached through its EQ link contributes a character.
        if (ancestor.relatives[TSTNode.EQKID] == child)
        {
            reversedKey.Append(ancestor.splitchar);
        }
        child = ancestor;
    }

    reversedKey.Reverse();
    return reversedKey.ToString();
}
/// <summary>
/// Recursively visits the nodes in order to find the ones that almost match a
/// given key.
/// </summary>
/// <param name="currentNode">
///          The current node. </param>
/// <param name="charIndex">
///          The current char. </param>
/// <param name="d">
///          The number of mismatches still allowed; the search stops once it drops below zero. </param>
/// <param name="matchAlmostKey">
///          The key being searched. </param>
/// <param name="matchAlmostNumReturnValues">
///          The maximum number of values in the result <code>List</code>, or -1 for no limit. </param>
/// <param name="matchAlmostResult2">
///          The results so far. </param>
/// <param name="upTo">
///          If true all keys having up to and including matchAlmostDiff
///          mismatched letters will be included in the result (including a key
///          that is exactly the same as the target string) otherwise keys will
///          be included in the result only if they have exactly
///          matchAlmostDiff number of mismatched letters. </param>
/// <returns> A <code>List</code> with the results. </returns>
private IList<string> MatchAlmostRecursion(TSTNode currentNode, int charIndex, int d, string matchAlmostKey, int matchAlmostNumReturnValues, IList<string> matchAlmostResult2, bool upTo)
{
    // C# strings expose Length and an indexer; there is no length() or charAt().
    if ((currentNode == null)
        || (matchAlmostNumReturnValues != -1 && matchAlmostResult2.Count >= matchAlmostNumReturnValues)
        || (d < 0)
        || (charIndex >= matchAlmostKey.Length))
    {
        return matchAlmostResult2;
    }

    int charComp = compareCharsAlphabetically(matchAlmostKey[charIndex], currentNode.splitchar);
    IList<string> matchAlmostResult = matchAlmostResult2;

    // The low side is explored when mismatches remain or the key character sorts lower.
    if ((d > 0) || (charComp < 0))
    {
        matchAlmostResult = MatchAlmostRecursion(currentNode.relatives[TSTNode.LOKID], charIndex, d, matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo);
    }

    // Following the EQ link consumes one allowed mismatch unless the characters are equal.
    int nextD = (charComp == 0) ? d : d - 1;
    bool cond = (upTo) ? (nextD >= 0) : (nextD == 0);
    if ((matchAlmostKey.Length == charIndex + 1) && cond && (currentNode.data != null))
    {
        matchAlmostResult.Add(getKey(currentNode));
    }
    matchAlmostResult = MatchAlmostRecursion(currentNode.relatives[TSTNode.EQKID], charIndex + 1, nextD, matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo);

    // The high side is explored when mismatches remain or the key character sorts higher.
    if ((d > 0) || (charComp > 0))
    {
        matchAlmostResult = MatchAlmostRecursion(currentNode.relatives[TSTNode.HIKID], charIndex, d, matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo);
    }
    return matchAlmostResult;
}
/// <summary>
/// Recursively visits each node of a subtrie and adds the nodes found to a running count.
/// </summary>
/// <param name="currentNode">
///          The current node. </param>
/// <param name="checkData">
///          If true, only nodes whose data is not <code>null</code> are counted;
///          if false, every node is counted. </param>
/// <param name="numNodes2">
///          The number of nodes so far. </param>
/// <returns> The number of nodes accounted. </returns>
private int RecursiveNodeCalculator(TSTNode currentNode, bool checkData, int numNodes2)
{
    if (currentNode == null)
    {
        return numNodes2;
    }

    // Thread the running count through the low, equal and high subtries in that order.
    int count = numNodes2;
    count = RecursiveNodeCalculator(currentNode.relatives[TSTNode.LOKID], checkData, count);
    count = RecursiveNodeCalculator(currentNode.relatives[TSTNode.EQKID], checkData, count);
    count = RecursiveNodeCalculator(currentNode.relatives[TSTNode.HIKID], checkData, count);

    bool countThisNode = !checkData || currentNode.data != null;
    return countThisNode ? count + 1 : count;
}
/// <summary>
/// Performs one step of node deletion and returns the next node to process.
///
/// To delete a node, first set its data to null, then pass it into this
/// method, then pass the node returned by this method into this method (make
/// sure you don't delete the data of any of the nodes returned from this
/// method!) and continue in this fashion until the node returned by this
/// method is <code>null</code>.
///
/// The TSTNode instance returned by this method will be the next node to be
/// operated on by <code>DeleteNodeRecursion</code> (this emulates a recursive
/// method call while avoiding the overhead normally associated with a
/// recursive method).
/// </summary>
/// <remarks>
/// Unlike the previous version, a root node (one without a parent) is unlinked
/// instead of causing a null dereference. When the node has both a low and a high
/// child, the whole surviving subtree replaces the node (previously only the deepest
/// node of the descent did, which orphaned the nodes above it), and the re-attached
/// subtree gets its parent link updated.
/// </remarks>
/// <param name="currentNode">
///          The node to delete. </param>
/// <returns> The next node to be called in deleteNodeRecursion, or <code>null</code> when done. </returns>
private TSTNode DeleteNodeRecursion(TSTNode currentNode)
{
    if (currentNode == null)
    {
        return null;
    }

    // Can't delete this node if it has a non-null eq kid or data.
    if (currentNode.relatives[TSTNode.EQKID] != null || currentNode.data != null)
    {
        return null;
    }

    TSTNode currentParent = currentNode.relatives[TSTNode.PARENT];
    TSTNode lowChild = currentNode.relatives[TSTNode.LOKID];
    TSTNode highChild = currentNode.relatives[TSTNode.HIKID];
    bool isRoot = currentParent == null;

    // Which of the parent's slots points at currentNode (unused for the root).
    int childType = TSTNode.EQKID;
    if (isRoot)
    {
        if (currentNode != rootNode)
        {
            // A parentless node that is not the root is not attached to this trie.
            return null;
        }
    }
    else if (currentParent.relatives[TSTNode.LOKID] == currentNode)
    {
        childType = TSTNode.LOKID;
    }
    else if (currentParent.relatives[TSTNode.EQKID] == currentNode)
    {
        childType = TSTNode.EQKID;
    }
    else if (currentParent.relatives[TSTNode.HIKID] == currentNode)
    {
        childType = TSTNode.HIKID;
    }
    else
    {
        // Parent and child disagree about their link: reset the trie.
        rootNode = null;
        return null;
    }

    TSTNode successor;
    if (lowChild == null && highChild == null)
    {
        successor = null;
    }
    else if (lowChild == null)
    {
        successor = highChild;
    }
    else if (highChild == null)
    {
        successor = lowChild;
    }
    else
    {
        int deltaHi = highChild.splitchar - currentNode.splitchar;
        int deltaLo = currentNode.splitchar - lowChild.splitchar;
        if (deltaHi == deltaLo)
        {
            // Tie-break; the generator is seeded with a constant, so the outcome never varies.
            if (new Random(1).NextDouble() < 0.5)
            {
                deltaHi++;
            }
            else
            {
                deltaLo++;
            }
        }

        int movingKid;
        if (deltaHi > deltaLo)
        {
            movingKid = TSTNode.HIKID;
            successor = lowChild;
        }
        else
        {
            movingKid = TSTNode.LOKID;
            successor = highChild;
        }

        // Hang the moving subtree on the first free slot along the successor's moving side.
        TSTNode targetNode = successor;
        while (targetNode.relatives[movingKid] != null)
        {
            targetNode = targetNode.relatives[movingKid];
        }
        TSTNode moved = currentNode.relatives[movingKid];
        targetNode.relatives[movingKid] = moved;
        moved.relatives[TSTNode.PARENT] = targetNode;
    }

    // The successor (possibly null) now occupies the deleted node's position.
    if (isRoot)
    {
        rootNode = successor;
    }
    else
    {
        currentParent.relatives[childType] = successor;
    }
    if (successor != null)
    {
        successor.relatives[TSTNode.PARENT] = currentParent;
    }

    // Clear the deleted node's child links so it no longer references the trie.
    currentNode.relatives[TSTNode.LOKID] = null;
    currentNode.relatives[TSTNode.HIKID] = null;

    return currentParent;
}
/// <summary>
/// Deletes the node passed in as an argument: its data is cleared and the node is then
/// handed to <code>DeleteNodeRecursion</code>, whose return value is fed back in until
/// it returns <code>null</code>.
/// </summary>
/// <param name="nodeToDelete">
///          The node to delete. A <code>null</code> argument is ignored. </param>
private void DeleteNode(TSTNode nodeToDelete)
{
    if (nodeToDelete == null)
    {
        return;
    }

    nodeToDelete.data = null;

    // Each step returns the next node to process; null means there is nothing left to do.
    TSTNode pending = nodeToDelete;
    do
    {
        pending = DeleteNodeRecursion(pending);
    }
    while (pending != null);
}
/// <summary>
/// Collects the keys of the subtrie rooted at <paramref name="startNode"/> by delegating
/// to <code>sortKeysRecursion</code> with a fresh, empty result list.
/// <para>
/// The number of keys requested is limited to numReturnValues. Any negative value is
/// passed on as -1, which asks for a list that isn't limited in size.
/// </para>
/// </summary>
/// <param name="startNode">
///          The top node defining the subtrie to be searched. </param>
/// <param name="numReturnValues">
///          The maximum number of values returned from this method. </param>
/// <returns> A <code>List</code> with the results. </returns>
protected internal virtual IList<string> sortKeys(TSTNode startNode, int numReturnValues)
{
    int limit;
    if (numReturnValues < 0)
    {
        // Every negative request collapses to the single "no limit" sentinel.
        limit = -1;
    }
    else
    {
        limit = numReturnValues;
    }

    List<string> results = new List<string>();
    return sortKeysRecursion(startNode, limit, results);
}
/// <summary>
/// Returns the node indexed by key, creating that node if it doesn't exist,
/// and creating any required intermediate nodes if they don't exist.
/// </summary>
/// <param name="key">A <c>String</c> that indexes the node that is returned.</param>
/// <returns>The node object indexed by key.</returns>
/// <exception cref="ArgumentNullException">If the key is <c>null</c>.</exception>
/// <exception cref="ArgumentException">If the key is an empty <c>String</c>.</exception>
private TSTNode getOrCreateNode(String key)
{
    // Both exception types derive from Exception, so existing catch blocks keep working.
    if (key == null)
    {
        throw new ArgumentNullException(nameof(key));
    }
    if (key.Length == 0)
    {
        throw new ArgumentException("attempt to get or create node with key of zero length", nameof(key));
    }

    if (rootNode == null)
    {
        rootNode = new TSTNode(key[0]);
    }

    TSTNode currentNode = rootNode;
    int charIndex = 0;
    while (true)
    {
        int charComp = key[charIndex] - currentNode.splitchar;
        if (charComp == 0)
        {
            // Character matched: either the key is complete or we descend the EQ link.
            charIndex++;
            if (charIndex == key.Length)
            {
                return currentNode;
            }
            if (currentNode.EQKID == null)
            {
                currentNode.EQKID = new TSTNode(key[charIndex]);
            }
            currentNode = currentNode.EQKID;
        }
        else if (charComp < 0)
        {
            if (currentNode.LOKID == null)
            {
                currentNode.LOKID = new TSTNode(key[charIndex]);
            }
            currentNode = currentNode.LOKID;
        }
        else
        {
            if (currentNode.HIKID == null)
            {
                currentNode.HIKID = new TSTNode(key[charIndex]);
            }
            currentNode = currentNode.HIKID;
        }
    }
}
/// <summary>
/// Constructs a Ternary Search Trie and loads data from a <code>File</code>
/// into the Trie. The file is a normal text document, where each line is of
/// the form "word TAB float". A line without a TAB counts as one occurrence.
/// </summary>
/// <remarks>
/// Each word is trimmed and lowercased once, and that same key is used both to look up
/// the occurrences already stored and to store the new total. Lines whose word is empty
/// after trimming are skipped. The occurrence count is parsed with the invariant
/// culture, and the reader is closed even when reading fails.
/// </remarks>
/// <param name="file">
///          The <code>File</code> with the data to load into the Trie. </param>
/// <param name="compression">
///          If true, the file is compressed with the GZIP algorithm, and if
///          false, the file is a normal text document. </param>
/// <exception cref="IOException">
///              A problem occurred while reading the data. </exception>
public JaspellTernarySearchTrie(File file, bool compression) : this()
{
    BufferedReader @in;
    if (compression)
    {
        @in = new BufferedReader(IOUtils.getDecodingReader(new GZIPInputStream(new FileInputStream(file)), StandardCharsets.UTF_8));
    }
    else
    {
        @in = new BufferedReader(IOUtils.getDecodingReader((new FileInputStream(file)), StandardCharsets.UTF_8));
    }

    try
    {
        string line;
        while ((line = @in.readLine()) != null)
        {
            string word = line;
            float? occur = new float?(1);
            int pos = line.IndexOf("\t", StringComparison.Ordinal);
            if (pos != -1)
            {
                // The file is machine-readable, so the number format must not follow the current culture.
                occur = Convert.ToSingle(line.Substring(pos + 1).Trim(), System.Globalization.CultureInfo.InvariantCulture);
                word = line.Substring(0, pos);
            }

            string key = word.Trim().ToLower(locale);
            if (key.Length == 0)
            {
                // Blank word: there is no character to index the trie with.
                continue;
            }

            // Look for an existing node so repeated words accumulate their counts.
            TSTNode node = null;
            TSTNode currentNode = rootNode;
            int charIndex = 0;
            while (currentNode != null)
            {
                int charComp = compareCharsAlphabetically(key[charIndex], currentNode.splitchar);
                if (charComp == 0)
                {
                    charIndex++;
                    if (charIndex == key.Length)
                    {
                        node = currentNode;
                        break;
                    }
                    currentNode = currentNode.relatives[TSTNode.EQKID];
                }
                else if (charComp < 0)
                {
                    currentNode = currentNode.relatives[TSTNode.LOKID];
                }
                else
                {
                    currentNode = currentNode.relatives[TSTNode.HIKID];
                }
            }

            float? occur2 = null;
            if (node != null)
            {
                occur2 = ((float?)(node.data));
            }
            if (occur2 != null)
            {
                occur += (float)occur2;
            }

            currentNode = GetOrCreateNode(key);
            currentNode.data = occur;
        }
    }
    finally
    {
        @in.close();
    }
}
/// <summary>
/// Appends the keys of the subtrie rooted at <paramref name="currentNode"/> to the
/// result list, visiting the low subtrie, then the node itself, then the equal and
/// high subtries.
/// <para>
/// Keys are appended to the end of the supplied <code>List</code>. The list may be
/// empty when this method is invoked, but may not be <code>null</code>.
/// </para>
/// </summary>
/// <param name="currentNode">
///          The current node. </param>
/// <param name="sortKeysNumReturnValues">
///          The maximum number of values in the result, or -1 for no limit. </param>
/// <param name="sortKeysResult2">
///          The results so far. </param>
/// <returns> A <code>List</code> with the results. </returns>
private IList<string> sortKeysRecursion(TSTNode currentNode, int sortKeysNumReturnValues, IList<string> sortKeysResult2)
{
    if (currentNode == null)
    {
        return sortKeysResult2;
    }

    IList<string> collected = sortKeysRecursion(currentNode.relatives[TSTNode.LOKID], sortKeysNumReturnValues, sortKeysResult2);

    // The limit is checked once the low subtrie has been visited.
    bool limited = sortKeysNumReturnValues != -1;
    if (limited && collected.Count >= sortKeysNumReturnValues)
    {
        return collected;
    }

    // Only nodes that carry data contribute a key.
    if (currentNode.data != null)
    {
        collected.Add(getKey(currentNode));
    }

    collected = sortKeysRecursion(currentNode.relatives[TSTNode.EQKID], sortKeysNumReturnValues, collected);
    collected = sortKeysRecursion(currentNode.relatives[TSTNode.HIKID], sortKeysNumReturnValues, collected);
    return collected;
}
/// <summary>
/// Returns the node indexed by key, creating that node if it doesn't exist,
/// and creating any required intermediate nodes if they don't exist.
/// </summary>
/// <param name="key">
///          A <code>String</code> that indexes the node that is returned. </param>
/// <returns> The node object indexed by key. This object is an instance of an
///         inner class named <code>TernarySearchTrie.TSTNode</code>. </returns>
/// <exception cref="NullReferenceException">
///              If the key is <code>null</code>. </exception>
/// <exception cref="System.ArgumentException">
///              If the key is an empty <code>String</code>. </exception>
protected internal virtual TSTNode GetOrCreateNode(string key)
{
    if (key == null)
    {
        // NOTE(review): ArgumentNullException would be the conventional type, but the
        // thrown type is kept so that existing catch blocks are unaffected.
        throw new NullReferenceException("attempt to get or create node with null key");
    }
    if (key.Length == 0)
    {
        throw new System.ArgumentException("attempt to get or create node with key of zero length");
    }

    // C# strings are indexed directly; there is no charAt method.
    if (rootNode == null)
    {
        rootNode = new TSTNode(this, key[0], null);
    }

    TSTNode currentNode = rootNode;
    int charIndex = 0;
    while (true)
    {
        int charComp = compareCharsAlphabetically(key[charIndex], currentNode.splitchar);

        // Pick the link to follow; a match also advances to the next key character.
        int slot;
        if (charComp == 0)
        {
            charIndex++;
            if (charIndex == key.Length)
            {
                return currentNode;
            }
            slot = TSTNode.EQKID;
        }
        else if (charComp < 0)
        {
            slot = TSTNode.LOKID;
        }
        else
        {
            slot = TSTNode.HIKID;
        }

        // Create the missing child for the character that still has to be matched.
        if (currentNode.relatives[slot] == null)
        {
            currentNode.relatives[slot] = new TSTNode(this, key[charIndex], currentNode);
        }
        currentNode = currentNode.relatives[slot];
    }
}
/// <summary>
/// Creates a node that belongs to <paramref name="outerInstance"/>, holds
/// <paramref name="splitchar"/> and is linked to <paramref name="parent"/>.
/// </summary>
/// <param name="outerInstance">
///          The trie instance stored in this node's <c>outerInstance</c> field. </param>
/// <param name="splitchar">
///          The char used in the split. </param>
/// <param name="parent">
///          The parent node, stored in the <c>PARENT</c> slot of <c>relatives</c>. </param>
protected internal TSTNode(JaspellTernarySearchTrie outerInstance, char splitchar, TSTNode parent)
{
    relatives[PARENT] = parent;
    this.splitchar = splitchar;
    this.outerInstance = outerInstance;
}
/// <summary>
/// Constructs a Ternary Search Trie and loads data from a <code>File</code>
/// into the Trie. The file is a normal text document, where each line is of
/// the form "word TAB float". When a line has no TAB, the word counts once.
/// </summary>
/// <remarks>
/// The trimmed, lowercased word is the single key used for both the lookup of the
/// occurrences stored so far and the insertion of the new total. Lines whose word is
/// empty after trimming are ignored. The occurrence count is parsed with the invariant
/// culture. The reader is closed in a <code>finally</code> block.
/// </remarks>
/// <param name="file">
///          The <code>File</code> with the data to load into the Trie. </param>
/// <param name="compression">
///          If true, the file is compressed with the GZIP algorithm, and if
///          false, the file is a normal text document. </param>
/// <exception cref="IOException">
///              A problem occurred while reading the data. </exception>
public JaspellTernarySearchTrie(File file, bool compression) : this()
{
    BufferedReader @in;
    if (compression)
    {
        @in = new BufferedReader(IOUtils.getDecodingReader(new GZIPInputStream(new FileInputStream(file)), StandardCharsets.UTF_8));
    }
    else
    {
        @in = new BufferedReader(IOUtils.getDecodingReader((new FileInputStream(file)), StandardCharsets.UTF_8));
    }

    try
    {
        float? one = new float?(1);
        string entry;
        while ((entry = @in.readLine()) != null)
        {
            string word = entry;
            float? occur = one;
            int pos = entry.IndexOf("\t", StringComparison.Ordinal);
            if (pos != -1)
            {
                // Parse independently of the machine's regional settings.
                occur = Convert.ToSingle(entry.Substring(pos + 1).Trim(), System.Globalization.CultureInfo.InvariantCulture);
                word = entry.Substring(0, pos);
            }

            string key = word.Trim().ToLower(locale);
            if (key.Length == 0)
            {
                // Nothing to index: skip blank words instead of reading key[0].
                continue;
            }

            // Find the node already stored for this key, if any.
            TSTNode node = null;
            TSTNode currentNode = rootNode;
            int charIndex = 0;
            while (currentNode != null)
            {
                int charComp = compareCharsAlphabetically(key[charIndex], currentNode.splitchar);
                if (charComp < 0)
                {
                    currentNode = currentNode.relatives[TSTNode.LOKID];
                }
                else if (charComp > 0)
                {
                    currentNode = currentNode.relatives[TSTNode.HIKID];
                }
                else
                {
                    charIndex++;
                    if (charIndex == key.Length)
                    {
                        node = currentNode;
                        break;
                    }
                    currentNode = currentNode.relatives[TSTNode.EQKID];
                }
            }

            // Add the occurrences recorded earlier for the same key.
            float? occur2 = null;
            if (node != null)
            {
                occur2 = ((float?)(node.data));
            }
            if (occur2 != null)
            {
                occur += (float)occur2;
            }

            currentNode = GetOrCreateNode(key);
            currentNode.data = occur;
        }
    }
    finally
    {
        @in.close();
    }
}