Exemplo n.º 1
0
 /// <summary>
 /// Classifies <paramref name="prefix"/> against the subtrie rooted at
 /// <paramref name="start"/>.
 /// </summary>
 /// <param name="prefix">The text to look up.</param>
 /// <param name="start">The node at which the lookup begins.</param>
 /// <returns>
 /// <c>Prefix.MisMatch</c> when <c>getNode</c> yields no node,
 /// <c>Prefix.Match</c> when the node's data is not -1, and
 /// <c>Prefix.MatchPrefix</c> otherwise.
 /// </returns>
 public Prefix checkPrefix(String prefix, TSTNode start)
 {
     TSTNode located = getNode(prefix, start);
     if (located == null)
     {
         return Prefix.MisMatch;
     }

     // A data value of -1 means the node was reached but is not flagged as a full match.
     return located.data != -1 ? Prefix.Match : Prefix.MatchPrefix;
 }
Exemplo n.º 2
0
 /// <summary>
 /// Builds the TSTNode tree by inserting every entry of <c>_specialchars</c>
 /// and setting the data of each entry's node to 1.
 /// </summary>
 private TSTree()
 {
     for (int i = 0; i < _specialchars.Length; i++)
     {
         String key = _specialchars[i];

         // An empty (or null) entry has no first character to seed the root with
         // and nothing to index. Skip it rather than failing on key[0].
         if (string.IsNullOrEmpty(key))
         {
             continue;
         }

         if (rootNode == null)
         {
             rootNode = new TSTNode(key[0]);
         }

         TSTNode terminalNode = getOrCreateNode(key);
         terminalNode.data = 1;
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// Walks the trie from <paramref name="startNode"/> and returns the node reached
        /// after matching every character of <paramref name="key"/>. Used by checkPrefix.
        /// </summary>
        /// <param name="key">The text to look up.</param>
        /// <param name="startNode">The node at which the walk begins.</param>
        /// <returns>
        /// The node matching the last character of the key, or <c>null</c> when the key
        /// is null or empty, or when the walk runs off the tree.
        /// </returns>
        private TSTNode getNode(String key, TSTNode startNode)
        {
            if (key == null || key.Length == 0)
            {
                return null;
            }

            int position = 0;
            TSTNode cursor = startNode;
            while (cursor != null)
            {
                int difference = key[position] - cursor.splitchar;
                if (difference < 0)
                {
                    cursor = cursor.LOKID;
                }
                else if (difference > 0)
                {
                    cursor = cursor.HIKID;
                }
                else
                {
                    // Character matched: either the key is exhausted or we descend.
                    position++;
                    if (position == key.Length)
                    {
                        return cursor;
                    }

                    cursor = cursor.EQKID;
                }
            }

            // Ran off the tree before the whole key was matched.
            return null;
        }
Exemplo n.º 4
0
 /// <summary>
 /// Creates a node holding the given split character and records its parent.
 /// </summary>
 /// <param name="splitchar">
 ///          The character stored as this node's split character. </param>
 /// <param name="parent">
 ///          The node stored in the <c>PARENT</c> slot of <c>relatives</c>. </param>
 internal TSTNode(char splitchar, TSTNode parent)
 {
     relatives[PARENT] = parent;
     this.splitchar = splitchar;
 }
Exemplo n.º 5
0
 /// <summary>
 /// Creates a node holding the given split character and records both its
 /// owning trie and its parent.
 /// </summary>
 /// <param name="outerInstance">The containing <see cref="JaspellTernarySearchTrie"/>.</param>
 /// <param name="splitchar">
 ///          The character stored as this node's split character. </param>
 /// <param name="parent">
 ///          The node stored in the <c>PARENT</c> slot of <c>relatives</c>. </param>
 internal TSTNode(JaspellTernarySearchTrie outerInstance, char splitchar, TSTNode parent)
 {
     relatives[PARENT] = parent;
     this.splitchar = splitchar;
     this.outerInstance = outerInstance;
 }
Exemplo n.º 6
0
 /// <summary>
 /// Collects keys from the subtrie rooted at <paramref name="startNode"/> by
 /// delegating to <c>SortKeysRecursion</c> with a fresh, empty result list.
 /// <para>
 /// Any negative <paramref name="numReturnValues"/> is normalised to -1 before
 /// it is passed on; use -1 to request an unlimited list.
 /// </para>
 /// </summary>
 /// <param name="startNode">
 ///          The top node defining the subtrie to be searched. </param>
 /// <param name="numReturnValues">
 ///          The maximum number of values returned from this method. </param>
 /// <returns> A <see cref="IList{String}"/> with the results. </returns>
 protected virtual IList <string> SortKeys(TSTNode startNode, int numReturnValues)
 {
     int limit = numReturnValues;
     if (limit < 0)
     {
         limit = -1;
     }

     List<string> collected = new List<string>();
     return SortKeysRecursion(startNode, limit, collected);
 }
Exemplo n.º 7
0
        /// <summary>
        /// Performs one step of the iterative node-deletion walk.
        ///
        /// To delete a node, first set its data to null, then pass it into this
        /// method, then pass the node returned by this method into this method (make
        /// sure you don't delete the data of any of the nodes returned from this
        /// method!) and continue in this fashion until the node returned by this
        /// method is <c>null</c>.
        ///
        /// A node is only unlinked when it has neither an eq kid nor data. Its lo/hi
        /// kids (if any) are spliced into its place: a single kid replaces it directly;
        /// two kids are merged by hanging one subtree off the extreme node of the other.
        /// The root node is handled by updating <c>rootNode</c>.
        /// </summary>
        /// <param name="currentNode"> The node to delete. </param>
        /// <returns>
        /// The parent to examine next via <see cref="DeleteNodeRecursion(TSTNode)"/>,
        /// or <c>null</c> when the walk is finished.
        /// </returns>
        private TSTNode DeleteNodeRecursion(TSTNode currentNode)
        {
            if (currentNode == null)
            {
                return null;
            }

            // can't delete this node if it has a non-null eq kid or data
            if (currentNode.relatives[TSTNode.EQKID] != null || currentNode.data != null)
            {
                return null;
            }

            TSTNode currentParent = currentNode.relatives[TSTNode.PARENT];
            TSTNode loKid = currentNode.relatives[TSTNode.LOKID];
            TSTNode hiKid = currentNode.relatives[TSTNode.HIKID];

            int childType = -1;
            if (currentParent == null)
            {
                // No parent: either this is the root (handled below) or the node is
                // already detached, in which case there is nothing to unlink.
                if (currentNode != rootNode)
                {
                    return null;
                }
            }
            else if (currentParent.relatives[TSTNode.LOKID] == currentNode)
            {
                childType = TSTNode.LOKID;
            }
            else if (currentParent.relatives[TSTNode.EQKID] == currentNode)
            {
                childType = TSTNode.EQKID;
            }
            else if (currentParent.relatives[TSTNode.HIKID] == currentNode)
            {
                childType = TSTNode.HIKID;
            }
            else
            {
                // The recorded parent does not reference this node; keep the
                // pre-existing reaction to that inconsistent state.
                rootNode = null;
                return null;
            }

            // Work out which node takes currentNode's place (null when it was a leaf).
            TSTNode replacement;
            if (loKid == null)
            {
                replacement = hiKid;
            }
            else if (hiKid == null)
            {
                replacement = loKid;
            }
            else
            {
                int deltaHi = hiKid.splitchar - currentNode.splitchar;
                int deltaLo = currentNode.splitchar - loKid.splitchar;

                // The kid that is further away is the one moved. On a tie the hi kid
                // moves, which is what the former "new Random(1).NextDouble() < 0.5"
                // tie-break always selected (a fixed seed yields a fixed first value).
                int movingKid = (deltaHi >= deltaLo) ? TSTNode.HIKID : TSTNode.LOKID;
                TSTNode movedSubtree = (movingKid == TSTNode.HIKID) ? hiKid : loKid;
                replacement = (movingKid == TSTNode.HIKID) ? loKid : hiKid;

                // Find the extreme node of the kept subtree in the moving direction and
                // hang the moved subtree there. The ROOT of the kept subtree (not the
                // attach point) replaces currentNode, so no intermediate node is lost.
                TSTNode attachPoint = replacement;
                while (attachPoint.relatives[movingKid] != null)
                {
                    attachPoint = attachPoint.relatives[movingKid];
                }
                attachPoint.relatives[movingKid] = movedSubtree;
                movedSubtree.relatives[TSTNode.PARENT] = attachPoint;

                currentNode.relatives[TSTNode.LOKID] = null;
                currentNode.relatives[TSTNode.HIKID] = null;
            }

            if (replacement != null)
            {
                replacement.relatives[TSTNode.PARENT] = currentParent;
            }

            if (currentParent == null)
            {
                // currentNode was the root: the replacement (possibly null) becomes the root.
                rootNode = replacement;
                return null;
            }

            currentParent.relatives[childType] = replacement;
            return currentParent;
        }
Exemplo n.º 8
0
 /// <summary>
 /// Counts every node in the subtrie rooted at <paramref name="startingNode"/>,
 /// the starting node included, regardless of whether a node carries data.
 /// </summary>
 /// <param name="startingNode">
 ///          The top node of the subtrie. The node that defines the subtrie. </param>
 /// <returns> The total number of nodes in the subtrie. </returns>
 protected internal virtual int NumNodes(TSTNode startingNode)
 {
     // checkData = false: nodes are counted whether or not they hold data.
     const bool checkData = false;
     int total = RecursiveNodeCalculator(startingNode, checkData, 0);
     return total;
 }
 /// <summary>
 /// Counts every node in the subtrie rooted at <paramref name="startingNode"/>,
 /// the starting node included, regardless of whether a node carries data.
 /// </summary>
 /// <param name="startingNode">
 ///          The top node of the subtrie. The node that defines the subtrie. </param>
 /// <returns> The total number of nodes in the subtrie. </returns>
 protected internal virtual int NumNodes(TSTNode startingNode)
 {
     // Start from a running count of zero and do not filter on data.
     int runningCount = 0;
     bool onlyNodesWithData = false;
     return RecursiveNodeCalculator(startingNode, onlyNodesWithData, runningCount);
 }
Exemplo n.º 10
0
 /// <summary>
 /// Constructs a Ternary Search Trie and loads data from a <see cref="FileInfo"/>
 /// into the Trie. The file is a normal text document, where each line is of
 /// the form "word TAB float". A line without a TAB counts as one occurrence.
 ///
 /// <para>Uses the supplied culture to lowercase words before comparing. Words
 /// are trimmed before use; lines whose word is empty after trimming are skipped.
 /// When a word occurs more than once, its values are summed.</para>
 /// </summary>
 /// <param name="file">
 ///          The <see cref="FileInfo"/> with the data to load into the Trie. </param>
 /// <param name="compression">
 ///          If true, the file is compressed with the GZIP algorithm, and if
 ///          false, the file is a normal text document. </param>
 /// <param name="culture">The culture used for lowercasing.</param>
 /// <exception cref="System.IO.IOException">
 ///              A problem occured while reading the data. </exception>
 public JaspellTernarySearchTrie(FileInfo file, bool compression, CultureInfo culture)
     : this(culture)
 {
     // FileInfo.OpenRead opens the file read-only, so loading also works for files
     // the process may not write to.
     using (TextReader @in = (compression) ?
                             IOUtils.GetDecodingReader(new GZipStream(file.OpenRead(), CompressionMode.Decompress), Encoding.UTF8) :
                             IOUtils.GetDecodingReader(file.OpenRead(), Encoding.UTF8))
     {
         string line;
         while ((line = @in.ReadLine()) != null)
         {
             string word = line;
             float? occur = 1f;
             int pos = line.IndexOf('\t');
             if (pos != -1)
             {
                 // The count is machine-readable data, so parse it culture-independently.
                 occur = Convert.ToSingle(line.Substring(pos + 1).Trim(), CultureInfo.InvariantCulture);
                 word = line.Substring(0, pos);
             }

             // Use one normalised key for both the lookup and the insertion, and
             // skip lines that carry no word (they have no first character to index).
             string key = culture.TextInfo.ToLower(word.Trim());
             if (key.Length == 0)
             {
                 continue;
             }

             if (rootNode == null)
             {
                 rootNode = new TSTNode(this, key[0], null);
             }

             // Look for an existing node for this key so its count can be accumulated.
             TSTNode node = null;
             TSTNode currentNode = rootNode;
             int charIndex = 0;
             while (currentNode != null)
             {
                 int charComp = CompareCharsAlphabetically(key[charIndex], currentNode.splitchar, culture);
                 if (charComp == 0)
                 {
                     charIndex++;
                     if (charIndex == key.Length)
                     {
                         node = currentNode;
                         break;
                     }
                     currentNode = currentNode.relatives[TSTNode.EQKID];
                 }
                 else if (charComp < 0)
                 {
                     currentNode = currentNode.relatives[TSTNode.LOKID];
                 }
                 else
                 {
                     currentNode = currentNode.relatives[TSTNode.HIKID];
                 }
             }

             if (node != null)
             {
                 float? previous = ((float?)(node.data));
                 if (previous != null)
                 {
                     occur += previous.Value;
                 }
             }

             currentNode = GetOrCreateNode(key);
             currentNode.data = occur;
         }
     }
 }
Exemplo n.º 11
0
 /// <summary>
 /// Returns the node indexed by key, or <c>null</c> if that node doesn't
 /// exist. The search begins at <paramref name="startNode"/>.
 /// </summary>
 /// <param name="key">
 ///          A <c>String</c> that indexes the node that is returned. </param>
 /// <param name="startNode">
 ///          The top node defining the subtrie to be searched. </param>
 /// <returns> The node object indexed by key, or <c>null</c> when the key or
 ///         start node is null, the key is empty, or no such node exists. </returns>
 protected internal virtual TSTNode GetNode(string key, TSTNode startNode)
 {
     if (key == null || startNode == null || key.Length == 0)
     {
         return null;
     }

     TSTNode currentNode = startNode;
     int charIndex = 0;
     while (currentNode != null)
     {
         // C# strings are indexed with [], not Java's charAt().
         int charComp = compareCharsAlphabetically(key[charIndex], currentNode.splitchar);
         if (charComp == 0)
         {
             charIndex++;
             if (charIndex == key.Length)
             {
                 return currentNode;
             }
             currentNode = currentNode.relatives[TSTNode.EQKID];
         }
         else if (charComp < 0)
         {
             currentNode = currentNode.relatives[TSTNode.LOKID];
         }
         else
         {
             currentNode = currentNode.relatives[TSTNode.HIKID];
         }
     }

     // Walked off the tree before the whole key was matched.
     return null;
 }
Exemplo n.º 12
0
        /// <summary>
        /// Rebuilds the key that indexes <paramref name="node"/> by walking the parent
        /// links up to the top of the trie.
        /// </summary>
        /// <param name="node">
        ///          The node whose index is to be calculated. </param>
        /// <returns> The <c>String</c> that indexes the node argument. </returns>
        protected internal virtual string getKey(TSTNode node)
        {
            // Characters are gathered leaf-to-root and reversed at the end.
            StringBuilder collected = new StringBuilder();
            collected.Append(node.splitchar);

            TSTNode child = node;
            for (TSTNode ancestor = node.relatives[TSTNode.PARENT];
                 ancestor != null;
                 ancestor = ancestor.relatives[TSTNode.PARENT])
            {
                // Only an ancestor reached through its eq kid contributes a character;
                // lo/hi links merely branch between alternatives.
                if (ancestor.relatives[TSTNode.EQKID] == child)
                {
                    collected.Append(ancestor.splitchar);
                }
                child = ancestor;
            }

            collected.Reverse();
            return collected.ToString();
        }
Exemplo n.º 13
0
 /// <summary>
 /// Recursively visits the nodes in order to find the ones that almost match a
 /// given key.
 /// </summary>
 /// <param name="currentNode">
 ///          The current node. </param>
 /// <param name="charIndex">
 ///          The index of the current char in <paramref name="matchAlmostKey"/>. </param>
 /// <param name="d">
 ///          The number of mismatches still allowed; the recursion stops once it
 ///          drops below zero. </param>
 /// <param name="matchAlmostKey">
 ///          The key being searched. </param>
 /// <param name="matchAlmostNumReturnValues">
 ///          The maximum number of values in the result list, or -1 for no limit. </param>
 /// <param name="matchAlmostResult2">
 ///          The results so far. </param>
 /// <param name="upTo">
 ///          If true, a key is accepted when the remaining mismatch budget is
 ///          zero or more (so an exact match is included); otherwise a key is
 ///          accepted only when the budget is exactly used up. </param>
 /// <returns> The result list, with any newly found keys appended. </returns>
 private IList<string> MatchAlmostRecursion(TSTNode currentNode, int charIndex, int d, string matchAlmostKey, int matchAlmostNumReturnValues, IList<string> matchAlmostResult2, bool upTo)
 {
     // C# strings expose Length and an indexer, not Java's length()/charAt().
     if ((currentNode == null) || (matchAlmostNumReturnValues != -1 && matchAlmostResult2.Count >= matchAlmostNumReturnValues) || (d < 0) || (charIndex >= matchAlmostKey.Length))
     {
         return matchAlmostResult2;
     }
     int charComp = compareCharsAlphabetically(matchAlmostKey[charIndex], currentNode.splitchar);
     IList<string> matchAlmostResult = matchAlmostResult2;

     // With mismatches left every branch is worth exploring; without, only the
     // side the key character sorts into.
     if ((d > 0) || (charComp < 0))
     {
         matchAlmostResult = MatchAlmostRecursion(currentNode.relatives[TSTNode.LOKID], charIndex, d, matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo);
     }

     // Taking this node's character costs one mismatch unless it equals the key's.
     int nextD = (charComp == 0) ? d : d - 1;
     bool cond = (upTo) ? (nextD >= 0) : (nextD == 0);
     if ((matchAlmostKey.Length == charIndex + 1) && cond && (currentNode.data != null))
     {
         matchAlmostResult.Add(getKey(currentNode));
     }
     matchAlmostResult = MatchAlmostRecursion(currentNode.relatives[TSTNode.EQKID], charIndex + 1, nextD, matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo);
     if ((d > 0) || (charComp > 0))
     {
         matchAlmostResult = MatchAlmostRecursion(currentNode.relatives[TSTNode.HIKID], charIndex, d, matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo);
     }
     return matchAlmostResult;
 }
Exemplo n.º 14
0
 /// <summary>
 /// Recursively visits each node of a subtrie and adds the qualifying nodes to
 /// a running count.
 /// </summary>
 /// <param name="currentNode">
 ///          The current node. </param>
 /// <param name="checkData">
 ///          If true, only nodes whose data is not <c>null</c> are counted;
 ///          if false, every node is counted. </param>
 /// <param name="numNodes2">
 ///          The number of nodes so far. </param>
 /// <returns> The number of nodes accounted. </returns>
 private int RecursiveNodeCalculator(TSTNode currentNode, bool checkData, int numNodes2)
 {
     if (currentNode == null)
     {
         return numNodes2;
     }

     // Thread the running total through the lo, eq and hi subtrees in turn.
     int total = numNodes2;
     total = RecursiveNodeCalculator(currentNode.relatives[TSTNode.LOKID], checkData, total);
     total = RecursiveNodeCalculator(currentNode.relatives[TSTNode.EQKID], checkData, total);
     total = RecursiveNodeCalculator(currentNode.relatives[TSTNode.HIKID], checkData, total);

     bool countsThisNode = !checkData || currentNode.data != null;
     return countsThisNode ? total + 1 : total;
 }
Exemplo n.º 15
0
 /// <summary>
 /// Performs one step of the iterative node-deletion walk.
 ///
 /// To delete a node, first set its data to null, then pass it into this
 /// method, then pass the node returned by this method into this method (make
 /// sure you don't delete the data of any of the nodes returned from this
 /// method!) and continue in this fashion until the node returned by this
 /// method is <c>null</c>.
 ///
 /// A node is only unlinked when it has neither an eq kid nor data. Its lo/hi
 /// kids (if any) are spliced into its place: a single kid replaces it directly;
 /// two kids are merged by hanging one subtree off the extreme node of the other.
 /// The root node is handled by updating <c>rootNode</c>.
 /// </summary>
 /// <param name="currentNode">
 ///          The node to delete. </param>
 /// <returns> The parent to examine next with this method, or <c>null</c>
 ///          when the walk is finished. </returns>
 private TSTNode DeleteNodeRecursion(TSTNode currentNode)
 {
     if (currentNode == null)
     {
         return null;
     }

     // can't delete this node if it has a non-null eq kid or data
     if (currentNode.relatives[TSTNode.EQKID] != null || currentNode.data != null)
     {
         return null;
     }

     TSTNode currentParent = currentNode.relatives[TSTNode.PARENT];
     TSTNode loKid = currentNode.relatives[TSTNode.LOKID];
     TSTNode hiKid = currentNode.relatives[TSTNode.HIKID];

     int childType = -1;
     if (currentParent == null)
     {
         // No parent: either this is the root (handled below) or the node is
         // already detached, in which case there is nothing to unlink.
         if (currentNode != rootNode)
         {
             return null;
         }
     }
     else if (currentParent.relatives[TSTNode.LOKID] == currentNode)
     {
         childType = TSTNode.LOKID;
     }
     else if (currentParent.relatives[TSTNode.EQKID] == currentNode)
     {
         childType = TSTNode.EQKID;
     }
     else if (currentParent.relatives[TSTNode.HIKID] == currentNode)
     {
         childType = TSTNode.HIKID;
     }
     else
     {
         // The recorded parent does not reference this node; keep the
         // pre-existing reaction to that inconsistent state.
         rootNode = null;
         return null;
     }

     // Work out which node takes currentNode's place (null when it was a leaf).
     TSTNode replacement;
     if (loKid == null)
     {
         replacement = hiKid;
     }
     else if (hiKid == null)
     {
         replacement = loKid;
     }
     else
     {
         int deltaHi = hiKid.splitchar - currentNode.splitchar;
         int deltaLo = currentNode.splitchar - loKid.splitchar;

         // The kid that is further away is the one moved. On a tie the hi kid
         // moves, which is what the former "new Random(1).NextDouble() < 0.5"
         // tie-break always selected (a fixed seed yields a fixed first value).
         int movingKid = (deltaHi >= deltaLo) ? TSTNode.HIKID : TSTNode.LOKID;
         TSTNode movedSubtree = (movingKid == TSTNode.HIKID) ? hiKid : loKid;
         replacement = (movingKid == TSTNode.HIKID) ? loKid : hiKid;

         // Find the extreme node of the kept subtree in the moving direction and
         // hang the moved subtree there. The ROOT of the kept subtree (not the
         // attach point) replaces currentNode, so no intermediate node is lost.
         TSTNode attachPoint = replacement;
         while (attachPoint.relatives[movingKid] != null)
         {
             attachPoint = attachPoint.relatives[movingKid];
         }
         attachPoint.relatives[movingKid] = movedSubtree;
         movedSubtree.relatives[TSTNode.PARENT] = attachPoint;

         currentNode.relatives[TSTNode.LOKID] = null;
         currentNode.relatives[TSTNode.HIKID] = null;
     }

     if (replacement != null)
     {
         replacement.relatives[TSTNode.PARENT] = currentParent;
     }

     if (currentParent == null)
     {
         // currentNode was the root: the replacement (possibly null) becomes the root.
         rootNode = replacement;
         return null;
     }

     currentParent.relatives[childType] = replacement;
     return currentParent;
 }
Exemplo n.º 16
0
 /// <summary>
 /// Clears the data of the given node and then repeatedly hands the node, and
 /// each node returned in turn, to <c>DeleteNodeRecursion</c> until that method
 /// returns <c>null</c>. Does nothing when the argument is <c>null</c>.
 /// </summary>
 /// <param name="nodeToDelete">
 ///          The node to delete. </param>
 private void DeleteNode(TSTNode nodeToDelete)
 {
     if (nodeToDelete == null)
     {
         return;
     }

     nodeToDelete.data = null;

     // The node is known to be non-null here, so at least one step always runs.
     TSTNode pending = nodeToDelete;
     do
     {
         pending = DeleteNodeRecursion(pending);
     }
     while (pending != null);
 }
Exemplo n.º 17
0
 /// <summary>
 /// Collects keys from the subtrie rooted at <paramref name="startNode"/> by
 /// delegating to <c>sortKeysRecursion</c> with a fresh, empty result list.
 /// <para>
 /// Any negative <paramref name="numReturnValues"/> is normalised to -1 before
 /// it is passed on; use -1 to request an unlimited list.
 /// </para>
 /// </summary>
 /// <param name="startNode">
 ///          The top node defining the subtrie to be searched. </param>
 /// <param name="numReturnValues">
 ///          The maximum number of values returned from this method. </param>
 /// <returns> A <c>List</c> with the results. </returns>
 protected internal virtual IList<string> sortKeys(TSTNode startNode, int numReturnValues)
 {
     int cap = numReturnValues;
     if (cap < 0)
     {
         cap = -1;
     }

     List<string> sink = new List<string>();
     return sortKeysRecursion(startNode, cap, sink);
 }
Exemplo n.º 18
0
 /// <summary>
 /// Returns the node indexed by key, creating that node if it doesn't exist,
 /// and creating any required intermediate nodes if they don't exist. The root
 /// node is created from the key's first character when the trie is empty.
 /// </summary>
 /// <param name="key">A <c>String</c> that indexes the node that is returned.</param>
 /// <returns>The node object indexed by key.</returns>
 /// <exception cref="ArgumentNullException">If the key is <c>null</c>.</exception>
 /// <exception cref="ArgumentException">If the key is an empty <c>String</c>.</exception>
 private TSTNode getOrCreateNode(String key)
 {
     // Report the two invalid inputs with specific exception types (both still
     // derive from Exception) instead of a bare Exception with a misleading message.
     if (key == null)
     {
         throw new ArgumentNullException("key");
     }
     if (key.Length == 0)
     {
         throw new ArgumentException("key must not be empty", "key");
     }

     if (rootNode == null)
     {
         rootNode = new TSTNode(key[0]);
     }

     TSTNode currentNode = rootNode;
     int charIndex = 0;
     while (true)
     {
         int charComp = key[charIndex] - currentNode.splitchar;
         if (charComp == 0)
         {
             // Character matched: stop when the key is exhausted, else descend.
             charIndex++;
             if (charIndex == key.Length)
             {
                 return currentNode;
             }
             if (currentNode.EQKID == null)
             {
                 currentNode.EQKID = new TSTNode(key[charIndex]);
             }
             currentNode = currentNode.EQKID;
         }
         else if (charComp < 0)
         {
             if (currentNode.LOKID == null)
             {
                 currentNode.LOKID = new TSTNode(key[charIndex]);
             }
             currentNode = currentNode.LOKID;
         }
         else
         {
             if (currentNode.HIKID == null)
             {
                 currentNode.HIKID = new TSTNode(key[charIndex]);
             }
             currentNode = currentNode.HIKID;
         }
     }
 }
Exemplo n.º 19
0
        /// <summary>
        /// Constructs a Ternary Search Trie and loads data from a <c>File</c>
        /// into the Trie. The file is a normal text document, where each line is of
        /// the form "word TAB float". A line without a TAB counts as one occurrence.
        /// Words are trimmed and lowercased before use; lines whose word is empty
        /// after trimming are skipped. When a word occurs more than once, its values
        /// are summed.
        /// </summary>
        /// <param name="file">
        ///          The <c>File</c> with the data to load into the Trie. </param>
        /// <param name="compression">
        ///          If true, the file is compressed with the GZIP algorithm, and if
        ///          false, the file is a normal text document. </param>
        /// <exception cref="IOException">
        ///              A problem occured while reading the data. </exception>
        public JaspellTernarySearchTrie(File file, bool compression)
            : this()
        {
            BufferedReader @in;

            if (compression)
            {
                @in = new BufferedReader(IOUtils.getDecodingReader(new GZIPInputStream(new FileInputStream(file)), StandardCharsets.UTF_8));
            }
            else
            {
                @in = new BufferedReader(IOUtils.getDecodingReader((new FileInputStream(file)), StandardCharsets.UTF_8));
            }

            try
            {
                string word;
                int    pos;
                float? occur, one = new float?(1);

                while ((word = @in.readLine()) != null)
                {
                    pos   = word.IndexOf("\t", StringComparison.Ordinal);
                    occur = one;
                    if (pos != -1)
                    {
                        // The count is machine-readable data, so parse it culture-independently.
                        occur = Convert.ToSingle(word.Substring(pos + 1).Trim(), System.Globalization.CultureInfo.InvariantCulture);
                        word  = word.Substring(0, pos);
                    }

                    // Use one normalised key for both the lookup and the insertion, and
                    // skip lines that carry no word (they have no first character to index).
                    string key = word.Trim().ToLower(locale);
                    if (key.Length == 0)
                    {
                        continue;
                    }

                    if (rootNode == null)
                    {
                        rootNode = new TSTNode(this, key[0], null);
                    }

                    // Look for an existing node for this key so its count can be accumulated.
                    TSTNode node        = null;
                    TSTNode currentNode = rootNode;
                    int     charIndex   = 0;
                    while (currentNode != null)
                    {
                        int charComp = compareCharsAlphabetically(key[charIndex], currentNode.splitchar);
                        if (charComp == 0)
                        {
                            charIndex++;
                            if (charIndex == key.Length)
                            {
                                node = currentNode;
                                break;
                            }
                            currentNode = currentNode.relatives[TSTNode.EQKID];
                        }
                        else if (charComp < 0)
                        {
                            currentNode = currentNode.relatives[TSTNode.LOKID];
                        }
                        else
                        {
                            currentNode = currentNode.relatives[TSTNode.HIKID];
                        }
                    }

                    float? occur2 = null;
                    if (node != null)
                    {
                        occur2 = ((float?)(node.data));
                    }
                    if (occur2 != null)
                    {
                        occur += (float)occur2;
                    }
                    currentNode      = GetOrCreateNode(key);
                    currentNode.data = occur;
                }
            }
            finally
            {
                // Release the reader even when a line fails to parse.
                @in.close();
            }
        }
Exemplo n.º 20
0
 /// <summary>
 /// Visits the current node and everything below it in lo / self / eq / hi
 /// order, appending the key of every node that carries data to the result.
 /// <para>
 /// The supplied result list may be empty but must not be <c>null</c>. Once the
 /// list holds the requested number of entries, the node itself and its eq and
 /// hi subtrees are skipped.
 /// </para>
 /// </summary>
 /// <param name="currentNode">
 ///          The current node. </param>
 /// <param name="sortKeysNumReturnValues">
 ///          The maximum number of values in the result, or -1 for no limit. </param>
 /// <param name="sortKeysResult2">
 ///          The results so far. </param>
 /// <returns> A <c>List</c> with the results. </returns>
 private IList<string> sortKeysRecursion(TSTNode currentNode, int sortKeysNumReturnValues, IList<string> sortKeysResult2)
 {
     if (currentNode == null)
     {
         return sortKeysResult2;
     }

     // Everything in the lo subtree is emitted before this node.
     IList<string> collected = sortKeysRecursion(currentNode.relatives[TSTNode.LOKID], sortKeysNumReturnValues, sortKeysResult2);

     bool unlimited = sortKeysNumReturnValues == -1;
     if (!unlimited && collected.Count >= sortKeysNumReturnValues)
     {
         return collected;
     }

     if (currentNode.data != null)
     {
         collected.Add(getKey(currentNode));
     }

     collected = sortKeysRecursion(currentNode.relatives[TSTNode.EQKID], sortKeysNumReturnValues, collected);
     collected = sortKeysRecursion(currentNode.relatives[TSTNode.HIKID], sortKeysNumReturnValues, collected);
     return collected;
 }
Exemplo n.º 21
0
 /// <summary>
 /// Returns the node indexed by key, creating that node if it doesn't exist,
 /// and creating any required intermediate nodes if they don't exist. The root
 /// node is created from the key's first character when the trie is empty.
 /// </summary>
 /// <param name="key">
 ///          A <c>String</c> that indexes the node that is returned. </param>
 /// <returns> The node object indexed by key. </returns>
 /// <exception cref="System.NullReferenceException">
 ///              If the key is <c>null</c>. </exception>
 /// <exception cref="System.ArgumentException">
 ///              If the key is an empty <c>String</c>. </exception>
 protected internal virtual TSTNode GetOrCreateNode(string key)
 {
     if (key == null)
     {
         // Exception type kept as-is so existing callers' catch blocks still apply.
         throw new NullReferenceException("attempt to get or create node with null key");
     }
     if (key.Length == 0)
     {
         throw new System.ArgumentException("attempt to get or create node with key of zero length");
     }
     if (rootNode == null)
     {
         rootNode = new TSTNode(this, key[0], null);
     }

     TSTNode currentNode = rootNode;
     int charIndex = 0;
     while (true)
     {
         // C# strings are indexed with [], not Java's charAt().
         int charComp = compareCharsAlphabetically(key[charIndex], currentNode.splitchar);

         int childSlot;
         if (charComp == 0)
         {
             // Character matched: stop when the key is exhausted, else descend.
             charIndex++;
             if (charIndex == key.Length)
             {
                 return currentNode;
             }
             childSlot = TSTNode.EQKID;
         }
         else
         {
             childSlot = (charComp < 0) ? TSTNode.LOKID : TSTNode.HIKID;
         }

         // Create the missing child for the character still to be matched.
         if (currentNode.relatives[childSlot] == null)
         {
             currentNode.relatives[childSlot] = new TSTNode(this, key[charIndex], currentNode);
         }
         currentNode = currentNode.relatives[childSlot];
     }
 }
Exemplo n.º 22
0
 /// <summary>
 /// Creates a node that holds the given split character and records its parent.
 /// </summary>
 /// <param name="outerInstance">
 ///          The trie instance this node is created for. </param>
 /// <param name="splitchar">
 ///          The character stored in this node and used in the split. </param>
 /// <param name="parent">
 ///          The node stored in the <code>PARENT</code> slot of <code>relatives</code>. </param>
 protected internal TSTNode(JaspellTernarySearchTrie outerInstance, char splitchar, TSTNode parent)
 {
     // The three assignments are independent of one another.
     this.relatives[PARENT] = parent;
     this.splitchar = splitchar;
     this.outerInstance = outerInstance;
 }
Exemplo n.º 23
0
 /// <summary>
 /// Constructs a Ternary Search Trie and loads data from a <code>File</code>
 /// into the Trie. The file is a normal text document, where each line is of
 /// the form "word TAB float". A line without a TAB counts as a value of 1.
 /// <para>
 /// Each word is trimmed and lower-cased with the trie's locale before it is used
 /// as a key. Lines whose key is empty are skipped, and a key that occurs more
 /// than once accumulates the sum of its values.
 /// </para>
 /// </summary>
 /// <param name="file">
 ///          The <code>File</code> with the data to load into the Trie. </param>
 /// <param name="compression">
 ///          If true, the file is compressed with the GZIP algorithm, and if
 ///          false, the file is a normal text document. </param>
 /// <exception cref="IOException">
 ///              A problem occurred while reading the data. </exception>
 public JaspellTernarySearchTrie(File file, bool compression)
     : this()
 {
     BufferedReader @in;
     if (compression)
     {
         @in = new BufferedReader(IOUtils.getDecodingReader(new GZIPInputStream(new FileInputStream(file)), StandardCharsets.UTF_8));
     }
     else
     {
         @in = new BufferedReader(IOUtils.getDecodingReader((new FileInputStream(file)), StandardCharsets.UTF_8));
     }

     // The reader is closed even when a line fails to parse.
     try
     {
         string word;
         while ((word = @in.readLine()) != null)
         {
             float occur = 1f;
             int pos = word.IndexOf("\t", StringComparison.Ordinal);
             if (pos != -1)
             {
                 // The file is machine-readable data, so the number is parsed with the
                 // invariant culture instead of whatever culture the current thread uses.
                 occur = Convert.ToSingle(word.Substring(pos + 1).Trim(), System.Globalization.CultureInfo.InvariantCulture);
                 word = word.Substring(0, pos);
             }

             // One normalized key is used for both the lookup and the insertion.
             string key = word.Trim().ToLower(locale);
             if (key.Length == 0)
             {
                 // Blank line or whitespace-only word: nothing to index.
                 continue;
             }

             // GetOrCreateNode returns the existing node for a known key (its data is
             // then added to the new value) or a freshly created node for a new key.
             TSTNode node = GetOrCreateNode(key);
             float? previous = ((float?)(node.data));
             if (previous != null)
             {
                 occur += (float)previous;
             }
             node.data = occur;
         }
     }
     finally
     {
         @in.close();
     }
 }