예제 #1
0
        /// <summary>
        /// Determines whether <paramref name="obj"/> is a <see cref="SegTokenPair"/> of the
        /// exact same runtime type whose <c>CharArray</c>, <c>From</c>, <c>To</c> and
        /// <c>Weight</c> all match this instance.
        /// </summary>
        /// <param name="obj">The object to compare against; may be <c>null</c>.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="obj"/> is this same instance or an equal pair;
        /// otherwise <c>false</c>.
        /// </returns>
        public override bool Equals(object obj)
        {
            // Same instance: nothing else to compare.
            if (ReferenceEquals(this, obj))
            {
                return true;
            }

            // Null or a different runtime type can never be equal.
            if (obj == null || GetType() != obj.GetType())
            {
                return false;
            }

            SegTokenPair that = (SegTokenPair)obj;

            // Weight is compared through its 64-bit representation rather than with ==.
            return Arrays.Equals(CharArray, that.CharArray)
                && From == that.From
                && To == that.To
                && Number.DoubleToInt64Bits(Weight) == Number.DoubleToInt64Bits(that.Weight);
        }
예제 #2
0
        /// <summary>
        /// Finds the minimum-total-weight path from node 0 to the last node and returns the
        /// tokens along it.
        /// </summary>
        /// <remarks>
        /// Nodes <c>1..ToCount</c> are processed in index order; for each node the incoming
        /// edge that gives the smallest accumulated weight is recorded. The path is then
        /// walked backwards from the last node to node 0 and emitted in forward order.
        /// Every processed node is expected to have at least one incoming edge: if none is
        /// found, no best edge is selected and dereferencing it throws.
        /// </remarks>
        /// <returns>
        /// The entries of <c>segTokenList</c> for the node ids on the best path, starting
        /// with node 0.
        /// </returns>
        public virtual IList<SegToken> GetShortPath()
        {
            int nodeCount = ToCount;

            // path[i] holds the best accumulated weight for node i and its predecessor.
            IList<PathNode> path = new List<PathNode>();
            path.Add(new PathNode { Weight = 0, PreNode = 0 });

            for (int node = 1; node <= nodeCount; node++)
            {
                double bestWeight = double.MaxValue;
                SegTokenPair bestEdge = null;

                foreach (SegTokenPair candidate in GetToList(node))
                {
                    double edgeWeight = candidate.Weight;
                    double total = path[candidate.From].Weight + edgeWeight;
                    if (total < bestWeight)
                    {
                        bestWeight = total;
                        bestEdge = candidate;
                    }
                }

                path.Add(new PathNode { Weight = bestWeight, PreNode = bestEdge.From });
            }

            // Follow predecessor links from the last node back to node 0.
            IList<int> backtrack = new List<int>();
            int cursor = path.Count - 1;
            backtrack.Add(cursor);
            while (cursor != 0)
            {
                cursor = path[cursor].PreNode;
                backtrack.Add(cursor);
            }

            // Emit the tokens in forward order by reading the backtrack list from its end.
            IList<SegToken> resultPath = new List<SegToken>();
            int position = backtrack.Count;
            while (position > 0)
            {
                position--;
                resultPath.Add(segTokenList[backtrack[position]]);
            }

            return resultPath;
        }
예제 #3
0
        /// <summary>
        /// Registers a <see cref="SegTokenPair"/> under its <c>To</c> index.
        /// </summary>
        /// <remarks>
        /// When <c>IsToExist</c> reports an existing entry for that index, the pair is appended
        /// to the stored list; otherwise a new list containing only this pair is stored.
        /// </remarks>
        /// <param name="tokenPair">The <see cref="SegTokenPair"/> to add.</param>
        public virtual void AddSegTokenPair(SegTokenPair tokenPair)
        {
            int to = tokenPair.To;

            if (IsToExist(to))
            {
                // A bucket already exists for this target index: append to it.
                tokenPairListTable[to].Add(tokenPair);
                return;
            }

            // First pair for this target index: start a new bucket.
            tokenPairListTable[to] = new List<SegTokenPair> { tokenPair };
        }
예제 #4
0
        /// <summary>
        /// Generate a <see cref="BiSegGraph"/> based upon a <see cref="SegGraph"/>.
        /// </summary>
        /// <remarks>
        /// Stores the indexed token list from <paramref name="segGraph"/> in <c>segTokenList</c>,
        /// then, for each token, locates the tokens at the nearest start offset at or after the
        /// token's <c>EndOffset</c> and adds one weighted <see cref="SegTokenPair"/> for every
        /// (token, following token) combination via <c>AddSegTokenPair</c>.
        /// </remarks>
        /// <param name="segGraph">The segmentation graph supplying the tokens and their start offsets.</param>
        private void GenerateBiSegGraph(SegGraph segGraph)
        {
            double smooth = 0.1;
            int lastStart = segGraph.MaxStart;
            double tinyDouble = 1.0 / Utility.MAX_FREQUENCE;

            // Get the list of tokens, ordered and indexed.
            segTokenList = segGraph.MakeIndex();

            // Scanning starts at -1; per the original note, that is the start position of the start token.
            for (int start = -1; start < lastStart; start++)
            {
                if (!segGraph.IsStartExist(start))
                {
                    continue;
                }

                foreach (SegToken first in segGraph.GetStartList(start))
                {
                    double firstFreq = first.Weight;

                    // Look for the closest start offset, at or after the end of this token,
                    // that has tokens. The bound is inclusive; per the original note, the end
                    // token starts at the sentence length.
                    IList<SegToken> followers = null;
                    for (int offset = first.EndOffset; offset <= lastStart; offset++)
                    {
                        if (segGraph.IsStartExist(offset))
                        {
                            followers = segGraph.GetStartList(offset);
                            break;
                        }
                    }

                    // No following tokens: stop processing the remaining tokens of this start offset.
                    if (followers == null)
                    {
                        break;
                    }

                    foreach (SegToken second in followers)
                    {
                        // Pair id = first token chars + segment separator + second token chars.
                        char[] head = first.CharArray;
                        char[] tail = second.CharArray;
                        char[] pairId = new char[head.Length + tail.Length + 1];
                        System.Array.Copy(head, 0, pairId, 0, head.Length);
                        pairId[head.Length] = BigramDictionary.WORD_SEGMENT_CHAR;
                        System.Array.Copy(tail, 0, pairId, head.Length + 1, tail.Length);

                        // Frequency of the two linked words.
                        int pairFreq = bigramDict.GetFrequency(pairId);

                        // Smoothing: -log{a*P(Ci-1) + (1-a)*P(Ci|Ci-1)}, with 0 < a < 1.
                        double unigramTerm = smooth
                                             * (1.0 + firstFreq)
                                             / (Utility.MAX_FREQUENCE + 0.0);
                        double bigramTerm = (1.0 - smooth)
                                            * ((1.0 - tinyDouble) * pairFreq / (1.0 + firstFreq) + tinyDouble);
                        double pairWeight = -Math.Log(unigramTerm + bigramTerm);

                        AddSegTokenPair(new SegTokenPair(pairId, first.Index, second.Index, pairWeight));
                    }
                }
            }
        }