Beispiel #1
0
        /// <summary>
        /// Fills the lattice for the given text: at every position where a node ends,
        /// looks up the nodes beginning there and connects them, then connects the EOS node
        /// at the last position where a node ends.
        /// </summary>
        /// <param name="str">Pointer to the first character of the text.</param>
        /// <param name="len">Number of characters available at <paramref name="str"/>.</param>
        /// <param name="lattice">Lattice whose begin/end node lists are read and updated.</param>
        /// <param name="withAllPath">Passed through unchanged to <c>Connect</c>.</param>
        private unsafe void DoViterbi(char *str, int len, MeCabLattice <TNode> lattice, bool withAllPath)
        {
            char *limit = str + len;

            for (int offset = 0; offset < len; offset++)
            {
                // Nothing ends here, so nothing can start here either.
                if (lattice.EndNodeList[offset] == null)
                {
                    continue;
                }

                var candidates = tokenizer.Lookup(str + offset, limit, lattice);
                lattice.BeginNodeList[offset] = candidates;
                this.Connect(offset, candidates, lattice.EndNodeList, withAllPath);
            }

            // Scan backwards for the last position where a node ends; EOS is attached there.
            int tail = len;
            while (tail >= 0 && lattice.EndNodeList[tail] == null)
            {
                tail--;
            }

            if (tail >= 0)
            {
                this.Connect(tail, lattice.EosNode, lattice.EndNodeList, withAllPath);
            }
        }
        /// <summary>
        /// Applies constraints to a lattice to prune it.
        /// </summary>
        /// <remarks>
        /// Every entry of <c>Boundaries</c> and every entry of <c>Conditions</c> is registered
        /// with a <c>NodeRemover</c> created for the lattice; <c>DoRemoves</c> is called once
        /// after all of them have been registered.
        /// </remarks>
        /// <param name="lattice">The lattice constraints are applied to.</param>
        /// <exception cref="System.ApplicationException">
        /// A boundary entry carries a value that is neither <c>Boundary</c> nor <c>Insdie</c>.
        /// </exception>
        public void ApplyTo(MeCabLattice <TNode> lattice)
        {
            var remover = new NodeRemover <TNode>(lattice);

            foreach (var entry in Boundaries)
            {
                // Declared per iteration so the lambda below captures this entry's position.
                var position = entry.Key;
                var kind     = entry.Value;

                if (kind == BoundaryType.Boundary)
                {
                    remover.RemoveUnsatisfied(node => node.BPos < position && node.EPos > position);
                }
                else if (kind == BoundaryType.Insdie)
                {
                    remover.RemoveBeginningAt(position);
                    remover.RemoveEndingAt(position);
                }
                else
                {
                    throw new ApplicationException("Internal error.");
                }
            }

            foreach (var condition in Conditions)
            {
                remover.RemoveUnsatisfied(condition);
            }

            remover.DoRemoves();
        }
Beispiel #3
0
        /// <summary>
        /// Runs a forward pass over the begin-node lists (setting <c>Alpha</c>) and a backward
        /// pass over the end-node lists (setting <c>Beta</c>), stores the EOS node's alpha in
        /// <c>lattice.Z</c>, and finally sets <c>Prob</c> of every node in the begin-node lists
        /// to <c>exp(Alpha + Beta - Z)</c>.
        /// </summary>
        /// <param name="lattice">Lattice whose nodes are updated in place.</param>
        private static unsafe void ForwardBackward(MeCabLattice <TNode> lattice)
        {
            // Forward pass, ascending positions.
            for (int i = 0; i < lattice.BeginNodeList.Length; i++)
            {
                var cursor = lattice.BeginNodeList[i];
                while (cursor != null)
                {
                    AccumulateAlpha(cursor, lattice.Param.Theta);
                    cursor = cursor.BNext;
                }
            }

            // Backward pass, descending positions.
            for (int i = lattice.EndNodeList.Length - 1; i >= 0; i--)
            {
                var cursor = lattice.EndNodeList[i];
                while (cursor != null)
                {
                    AccumulateBeta(cursor, lattice.Param.Theta);
                    cursor = cursor.ENext;
                }
            }

            // Normalization term: alpha of the EOS node.
            lattice.Z = lattice.EosNode.Alpha;

            for (int i = 0; i < lattice.BeginNodeList.Length; i++)
            {
                var cursor = lattice.BeginNodeList[i];
                while (cursor != null)
                {
                    cursor.Prob = (float)Math.Exp(cursor.Alpha + cursor.Beta - lattice.Z);
                    cursor      = cursor.BNext;
                }
            }

            // Folds every left path of the node into its Alpha via LogSumExp.
            void AccumulateAlpha(TNode target, double theta)
            {
                target.Alpha = 0f;

                bool isFirst = true; // true only while handling target.LPath itself
                for (var link = target.LPath; link != null; link = link.LNext)
                {
                    target.Alpha = (float)Utils.LogSumExp(target.Alpha,
                                                          -theta * link.Cost + link.LNode.Alpha,
                                                          isFirst);
                    isFirst = false;
                }
            }

            // Folds every right path of the node into its Beta via LogSumExp.
            void AccumulateBeta(TNode target, double theta)
            {
                target.Beta = 0f;

                bool isFirst = true; // true only while handling target.RPath itself
                for (var link = target.RPath; link != null; link = link.RNext)
                {
                    target.Beta = (float)Utils.LogSumExp(target.Beta,
                                                         -theta * link.Cost + link.RNode.Beta,
                                                         isFirst);
                    isFirst = false;
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Fills the lattice for the given text. The text is first encoded with the tokenizer's
        /// encoding; then, at every position where a node ends, the nodes beginning there are
        /// looked up (passing both the character range and the matching byte range) and
        /// connected. Finally the EOS node is connected at the last position where a node ends.
        /// </summary>
        /// <remarks>
        /// The encoded copy of the text lives on the stack only while it is small; larger texts
        /// use a pinned heap array so that the input length cannot overflow the stack.
        /// </remarks>
        /// <param name="str">Pointer to the first character of the text.</param>
        /// <param name="len">Number of characters available at <paramref name="str"/>.</param>
        /// <param name="lattice">Lattice whose begin/end node lists are read and updated.</param>
        /// <param name="withAllPath">Passed through unchanged to <c>Connect</c>.</param>
        private unsafe void DoViterbi(char *str, int len, MeCabLattice <TNode> lattice, bool withAllPath)
        {
            // Largest encoded size that is still placed on the stack.
            const int MaxStackBytes = 1024;

            var enc      = this.tokenizer.Encoding;
            int bytesLen = enc.GetByteCount(str, len);

            // Exactly one of the two buffers is used; the other one is empty/null.
            byte[] heapBuffer  = bytesLen > MaxStackBytes ? new byte[bytesLen] : null;
            byte * stackBuffer = stackalloc byte[heapBuffer == null ? bytesLen : 0];

            // Pinning a null array yields a null pointer, which is never used in that case.
            fixed (byte *pinnedBuffer = heapBuffer)
            {
                byte *bytesBegin = heapBuffer == null ? stackBuffer : pinnedBuffer;

                if (len > 0)
                {
                    enc.GetBytes(str, len, bytesBegin, bytesLen);
                }
                byte *bytesEnd = bytesBegin + bytesLen;
                char *begin    = str;
                char *end      = str + len;

                for (int pos = 0; pos < len; pos++)
                {
                    if (lattice.EndNodeList[pos] != null)
                    {
                        var rNode = tokenizer.Lookup(begin,
                                                     end,
                                                     bytesBegin,
                                                     bytesEnd,
                                                     lattice.Param,
                                                     lattice.nodeAllocator);
                        lattice.BeginNodeList[pos] = rNode;
                        this.Connect(pos, rNode, lattice.EndNodeList, withAllPath);
                    }

                    // Keep the byte cursor in step with the character cursor.
                    // NOTE(review): each UTF-16 code unit is measured on its own; for surrogate
                    // pairs this may differ from the bytes GetBytes produced above — confirm
                    // against the encodings actually used.
                    bytesBegin += enc.GetByteCount(begin, 1);
                    begin++;
                }
            }

            // Attach EOS at the last position where a node ends.
            for (int pos = len; pos >= 0; pos--)
            {
                if (lattice.EndNodeList[pos] != null)
                {
                    this.Connect(pos, lattice.EosNode, lattice.EndNodeList, withAllPath);
                    break;
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Walks the <c>Prev</c> links back from the EOS node and sets the <c>Next</c> link of
        /// every node on that path. Each node on the path that has a predecessor, except EOS
        /// itself, is flagged <c>IsBest</c> and pushed onto <c>lattice.BestResultStack</c>.
        /// </summary>
        /// <remarks>
        /// The EOS node is expected to have a predecessor; if <c>EosNode.Prev</c> is null the
        /// first link assignment throws a <see cref="System.NullReferenceException"/>.
        /// </remarks>
        /// <param name="lattice">Lattice whose EOS/BOS nodes and result stack are used.</param>
        private void BuildBestLattice(MeCabLattice <TNode> lattice)
        {
            var current = lattice.EosNode;
            var prev    = current.Prev;

            // Link the last node of the path to EOS; EOS itself is neither flagged nor pushed.
            prev.Next = current;

            current = prev;
            prev    = current.Prev;

            while (prev != null)
            {
                current.IsBest = true;
                lattice.BestResultStack.Push(current);
                prev.Next = current;

                current = prev;
                prev    = current.Prev;
            }

            // 'current' is now the head of the path (the node without a predecessor).
            // When that head is the BOS node, its Next link was already set while walking;
            // assigning it here would overwrite that link with a reference to itself.
            if (!object.ReferenceEquals(current, lattice.BosNode))
            {
                lattice.BosNode.Next = current;
            }
        }
Beispiel #6
0
        /// <summary>
        /// Analyzes the text according to <c>lattice.Param.LatticeLevel</c>: level Zero runs
        /// <c>DoViterbi</c> without all paths, levels One and Two run it with all paths, and
        /// level Two additionally runs <c>ForwardBackward</c>. The best lattice is built
        /// afterwards in every case.
        /// </summary>
        /// <param name="str">Pointer to the first character of the text.</param>
        /// <param name="len">Number of characters available at <paramref name="str"/>.</param>
        /// <param name="lattice">Lattice to fill.</param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// The lattice level is none of Zero, One or Two.
        /// </exception>
        public unsafe void Analyze(char *str, int len, MeCabLattice <TNode> lattice)
        {
            var level = lattice.Param.LatticeLevel;

            bool isZero = level == MeCabLatticeLevel.Zero;
            bool isOne  = level == MeCabLatticeLevel.One;
            bool isTwo  = level == MeCabLatticeLevel.Two;

            // Reject unknown levels before any work is done on the lattice.
            if (!isZero && !isOne && !isTwo)
            {
                throw new ArgumentOutOfRangeException(nameof(lattice.Param.LatticeLevel));
            }

            this.DoViterbi(str, len, lattice, !isZero);

            if (isTwo)
            {
                ForwardBackward(lattice);
            }

            BuildBestLattice(lattice.BosNode, lattice.EosNode, lattice.BestResultStack);
        }
Beispiel #7
0
        /// <summary>
        /// Builds the list of candidate nodes for the text range starting at
        /// <paramref name="begin"/>. Dictionary matches are collected first; unknown-word
        /// nodes are added when there is no match or when the character info requests it.
        /// </summary>
        /// <param name="begin">Start of the range to analyze.</param>
        /// <param name="end">
        /// End of the range; clamped so that at most <c>ushort.MaxValue</c> characters are scanned.
        /// </param>
        /// <param name="lattice">Lattice used to create nodes and to read parameters.</param>
        /// <returns>
        /// Head of the candidate list linked through <c>BNext</c>, or null when the position
        /// returned by <c>SeekToOtherType</c> is at or beyond the end of the range.
        /// </returns>
        public unsafe TNode Lookup(char *begin, char *end, MeCabLattice <TNode> lattice)
        {
            CharInfo cInfo;
            int      cLen;

            if (end - begin > ushort.MaxValue)
            {
                end = begin + ushort.MaxValue;
            }
            // presumably skips leading characters of the 'space' type — verify in SeekToOtherType
            char *begin2 = property.SeekToOtherType(begin, end, this.space, &cInfo, &cLen);

            if (begin2 >= end)
            {
                return(null);
            }

            TNode resultNode = null;
            var   daResults  = stackalloc DoubleArray.ResultPair[DAResultSize];

            foreach (MeCabDictionary it in this.dic)
            {
                int n = it.CommonPrefixSearch(begin2, (int)(end - begin2), daResults, DAResultSize);
                for (int i = 0; i < n; i++)
                {
                    // Index from the buffer base instead of advancing it, so every dictionary
                    // gets the full DAResultSize slots that are announced to CommonPrefixSearch.
                    var daResult = daResults + i;
#if MMF_DIC
                    var tokenSize = it.GetTokenSize(daResult->Value);
                    var tokens    = it.GetTokens(daResult->Value);
                    for (int j = 0; j < tokenSize; j++)
#else
                    var seg    = it.GetTokens(daResult->Value);
                    var tokens = seg.Array;
                    for (int j = seg.Offset; j < seg.Offset + seg.Count; j++)
#endif
                    {
                        var newNode = lattice.CreateNewNode();
                        this.ReadNodeInfo(it, tokens[j], newNode);
                        newNode.Length   = daResult->Length;
                        newNode.RLength  = (int)(begin2 - begin) + daResult->Length;
                        newNode.Surface  = new string(begin2, 0, newNode.Length);
                        newNode.Stat     = MeCabNodeStat.Nor;
                        newNode.CharType = cInfo.DefaultType;
                        // Prepend to the result list.
                        newNode.BNext    = resultNode;
                        resultNode       = newNode;
                    }
                }
            }

            // Dictionary matches are enough unless the character info asks for unknown words too.
            if (resultNode != null && !cInfo.Invoke)
            {
                return(resultNode);
            }

            char *begin3      = begin2 + 1;
            char *groupBegin3 = null;

            if (cInfo.Group)
            {
                char *   tmp = begin3;
                CharInfo fail;
                begin3 = this.property.SeekToOtherType(begin3, end, cInfo, &fail, &cLen);
                if (cLen <= lattice.Param.MaxGroupingSize)
                {
                    this.AddUnknown(ref resultNode, cInfo, begin, begin2, begin3, lattice);
                }
                // Remember where the grouped node ended so the loop below does not add it again.
                groupBegin3 = begin3;
                begin3      = tmp;
            }

            for (int i = 1; i <= cInfo.Length; i++)
            {
                if (begin3 > end)
                {
                    break;
                }
                if (begin3 == groupBegin3)
                {
                    continue;
                }
                cLen = i;
                this.AddUnknown(ref resultNode, cInfo, begin, begin2, begin3, lattice);
                if (!cInfo.IsKindOf(this.property.GetCharInfo(*begin3)))
                {
                    break;
                }
                begin3 += 1;
            }

            // Guarantee at least one candidate.
            if (resultNode == null)
            {
                this.AddUnknown(ref resultNode, cInfo, begin, begin2, begin3, lattice);
            }

            return(resultNode);
        }
Beispiel #8
0
 /// <summary>
 /// Initializes a remover for the given lattice. <c>List</c> starts out as an empty set
 /// that uses <c>ReferenceEqualityComparer.Instance</c> as its comparer.
 /// </summary>
 /// <param name="lattice">Lattice to remove nodes from.</param>
 public NodeRemover(MeCabLattice <TNode> lattice)
 {
     Lattice = lattice;

     var emptySet = new HashSet <TNode>(ReferenceEqualityComparer.Instance);
     List = emptySet;
 }