Пример #1
0
        /// <summary>
        /// Creates a new rule carrying this rule's tag and word together with
        /// a separate copy of its score array.
        /// </summary>
        /// <returns>
        /// A new <see cref="TerminalRule"/>; its scores array is a different
        /// array instance from this rule's.
        /// </returns>
        public TerminalRule Clone()
        {
            return new TerminalRule {
                tag = this.tag,
                word = this.word,
                // Array.Clone is shallow, which is a complete copy for double[].
                scores = (double[])this.scores.Clone ()
            };
        }
Пример #2
0
        /// <summary>
        /// Builds a new rule whose scores are indexed by the merged subtags that
        /// <paramref name="trace"/> maps this rule's current subtags onto.
        /// </summary>
        /// <param name="trace">
        /// For each tag, the merged subtag index of every current subtag. The last
        /// entry for this rule's tag, plus one, is used as the merged subtag count.
        /// </param>
        /// <param name="symbolProbs">
        /// Per-tag, per-subtag values added to each score before it is accumulated.
        /// </param>
        /// <param name="projectedSymbolProbs">
        /// Per-tag, per-merged-subtag values subtracted from the accumulated scores.
        /// </param>
        /// <returns>A new rule with this rule's tag and word and the merged scores.</returns>
        public TerminalRule MergeSymbols(int[][] trace, double[][] symbolProbs, double[][] projectedSymbolProbs)
        {
            int[] mergedIndexOf = trace [tag];
            int mergedCount = mergedIndexOf [mergedIndexOf.Length - 1] + 1;

            var merged = new double[mergedCount];
            ArrayHelper.Fill<double> (merged, double.NegativeInfinity);

            // Accumulate every current subtag into the merged slot it maps to.
            // NOTE(review): LogAdd is presumably a log-domain addition - confirm in MathHelper.
            for (int sub = 0; sub < scores.Length; ++sub) {
                int target = mergedIndexOf [sub];
                double accumulated = merged [target];
                double weighted = scores [sub] + symbolProbs [tag] [sub];

                merged [target] = MathHelper.LogAdd (accumulated, weighted);
            }

            // Subtract the projected value only where both operands are not
            // negative infinity; all other slots are left untouched.
            for (int m = 0; m < merged.Length; ++m) {
                if (double.IsNegativeInfinity (merged [m])
                    || double.IsNegativeInfinity (projectedSymbolProbs [tag] [m])) {
                    continue;
                }

                merged [m] -= projectedSymbolProbs [tag] [m];
            }

            return new TerminalRule {
                tag = this.tag,
                word = this.word,
                scores = merged
            };
        }
Пример #3
0
        /// <summary>
        /// Produces a new rule for the split grammar: each existing subtag score is
        /// written to one slot (subtag count unchanged for this tag) or to two
        /// consecutive slots (subtag count changed) of the new score array.
        /// </summary>
        /// <param name="subtagCount">Current subtag count per tag.</param>
        /// <param name="newSubtagCount">Subtag count per tag after splitting; sizes the new score array.</param>
        /// <param name="RNG">Random source; <c>NextDouble</c> is called once per written slot.</param>
        /// <param name="randomness">Ignored: the value is overwritten with 0 on entry.</param>
        /// <returns>A new rule with this rule's tag and word and the expanded scores.</returns>
        public TerminalRule SplitSymbols(int[] subtagCount, int[] newSubtagCount, Random RNG, double randomness)
        {
            // NOTE(review): the caller-supplied randomness is discarded here, so the
            // perturbation below is always scaled by zero - confirm this is intended.
            randomness = 0;

            int oldCount = subtagCount [tag];
            int newCount = newSubtagCount [tag];
            int copies = (oldCount == newCount) ? 1 : 2;

            // The child split factor is fixed at 1, so this term is log(1).
            double splitPenalty = Math.Log (1);

            double[] split = new double[newCount];
            int next = 0;

            for (int sub = 0; sub < oldCount; ++sub) {
                double baseScore = scores [sub];

                // Slots are filled consecutively: sub * copies + copy == next.
                for (int copy = 0; copy < copies; ++copy, ++next) {
                    // The generator is still advanced even though the result is scaled by zero.
                    double jitter = randomness / 100 * (RNG.NextDouble () - 0.5);

                    split [next] = baseScore - splitPenalty + Math.Log (1.0 + jitter);
                }
            }

            return new TerminalRule (split, tag, word);
        }
Пример #4
0
        /// <summary>
        /// Creates a rule with this rule's tag and word whose score array has
        /// <c>newSubtagCount[tag]</c> entries, each set to negative infinity.
        /// </summary>
        /// <param name="newSubtagCount">Subtag count per tag; the entry for this rule's tag sizes the score array.</param>
        /// <returns>The newly created rule.</returns>
        public TerminalRule CreateRule(int[] newSubtagCount)
        {
            var created = new TerminalRule {
                scores = new double[newSubtagCount [this.tag]],
                tag = this.tag,
                word = this.word
            };

            // Every slot starts at negative infinity.
            ArrayHelper.Fill (created.scores, double.NegativeInfinity);

            return created;
        }
Пример #5
0
        /// <summary>
        /// Writes this rule's word and tag into <paramref name="rule"/> and passes
        /// both score arrays to <c>ArrayHelper.CopyTo</c>.
        /// </summary>
        /// <param name="rule">
        /// Destination rule. Its scores field is not reassigned here; the existing
        /// array is handed to <c>ArrayHelper.CopyTo</c> as the target.
        /// </param>
        public void CopyTo(TerminalRule rule)
        {
            TerminalRule destination = rule;

            destination.word = this.word;
            destination.tag = this.tag;

            // NOTE(review): presumably copies element values from source to target - confirm in ArrayHelper.
            ArrayHelper.CopyTo (this.scores, destination.scores);
        }
Пример #6
0
        /// <summary>
        /// Copies a jagged table of terminal rules, cloning every non-null rule.
        /// </summary>
        /// <param name="rules">The table to copy; may be null and may contain null rows or null rules.</param>
        /// <returns>
        /// Null when <paramref name="rules"/> is null; otherwise a new jagged array
        /// of the same shape in which null rows and null rules stay null and every
        /// other entry is the result of calling <c>Clone()</c> on the original rule.
        /// </returns>
        public static TerminalRule[][] CloneRules(TerminalRule[][] rules)
        {
            if (rules == null)
            {
                return null;
            }

            var copy = new TerminalRule[rules.Length][];

            for (int i = 0; i < rules.Length; ++i)
            {
                TerminalRule[] row = rules[i];

                if (row == null)
                {
                    // A freshly allocated slot is already null.
                    continue;
                }

                var rowCopy = new TerminalRule[row.Length];

                for (int j = 0; j < row.Length; ++j)
                {
                    TerminalRule rule = row[j];

                    if (rule != null)
                    {
                        rowCopy[j] = rule.Clone();
                    }
                }

                copy[i] = rowCopy;
            }

            return copy;
        }
Пример #7
0
        /// <summary>
        /// Builds a grammar from raw rule scores in which every rule holds a single
        /// score and every tag has exactly one subtag.
        /// </summary>
        /// <param name="set">Tag set supplying <c>NTCount</c>, <c>PTCount</c> and <c>ROOTID</c>.</param>
        /// <param name="brawScores">
        /// Binary rule scores indexed [l][r][p], matching the <c>BinaryRule(scores, p, l, r)</c>
        /// constructor call (presumably left child, right child, parent). Null sub-arrays are skipped.
        /// </param>
        /// <param name="urawScores">
        /// Unary rule scores indexed [c][p], matching <c>UnaryRule(scores, p, c)</c>
        /// (presumably child, parent). Null rows are skipped.
        /// </param>
        /// <param name="trawScores">
        /// Terminal rule scores indexed [word][tag], matching <c>TerminalRule(scores, tag, word)</c>.
        /// Null rows are skipped.
        /// </param>
        /// <remarks>
        /// A score that is NaN or infinite creates no rule; its slot stays null.
        /// </remarks>
        public LAPCFGrammar(TagSet set, double[][][] brawScores, double[][] urawScores, double[][] trawScores)
        {
            NTCount = set.NTCount;
            PTCount = set.PTCount;
            ROOTID = set.ROOTID;

            int tags = TotalTagCount;

            // ---- binary rules ----
            brules = new BinaryRule[tags][][];

            for (int left = 0; left < tags; ++left)
            {
                double[][] byRight = brawScores[left];

                if (byRight == null)
                {
                    continue;
                }

                brules[left] = new BinaryRule[tags][];

                for (int right = 0; right < tags; ++right)
                {
                    double[] byParent = byRight[right];

                    if (byParent == null)
                    {
                        continue;
                    }

                    brules[left][right] = new BinaryRule[tags];

                    for (int parent = 0; parent < tags; ++parent)
                    {
                        double score = byParent[parent];

                        if (double.IsInfinity(score) || double.IsNaN(score))
                        {
                            continue;
                        }

                        // Wrap the single score in a 1x1x1 table.
                        var table = new double[][][] { new double[][] { new double[] { score } } };
                        brules[left][right][parent] = new BinaryRule(table, parent, left, right);
                    }
                }
            }

            // ---- unary rules ----
            urules = new UnaryRule[tags][];

            for (int child = 0; child < tags; ++child)
            {
                double[] byParent = urawScores[child];

                if (byParent == null)
                {
                    continue;
                }

                urules[child] = new UnaryRule[tags];

                for (int parent = 0; parent < tags; ++parent)
                {
                    double score = byParent[parent];

                    if (double.IsNaN(score) || double.IsInfinity(score))
                    {
                        continue;
                    }

                    // Wrap the single score in a 1x1 table.
                    var table = new double[][] { new double[] { score } };
                    urules[child][parent] = new UnaryRule(table, parent, child);
                }
            }

            // ---- terminal rules ----
            // Sized from the score table itself rather than from the tag count.
            trules = new TerminalRule[trawScores.Length][];

            for (int word = 0; word < trawScores.Length; ++word)
            {
                double[] byTag = trawScores[word];

                if (byTag == null)
                {
                    continue;
                }

                trules[word] = new TerminalRule[byTag.Length];

                for (int tag = 0; tag < byTag.Length; ++tag)
                {
                    double score = byTag[tag];

                    if (double.IsNaN(score) || double.IsInfinity(score))
                    {
                        continue;
                    }

                    trules[word][tag] = new TerminalRule(new double[] { score }, tag, word);
                }
            }

            // ---- one subtag per tag ----
            var counts = new int[tags];

            for (int i = 0; i < counts.Length; ++i)
            {
                counts[i] = 1;
            }

            subTagCounts = counts;
        }
Пример #8
0
        /// <summary>
        /// Parses one terminal-rule line and stores its score in <c>trules</c>,
        /// creating the containing arrays and the rule itself on first use.
        /// </summary>
        /// <remarks>
        /// The line is split on tab and underscore characters (empty fields are
        /// dropped) and the first four fields are read, in order, as tag name,
        /// subtag index, word and score. Numeric fields are parsed with the
        /// invariant culture so the result does not depend on the current thread
        /// culture (for example one that uses a decimal comma).
        /// NOTE(review): assumes the grammar text is written with invariant
        /// number formatting - confirm against the code that writes these lines.
        /// </remarks>
        /// <param name="ruleString">The rule line to parse.</param>
        /// <param name="vocab">Resolves the word id and sizes <c>trules</c> when it is first allocated.</param>
        /// <param name="tagSet">Resolves the tag id; its <c>PTCount</c> sizes each per-word rule array.</param>
        private void BuildTerminalRule(string ruleString, Vocabulary vocab, TagSet tagSet)
        {
            // Fully qualified so that no additional using directive is required.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            string[] parts = ruleString.Split(new string[] { "\t", "_" }, StringSplitOptions.RemoveEmptyEntries);

            int tag = tagSet.GetID(parts [0]);
            int subtag = int.Parse(parts [1], invariant);
            int word = vocab.GetId(parts [2]);
            double s = double.Parse(parts [3], invariant);

            if (trules == null)
            {
                trules = new TerminalRule[vocab.VocabSize][];
            }

            if (trules [word] == null)
            {
                trules [word] = new TerminalRule[tagSet.PTCount];
            }

            if (trules [word] [tag] == null)
            {
                // First score seen for this (word, tag): allocate one slot per subtag.
                trules [word] [tag] = new TerminalRule(new double[subTagCounts [tag]], tag, word);
                // NOTE(review): ClearScore presumably resets every slot to an "empty" value - confirm in TerminalRule.
                trules [word] [tag].ClearScore();
            }

            trules [word] [tag].scores [subtag] = s;
        }