private WordGraphConfidences ComputeWordGraphConfidences(WordGraph wordGraph)
        {
            double normalizationFactor = LogSpace.Zero;
            var    backwardProbs       = new double[wordGraph.Arcs.Count];

            for (int i = wordGraph.Arcs.Count - 1; i >= 0; i--)
            {
                WordGraphArc arc = wordGraph.Arcs[i];
                double       sum = LogSpace.One;
                if (!wordGraph.FinalStates.Contains(arc.NextState))
                {
                    sum = LogSpace.Zero;
                    foreach (int nextArcIndex in wordGraph.GetNextArcIndices(arc.NextState))
                    {
                        WordGraphArc nextArc = wordGraph.Arcs[nextArcIndex];
                        sum = LogSpace.Add(sum, LogSpace.Multiple(nextArc.Score, backwardProbs[nextArcIndex]));
                    }
                }
                backwardProbs[i] = sum;
                if (arc.PrevState == WordGraph.InitialState)
                {
                    normalizationFactor = LogSpace.Add(normalizationFactor,
                                                       LogSpace.Multiple(arc.Score, backwardProbs[i]));
                }
            }

            var rawWpps      = new Dictionary <string, Dictionary <int, double> >();
            var forwardProbs = new (double Prob, int Index)[wordGraph.Arcs.Count];
示例#2
0
        /// <summary>
        /// Prunes a lexical table file in place. All entries are loaded, grouped by their first
        /// (source) index, and within each group sorted by descending score. Entries are kept from
        /// the top of a group until one is reached whose share of the group total,
        /// <c>Math.Exp(score - groupTotal)</c>, is below <paramref name="threshold"/>; that entry
        /// and every lower-scored entry in the group are dropped. The file is then overwritten with
        /// the surviving entries, each paired with the recomputed total of the kept scores.
        /// </summary>
        /// <remarks>
        /// Scores are combined with <c>LogSpace.Add</c> and compared via <c>Math.Exp</c> of a
        /// difference, so they are presumably log-probabilities - confirm against the producer of
        /// the table. The on-disk format is selected at compile time by <c>THOT_TEXT_FORMAT</c>.
        /// </remarks>
        /// <param name="fileName">Path of the table file; it is read fully and then rewritten.</param>
        /// <param name="threshold">
        /// Minimum normalized value an entry must have to be kept within its source group.
        /// </param>
        private static void PruneLexTable(string fileName, double threshold)
        {
            var entries = new List<Tuple<uint, uint, float>>();

#if THOT_TEXT_FORMAT
            using (var reader = new StreamReader(fileName))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Only the first three space-separated fields are used; any further field
                    // (the old group total) is recomputed below.
                    string[] fields = line.Split(' ');
                    entries.Add(Tuple.Create(uint.Parse(fields[0], CultureInfo.InvariantCulture),
                                             uint.Parse(fields[1], CultureInfo.InvariantCulture),
                                             float.Parse(fields[2], CultureInfo.InvariantCulture)));
                }
            }
#else
            // One binary record = uint source index, uint target index, float score, and a
            // trailing float that is skipped on read and regenerated on write.
            const int recordSize = 2 * sizeof(uint) + 2 * sizeof(float);
            using (var reader = new BinaryReader(File.Open(fileName, FileMode.Open)))
            {
                // Use long arithmetic: casting the stream length to int overflows for files >= 2 GiB.
                long length = reader.BaseStream.Length;
                for (long pos = 0; pos < length; pos += recordSize)
                {
                    uint srcIndex = reader.ReadUInt32();
                    uint trgIndex = reader.ReadUInt32();
                    float numer = reader.ReadSingle();
                    reader.ReadSingle(); // discard the stored total; it is recomputed after pruning

                    entries.Add(Tuple.Create(srcIndex, trgIndex, numer));
                }
            }
#endif

#if THOT_TEXT_FORMAT
            using (var writer = new StreamWriter(fileName))
#else
            using (var writer = new BinaryWriter(File.Open(fileName, FileMode.Create)))
#endif
            {
                foreach (IGrouping<uint, Tuple<uint, uint, float>> g in entries.GroupBy(e => e.Item1).OrderBy(grp => grp.Key))
                {
                    // Highest score first, so pruning can stop at the first entry under the threshold.
                    Tuple<uint, uint, float>[] groupEntries = g.OrderByDescending(e => e.Item3).ToArray();

                    // Total of all scores in the group, accumulated with LogSpace.Add.
                    double lcSrc = groupEntries.Select(e => e.Item3).Skip(1)
                                   .Aggregate((double)groupEntries[0].Item3, (a, n) => LogSpace.Add(a, n));

                    // NOTE(review): -99999 is the accumulator's starting value; presumably a
                    // "log of zero" sentinel expected by the table's consumer - confirm before changing.
                    double newLcSrc = -99999;
                    int    count    = 0;
                    foreach (Tuple<uint, uint, float> entry in groupEntries)
                    {
                        double prob = Math.Exp(entry.Item3 - lcSrc);
                        if (prob < threshold)
                        {
                            break;
                        }
                        newLcSrc = LogSpace.Add(newLcSrc, entry.Item3);
                        count++;
                    }

                    for (int i = 0; i < count; i++)
                    {
#if THOT_TEXT_FORMAT
                        // Format with the invariant culture so the output can be parsed back by the
                        // invariant-culture reader above regardless of the machine's locale.
                        writer.Write(string.Format(CultureInfo.InvariantCulture,
                                                   "{0} {1} {2:0.######} {3:0.######}\n", groupEntries[i].Item1,
                                                   groupEntries[i].Item2, groupEntries[i].Item3, newLcSrc));
#else
                        writer.Write(groupEntries[i].Item1);
                        writer.Write(groupEntries[i].Item2);
                        writer.Write(groupEntries[i].Item3);
                        writer.Write((float)newLcSrc);
#endif
                    }
                }
            }
        }