private WordGraphConfidences ComputeWordGraphConfidences(WordGraph wordGraph) { double normalizationFactor = LogSpace.Zero; var backwardProbs = new double[wordGraph.Arcs.Count]; for (int i = wordGraph.Arcs.Count - 1; i >= 0; i--) { WordGraphArc arc = wordGraph.Arcs[i]; double sum = LogSpace.One; if (!wordGraph.FinalStates.Contains(arc.NextState)) { sum = LogSpace.Zero; foreach (int nextArcIndex in wordGraph.GetNextArcIndices(arc.NextState)) { WordGraphArc nextArc = wordGraph.Arcs[nextArcIndex]; sum = LogSpace.Add(sum, LogSpace.Multiple(nextArc.Score, backwardProbs[nextArcIndex])); } } backwardProbs[i] = sum; if (arc.PrevState == WordGraph.InitialState) { normalizationFactor = LogSpace.Add(normalizationFactor, LogSpace.Multiple(arc.Score, backwardProbs[i])); } } var rawWpps = new Dictionary <string, Dictionary <int, double> >(); var forwardProbs = new (double Prob, int Index)[wordGraph.Arcs.Count];
/// <summary>
/// Rewrites a lexical table file in place, keeping for each source index only the entries whose
/// relative weight within that source group is at least <paramref name="threshold"/>.
/// </summary>
/// <remarks>
/// <para>
/// Each record is read as (source index, target index, numerator, fourth value). The fourth value
/// is discarded on read and replaced on write by the recomputed per-source total. In the text
/// format only the first three space-separated fields are parsed.
/// </para>
/// <para>
/// The numerator is treated as a log-domain value: an entry's relative weight is
/// <c>exp(numerator - total)</c>, where <c>total</c> is the <c>LogSpace.Add</c> accumulation of
/// every numerator in the same source group. Entries are visited from the largest numerator down
/// and pruning stops at the first entry that falls below the threshold. A group whose best entry is
/// already below the threshold is dropped entirely.
/// </para>
/// <para>
/// The whole file is loaded into memory before it is rewritten, and it is not rewritten atomically:
/// the file is truncated when the writer is opened.
/// </para>
/// </remarks>
/// <param name="fileName">Path of the lexical table file to prune; it is overwritten.</param>
/// <param name="threshold">Minimum relative weight an entry needs in order to be kept.</param>
private static void PruneLexTable(string fileName, double threshold)
{
    var entries = new List<Tuple<uint, uint, float>>();
#if THOT_TEXT_FORMAT
    using (var reader = new StreamReader(fileName))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] fields = line.Split(' ');
            entries.Add(Tuple.Create(
                uint.Parse(fields[0], CultureInfo.InvariantCulture),
                uint.Parse(fields[1], CultureInfo.InvariantCulture),
                float.Parse(fields[2], CultureInfo.InvariantCulture)));
        }
    }
#else
    using (var reader = new BinaryReader(File.Open(fileName, FileMode.Open)))
    {
        // Track the position with 64-bit values. Narrowing the stream length to int wraps for
        // files of 2 GiB or more, which made the loop read nothing and the subsequent rewrite
        // truncate the file to empty.
        long length = reader.BaseStream.Length;
        long pos = 0;
        while (pos < length)
        {
            uint srcIndex = reader.ReadUInt32();
            pos += sizeof(uint);
            uint trgIndex = reader.ReadUInt32();
            pos += sizeof(uint);
            float numer = reader.ReadSingle();
            pos += sizeof(float);
            // The fourth field is recomputed below, so the stored value is skipped.
            reader.ReadSingle();
            pos += sizeof(float);
            entries.Add(Tuple.Create(srcIndex, trgIndex, numer));
        }
    }
#endif

#if THOT_TEXT_FORMAT
    using (var writer = new StreamWriter(fileName))
#else
    using (var writer = new BinaryWriter(File.Open(fileName, FileMode.Create)))
#endif
    {
        foreach (IGrouping<uint, Tuple<uint, uint, float>> g in entries.GroupBy(e => e.Item1).OrderBy(g => g.Key))
        {
            // Largest numerator first, so pruning can stop at the first entry below the threshold.
            Tuple<uint, uint, float>[] groupEntries = g.OrderByDescending(e => e.Item3).ToArray();

            // Total over the complete group, used to normalize each entry.
            double lcSrc = groupEntries.Select(e => e.Item3).Skip(1)
                .Aggregate((double)groupEntries[0].Item3, (a, n) => LogSpace.Add(a, n));

            // Total over the surviving entries only; -99999 is the starting value of the
            // accumulation (presumably a "log of zero" sentinel - confirm against LogSpace).
            double newLcSrc = -99999;
            int count = 0;
            foreach (Tuple<uint, uint, float> entry in groupEntries)
            {
                double prob = Math.Exp(entry.Item3 - lcSrc);
                if (prob < threshold)
                {
                    break;
                }

                newLcSrc = LogSpace.Add(newLcSrc, entry.Item3);
                count++;
            }

            for (int i = 0; i < count; i++)
            {
#if THOT_TEXT_FORMAT
                // Format with the invariant culture so the output can be parsed back by the
                // invariant-culture reader above regardless of the machine's locale.
                writer.Write(string.Format(CultureInfo.InvariantCulture,
                    "{0} {1} {2:0.######} {3:0.######}\n",
                    groupEntries[i].Item1, groupEntries[i].Item2, groupEntries[i].Item3, newLcSrc));
#else
                writer.Write(groupEntries[i].Item1);
                writer.Write(groupEntries[i].Item2);
                writer.Write(groupEntries[i].Item3);
                writer.Write((float)newLcSrc);
#endif
            }
        }
    }
}