/// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="SingleOp"]/message_doc[@name="CharacterAverageConditional(StringDistribution)"]/*'/>
public static DiscreteChar CharacterAverageConditional(StringDistribution str)
{
    Argument.CheckIfNotNull(str, "str");

    // A point mass is handled by the overload that takes the point value directly.
    if (str.IsPointMass)
    {
        return CharacterAverageConditional(str.Point);
    }

    // One log-probability slot per possible char value; everything starts at log(0).
    Vector accumulatedLogProb = PiecewiseVector.Constant(char.MaxValue + 1, double.NegativeInfinity);
    StringAutomaton automaton = str.GetWorkspaceOrPoint();

    // States reachable from the start state through epsilon transitions only.
    var startClosure = new Automaton<string, char, DiscreteChar, StringManipulator, StringAutomaton>.EpsilonClosure(automaton, automaton.Start);
    for (int closureIndex = 0; closureIndex < startClosure.Size; ++closureIndex)
    {
        StringAutomaton.State sourceState = startClosure.GetStateByIndex(closureIndex);
        Weight sourceWeight = startClosure.GetStateWeightByIndex(closureIndex);

        foreach (var transition in sourceState.Transitions)
        {
            // Only transitions that consume a character are of interest here.
            if (transition.IsEpsilon)
            {
                continue;
            }

            StringAutomaton.State targetState = automaton.States[transition.DestinationStateIndex];
            var targetClosure = new Automaton<string, char, DiscreteChar, StringManipulator, StringAutomaton>.EpsilonClosure(automaton, targetState);

            // Skip destinations whose epsilon closure carries zero end weight.
            if (targetClosure.EndWeight.IsZero)
            {
                continue;
            }

            Weight pathWeight = Weight.Product(sourceWeight, transition.Weight, targetClosure.EndWeight);

            // Convert the element probabilities to log scale in place before accumulating.
            var elementLogProbs = transition.ElementDistribution.Value.GetProbs();
            elementLogProbs.SetToFunction(elementLogProbs, Math.Log);
            accumulatedLogProb = LogSumExp(accumulatedLogProb, elementLogProbs, pathWeight);
        }
    }

    // Nothing was accumulated: every entry is still log(0).
    if (accumulatedLogProb.All(double.IsNegativeInfinity))
    {
        throw new AllZeroException("An input distribution assigns zero probability to all single character strings.");
    }

    // Normalize in log space, then exponentiate into the probability vector.
    double logNormalizer = accumulatedLogProb.LogSumExp();
    Vector normalizedProb = PiecewiseVector.Zero(char.MaxValue + 1);
    normalizedProb.SetToFunction(accumulatedLogProb, logProb => Math.Exp(logProb - logNormalizer));
    return DiscreteChar.FromVector(normalizedProb);
}
/// <summary>
/// Checks the probabilities that single-character string distributions assign
/// to one-character strings, longer strings and the empty string.
/// </summary>
public void Char()
{
    // Distribution built from a single fixed character.
    var fixedCharDist = StringDistribution.Char('a');
    StringInferenceTestUtilities.TestProbability(fixedCharDist, 1.0, "a");
    StringInferenceTestUtilities.TestProbability(fixedCharDist, 0.0, "aa", string.Empty);

    // Distribution built from a character range 'a'..'c'.
    var rangeChar = DiscreteChar.InRange('a', 'c');
    var rangeCharDist = StringDistribution.Char(rangeChar);
    StringInferenceTestUtilities.TestProbability(rangeCharDist, 1.0 / 3.0, "a", "b", "c");
    StringInferenceTestUtilities.TestProbability(rangeCharDist, 0.0, "ab", string.Empty);

    // Distribution built from an explicit probability vector over characters.
    Vector weightedCharProbs = PiecewiseVector.Zero(char.MaxValue + 1);
    weightedCharProbs['a'] = 0.1;
    weightedCharProbs['b'] = 0.9;
    var weightedChar = DiscreteChar.FromVector(weightedCharProbs);
    var weightedCharDist = StringDistribution.SingleElement(weightedChar);
    StringInferenceTestUtilities.TestProbability(weightedCharDist, 0.1, "a");
    StringInferenceTestUtilities.TestProbability(weightedCharDist, 0.9, "b");
    StringInferenceTestUtilities.TestProbability(weightedCharDist, 0.0, "c", "ab", string.Empty);
}
/// <summary>EP message to <c>character</c>.</summary>
/// <param name="str">Incoming message from <c>str</c>.</param>
/// <returns>The outgoing EP message to the <c>character</c> argument.</returns>
/// <exception cref="AllZeroException">
/// Thrown when no single-character path with non-zero weight was found,
/// i.e. every accumulated log-probability is negative infinity.
/// </exception>
/// <remarks>
/// <para>The outgoing message is a distribution matching the moments of <c>character</c> as the random arguments are varied. The formula is <c>proj[p(character) sum_(str) p(str) factor(character,str)]/p(character)</c>.</para>
/// </remarks>
public static DiscreteChar CharacterAverageConditional(StringDistribution str)
{
    Argument.CheckIfNotNull(str, "str");

    // One log-probability slot per possible char value; everything starts at log(0).
    Vector resultLogProb = PiecewiseVector.Constant(char.MaxValue + 1, double.NegativeInfinity);
    StringAutomaton probFunc = str.GetProbabilityFunction();

    // States reachable from the start state through epsilon transitions only.
    StringAutomaton.EpsilonClosure startEpsilonClosure = probFunc.Start.GetEpsilonClosure();
    for (int stateIndex = 0; stateIndex < startEpsilonClosure.Size; ++stateIndex)
    {
        StringAutomaton.State state = startEpsilonClosure.GetStateByIndex(stateIndex);
        double stateLogWeight = startEpsilonClosure.GetStateLogWeightByIndex(stateIndex);
        for (int transitionIndex = 0; transitionIndex < state.Transitions.Count; ++transitionIndex)
        {
            StringAutomaton.Transition transition = state.Transitions[transitionIndex];

            // Only transitions that consume a character are of interest here.
            if (transition.IsEpsilon)
            {
                continue;
            }

            StringAutomaton.State destState = probFunc.States[transition.DestinationStateIndex];
            StringAutomaton.EpsilonClosure destStateClosure = destState.GetEpsilonClosure();

            // Skip destinations whose epsilon closure has zero end weight (log weight of -infinity).
            if (double.IsNegativeInfinity(destStateClosure.EndLogWeight))
            {
                continue;
            }

            double logWeight = stateLogWeight + transition.LogWeight + destStateClosure.EndLogWeight;
            resultLogProb = LogSumExp(
                resultLogProb,
                transition.ElementDistribution.GetInternalDiscrete().GetLogProbs(),
                logWeight);
        }
    }

    if (resultLogProb.All(double.IsNegativeInfinity))
    {
        throw new AllZeroException("An input distribution assigns zero probability to all single character strings.");
    }

    // Normalize in log space before exponentiating. Exponentiating the raw
    // log-probabilities underflows to an all-zero vector when every path weight
    // is very small but finite, which the -infinity check above does not catch.
    double logNormalizer = resultLogProb.LogSumExp();
    Vector resultProb = PiecewiseVector.Zero(char.MaxValue + 1);
    resultProb.SetToFunction(resultLogProb, lp => Math.Exp(lp - logNormalizer));
    return DiscreteChar.FromVector(resultProb);
}
/// <summary>
/// Builds the complement of this distribution: a distribution that is uniform
/// over exactly those characters to which this distribution assigns zero probability.
/// </summary>
/// <remarks>
/// Handy for expressing "anything except" character classes,
/// e.g. not a letter or not a word character.
/// </remarks>
/// <returns>The created distribution.</returns>
public DiscreteChar Complement()
{
    // The resulting vector has a non-zero common value while its explicit
    // pieces are zero. That makes the distribution display as a 'complement',
    // but may have unforeseen side effects, e.g. on performance.
    // todo: consider revisiting this design.
    if (this.IsPointMass)
    {
        // Everything is allowed except the single point.
        PiecewiseVector allButPoint = PiecewiseVector.Constant(this.Dimension, 1.0);
        allButPoint[this.Point] = 0;
        return DiscreteChar.FromVector(allButPoint);
    }

    // Indicator of the zero-probability entries of the underlying workspace vector.
    PiecewiseVector zeroIndicator = PiecewiseVector.Zero(this.Dimension);
    zeroIndicator.SetToFunction(
        this.disc.GetWorkspace(),
        prob =>
        {
            if (prob == 0.0)
            {
                return 1.0;
            }

            return 0.0;
        });
    return DiscreteChar.FromVector(zeroIndicator);
}