/// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="SingleOp"]/message_doc[@name="CharacterAverageConditional(StringDistribution)"]/*'/>
public static DiscreteChar CharacterAverageConditional(StringDistribution str)
{
    Argument.CheckIfNotNull(str, "str");

    // A point-mass input is forwarded to the overload that accepts the point value itself.
    if (str.IsPointMass)
    {
        return CharacterAverageConditional(str.Point);
    }

    // One slot per possible char value; every slot starts at log(0).
    int alphabetSize = char.MaxValue + 1;
    Vector accumulatedLogProb = PiecewiseVector.Constant(alphabetSize, double.NegativeInfinity);

    StringAutomaton automaton = str.GetWorkspaceOrPoint();
    StringAutomaton.EpsilonClosure startClosure =
        new Automaton<string, char, DiscreteChar, StringManipulator, StringAutomaton>.EpsilonClosure(automaton, automaton.Start);

    // Walk every state in the epsilon closure of the start state and look at its outgoing transitions.
    for (int closureIndex = 0; closureIndex < startClosure.Size; ++closureIndex)
    {
        StringAutomaton.State sourceState = startClosure.GetStateByIndex(closureIndex);
        Weight sourceWeight = startClosure.GetStateWeightByIndex(closureIndex);

        foreach (var transition in sourceState.Transitions)
        {
            // Epsilon transitions carry no character, so they contribute nothing here.
            if (transition.IsEpsilon)
            {
                continue;
            }

            StringAutomaton.State targetState = automaton.States[transition.DestinationStateIndex];
            StringAutomaton.EpsilonClosure targetClosure =
                new Automaton<string, char, DiscreteChar, StringManipulator, StringAutomaton>.EpsilonClosure(automaton, targetState);

            // Skip transitions whose destination closure has zero end weight.
            if (targetClosure.EndWeight.IsZero)
            {
                continue;
            }

            // Combined weight: source state weight * transition weight * end weight of the destination closure.
            Weight pathWeight = Weight.Product(sourceWeight, transition.Weight, targetClosure.EndWeight);

            // Turn the transition's character probabilities into log-probabilities in place,
            // then fold them, scaled by the path weight, into the running result.
            var elementLogProbs = transition.ElementDistribution.Value.GetProbs();
            elementLogProbs.SetToFunction(elementLogProbs, Math.Log);
            accumulatedLogProb = LogSumExp(accumulatedLogProb, elementLogProbs, pathWeight);
        }
    }

    if (accumulatedLogProb.All(double.IsNegativeInfinity))
    {
        throw new AllZeroException("An input distribution assigns zero probability to all single character strings.");
    }

    // Normalize in log space before exponentiating.
    double logTotal = accumulatedLogProb.LogSumExp();
    Vector normalizedProb = PiecewiseVector.Zero(alphabetSize);
    normalizedProb.SetToFunction(accumulatedLogProb, logProb => Math.Exp(logProb - logTotal));
    return DiscreteChar.FromVector(normalizedProb);
}
/// <summary>EP message to <c>character</c>.</summary>
/// <param name="str">Incoming message from <c>str</c>.</param>
/// <returns>The outgoing EP message to the <c>character</c> argument.</returns>
/// <remarks>
/// <para>The outgoing message is a distribution matching the moments of <c>character</c> as the random arguments are varied. The formula is <c>proj[p(character) sum_(str) p(str) factor(character,str)]/p(character)</c>.</para>
/// </remarks>
/// <exception cref="AllZeroException">
/// Thrown when every accumulated log-probability is negative infinity, i.e. the input
/// assigns zero probability to all single character strings.
/// </exception>
public static DiscreteChar CharacterAverageConditional(StringDistribution str)
{
    Argument.CheckIfNotNull(str, "str");

    // One slot per possible char value; every slot starts at log(0).
    Vector resultLogProb = PiecewiseVector.Constant(char.MaxValue + 1, double.NegativeInfinity);
    StringAutomaton probFunc = str.GetProbabilityFunction();
    StringAutomaton.EpsilonClosure startEpsilonClosure = probFunc.Start.GetEpsilonClosure();

    // Visit every state in the epsilon closure of the start state.
    for (int stateIndex = 0; stateIndex < startEpsilonClosure.Size; ++stateIndex)
    {
        StringAutomaton.State state = startEpsilonClosure.GetStateByIndex(stateIndex);
        double stateLogWeight = startEpsilonClosure.GetStateLogWeightByIndex(stateIndex);

        for (int transitionIndex = 0; transitionIndex < state.Transitions.Count; ++transitionIndex)
        {
            StringAutomaton.Transition transition = state.Transitions[transitionIndex];

            // Epsilon transitions carry no character, so they contribute nothing here.
            if (transition.IsEpsilon)
            {
                continue;
            }

            StringAutomaton.State destState = probFunc.States[transition.DestinationStateIndex];
            StringAutomaton.EpsilonClosure destStateClosure = destState.GetEpsilonClosure();

            // Skip transitions whose destination closure has zero end weight (log weight of -infinity).
            if (double.IsNegativeInfinity(destStateClosure.EndLogWeight))
            {
                continue;
            }

            // Log weights add: source state + transition + end weight of the destination closure.
            double logWeight = stateLogWeight + transition.LogWeight + destStateClosure.EndLogWeight;
            resultLogProb = LogSumExp(
                resultLogProb,
                transition.ElementDistribution.GetInternalDiscrete().GetLogProbs(),
                logWeight);
        }
    }

    if (resultLogProb.All(double.IsNegativeInfinity))
    {
        throw new AllZeroException("An input distribution assigns zero probability to all single character strings.");
    }

    // Normalize in log space before exponentiating. Exponentiating the raw log values
    // directly lets every entry underflow to 0 when all of them are very negative,
    // even though the check above has established that at least one is finite.
    // NOTE(review): assumes DiscreteChar.FromVector normalizes its input, so rescaling
    // here does not change the resulting distribution otherwise - confirm.
    Vector resultProb = PiecewiseVector.Zero(char.MaxValue + 1);
    double logNormalizer = resultLogProb.LogSumExp();
    resultProb.SetToFunction(resultLogProb, lp => Math.Exp(lp - logNormalizer));
    return DiscreteChar.FromVector(resultProb);
}
/// <summary>
/// Builds the complement of this distribution: a distribution that is uniform over
/// every character to which this distribution assigns zero probability.
/// </summary>
/// <remarks>
/// Handy for expressing "anything except" character classes,
/// for example "not a letter" or "not a word character".
/// </remarks>
/// <returns>The complement distribution.</returns>
public DiscreteChar Complement()
{
    // Design note: the vector built here has a non-zero common value while its
    // individual pieces are zero. That helps when displaying the distribution
    // (it reads as a 'complement'), but it may have unforeseen side effects,
    // e.g. on performance.
    // todo: consider revisiting this design.
    if (this.IsPointMass)
    {
        // Weight 1 everywhere, except at the single point this distribution sits on.
        PiecewiseVector allButPoint = PiecewiseVector.Constant(this.Dimension, 1.0);
        allButPoint[this.Point] = 0;
        return DiscreteChar.FromVector(allButPoint);
    }

    // Map each probability to an indicator: 1 where it is exactly zero, 0 otherwise.
    PiecewiseVector zeroIndicator = PiecewiseVector.Zero(this.Dimension);
    zeroIndicator.SetToFunction(this.disc.GetWorkspace(), prob => prob == 0.0 ? 1.0 : 0.0);
    return DiscreteChar.FromVector(zeroIndicator);
}
/// <summary>
/// Entry point of the motif finder: samples sequence data, defines the model,
/// runs inference on the sampled sequences and prints the results.
/// </summary>
public static void Main()
{
    // Restart the random number generator with a fixed seed value.
    Rand.Restart(1337);

    const int SequenceCount = 50;
    const int SequenceLength = 25;
    const double MotifPresenceProbability = 0.8;

    //// Sample some data

    // Per-position nucleobase distributions of the motif used to generate the data.
    var groundTruthMotif = new[]
    {
        NucleobaseDist(a: 0.8, c: 0.1, g: 0.05, t: 0.05),
        NucleobaseDist(a: 0.0, c: 0.9, g: 0.05, t: 0.05),
        NucleobaseDist(a: 0.0, c: 0.0, g: 0.5, t: 0.5),
        NucleobaseDist(a: 0.25, c: 0.25, g: 0.25, t: 0.25),
        NucleobaseDist(a: 0.1, c: 0.1, g: 0.1, t: 0.7),
        NucleobaseDist(a: 0.0, c: 0.0, g: 0.9, t: 0.1),
        NucleobaseDist(a: 0.9, c: 0.05, g: 0.0, t: 0.05),
        NucleobaseDist(a: 0.5, c: 0.5, g: 0.0, t: 0.0),
    };
    int motifLength = groundTruthMotif.Length;

    // Background characters: equal weight on each of the four nucleobases.
    var background = NucleobaseDist(a: 0.25, c: 0.25, g: 0.25, t: 0.25);

    string[] sampledSequences;
    int[] sampledMotifPositions;
    SampleMotifData(
        SequenceCount,
        SequenceLength,
        MotifPresenceProbability,
        groundTruthMotif,
        background,
        out sampledSequences,
        out sampledMotifPositions);

    //// Specify the model

    // Dirichlet pseudo-counts: tiny for every char value, 2.0 for the four nucleobase letters.
    Vector pseudoCounts = PiecewiseVector.Constant(char.MaxValue + 1, 1e-6);
    foreach (char nucleobase in "TGCA")
    {
        pseudoCounts[nucleobase] = 2.0;
    }

    var motifIndex = new Range(motifLength);
    var motifProbs = Variable.Array<Vector>(motifIndex);
    motifProbs[motifIndex] = Variable.Dirichlet(pseudoCounts).ForEach(motifIndex);

    var sequenceIndex = new Range(SequenceCount);
    var observedSequences = Variable.Array<string>(sequenceIndex);

    // Motif start position: uniform over every offset at which the whole motif fits.
    var motifStart = Variable.Array<int>(sequenceIndex);
    motifStart[sequenceIndex] = Variable.DiscreteUniform(SequenceLength - motifLength + 1).ForEach(sequenceIndex);

    var hasMotif = Variable.Array<bool>(sequenceIndex);
    hasMotif[sequenceIndex] = Variable.Bernoulli(MotifPresenceProbability).ForEach(sequenceIndex);

    using (Variable.ForEach(sequenceIndex))
    {
        using (Variable.If(hasMotif[sequenceIndex]))
        {
            // Sequence = background prefix + motif + background suffix.
            var motifLetters = Variable.Array<char>(motifIndex);
            motifLetters[motifIndex] = Variable.Char(motifProbs[motifIndex]);
            var motifString = Variable.StringFromArray(motifLetters);

            var suffixLength = SequenceLength - motifLength - motifStart[sequenceIndex];
            var prefix = Variable.StringOfLength(motifStart[sequenceIndex], background);
            var suffix = Variable.StringOfLength(suffixLength, background);

            observedSequences[sequenceIndex] = prefix + motifString + suffix;
        }

        using (Variable.IfNot(hasMotif[sequenceIndex]))
        {
            // No motif: the whole sequence is drawn from the background.
            observedSequences[sequenceIndex] = Variable.StringOfLength(SequenceLength, background);
        }
    }

    //// Infer the motif from sampled data

    observedSequences.ObservedValue = sampledSequences;

    var inferenceEngine = new InferenceEngine
    {
        Algorithm = new ExpectationPropagation(),
        NumberOfIterations = 30,
    };
    inferenceEngine.Compiler.RecommendedQuality = QualityBand.Experimental;

    var motifProbsPosterior = inferenceEngine.Infer<IList<Dirichlet>>(motifProbs);
    var hasMotifPosterior = inferenceEngine.Infer<IList<Bernoulli>>(hasMotif);
    var motifStartPosterior = inferenceEngine.Infer<IList<Discrete>>(motifStart);

    //// Output inference results

    PrintMotifInferenceResults(
        sampledSequences,
        sampledMotifPositions,
        groundTruthMotif,
        motifProbsPosterior,
        hasMotifPosterior,
        motifStartPosterior);

    //// Keep the application alive until the user enters a keystroke
    Console.ReadKey();
}