コード例 #1
0
ファイル: Discrete.cs プロジェクト: 0xCM/arrows
        /// <summary>
        /// Builds a Discrete distribution that assigns equal weight to every value
        /// between <paramref name="start"/> and <paramref name="end"/>, both included.
        /// </summary>
        /// <param name="numValues">Number of values; used as the length of the probability vector.</param>
        /// <param name="start">First value that receives weight.</param>
        /// <param name="end">Last value that receives weight.</param>
        /// <returns>A Discrete that is uniform over the requested range and zero everywhere else.</returns>
        public static Discrete UniformInRange(int numValues, int start, int end)
        {
            // Begin with an all-zero vector, then mark the requested range with a constant weight.
            var rangeWeights = PiecewiseVector.Zero(numValues);
            rangeWeights.SetToConstantInRange(start, end, 1.0);

            return new Discrete(rangeWeights);
        }
コード例 #2
0
ファイル: Discrete.cs プロジェクト: 0xCM/arrows
        /// <summary>
        /// Builds a Discrete distribution that assigns equal weight to the values covered by
        /// several ranges. The ranges are passed as a flat sequence of start/end pairs
        /// (start, end, start, end, ...), so the sequence must contain an even number of items.
        /// </summary>
        /// <param name="numValues">Number of values; used as the length of the probability vector.</param>
        /// <param name="startEndPairs">Flat sequence of start and end pairs.</param>
        /// <returns>A Discrete that is uniform over the requested ranges and zero everywhere else.</returns>
        public static Discrete UniformInRanges(int numValues, IEnumerable<int> startEndPairs)
        {
            // Begin with an all-zero vector, then mark every requested range with a constant weight.
            var rangeWeights = PiecewiseVector.Zero(numValues);
            rangeWeights.SetToConstantInRanges(startEndPairs, 1.0);

            return new Discrete(rangeWeights);
        }
コード例 #3
0
ファイル: GenericDiscreteBase.cs プロジェクト: kant2002/infer
        /// <summary>
        /// Builds a distribution that assigns equal weight to the values covered by several
        /// ranges. The ranges are passed as a flat sequence of start/end pairs
        /// (start, end, start, end, ...), so the sequence must contain an even number of items.
        /// </summary>
        /// <param name="startEndPairs">Flat sequence of start and end pairs.</param>
        /// <returns>A distribution that is uniform over the requested ranges and zero everywhere else.</returns>
        public static TThis UniformInRanges(IEnumerable<T> startEndPairs)
        {
            TThis result = new TThis();
            var rangeWeights = PiecewiseVector.Zero(result.Dimension);

            // Map every boundary to its integer index before marking the ranges.
            var indexPairs = startEndPairs.Select(boundary => result.ConvertToInt(boundary));
            rangeWeights.SetToConstantInRanges(indexPairs, 1.0);

            result.disc.SetProbs(rangeWeights);
            return result;
        }
コード例 #4
0
ファイル: GenericDiscreteBase.cs プロジェクト: kant2002/infer
        /// <summary>
        /// Builds a distribution that assigns equal weight to every value between
        /// <paramref name="start"/> and <paramref name="end"/>, both included.
        /// </summary>
        /// <param name="start">First value that receives weight.</param>
        /// <param name="end">Last value that receives weight.</param>
        /// <returns>A distribution that is uniform over the requested range and zero everywhere else.</returns>
        public static TThis UniformInRange(T start, T end)
        {
            TThis result = new TThis();
            var rangeWeights = PiecewiseVector.Zero(result.Dimension);

            // Translate both boundaries to integer indices (start first, then end).
            var firstIndex = result.ConvertToInt(start);
            var lastIndex = result.ConvertToInt(end);
            rangeWeights.SetToConstantInRange(firstIndex, lastIndex, 1.0);

            result.disc.SetProbs(rangeWeights);
            return result;
        }
コード例 #5
0
        /// <summary>
        /// Builds a character distribution whose only non-zero entries are the four
        /// nucleobase letters 'A', 'C', 'G' and 'T'.
        /// </summary>
        /// <param name="a">Probability assigned to 'A' (adenine).</param>
        /// <param name="c">Probability assigned to 'C' (cytosine).</param>
        /// <param name="g">Probability assigned to 'G' (guanine).</param>
        /// <param name="t">Probability assigned to 'T' (thymine).</param>
        /// <returns>The resulting character distribution.</returns>
        private static DiscreteChar NucleobaseDist(double a, double c, double g, double t)
        {
            // One slot per possible char value; every slot other than the four bases stays zero.
            Vector baseProbs = PiecewiseVector.Zero(char.MaxValue + 1);
            baseProbs['A'] = a;
            baseProbs['C'] = c;
            baseProbs['G'] = g;
            baseProbs['T'] = t;

            return DiscreteChar.FromVector(baseProbs);
        }
コード例 #6
0
ファイル: GenericDiscreteBase.cs プロジェクト: kant2002/infer
        /// <summary>
        /// Builds a distribution that assigns equal weight to each of the given values
        /// and zero to every other value.
        /// </summary>
        /// <param name="values">The values that receive weight.</param>
        /// <returns>A distribution that is uniform over <paramref name="values"/> and zero elsewhere.</returns>
        public static TThis UniformOver(IEnumerable<T> values)
        {
            TThis result = new TThis();
            var memberWeights = PiecewiseVector.Zero(result.Dimension);

            // Flag the slot of every listed value; a repeated value just rewrites the same slot.
            foreach (T member in values)
            {
                var slot = result.ConvertToInt(member);
                memberWeights[slot] = 1.0;
            }

            result.disc.SetProbs(memberWeights);
            return result;
        }
コード例 #7
0
ファイル: SingleOp.cs プロジェクト: ScriptBox21/dotnet-infer
        /// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="SingleOp"]/message_doc[@name="CharacterAverageConditional(StringDistribution)"]/*'/>
        public static DiscreteChar CharacterAverageConditional(StringDistribution str)
        {
            Argument.CheckIfNotNull(str, "str");

            // A point-mass string is delegated to the overload that takes the point itself.
            if (str.IsPointMass)
            {
                return CharacterAverageConditional(str.Point);
            }

            // Accumulator of per-character log-probabilities, initialised to log(0).
            Vector accumulatedLogProb = PiecewiseVector.Constant(char.MaxValue + 1, double.NegativeInfinity);
            StringAutomaton automaton = str.GetWorkspaceOrPoint();

            StringAutomaton.EpsilonClosure startClosure =
                new Automaton<string, char, DiscreteChar, StringManipulator, StringAutomaton>.EpsilonClosure(automaton, automaton.Start);

            for (int closureIndex = 0; closureIndex < startClosure.Size; ++closureIndex)
            {
                StringAutomaton.State closureState = startClosure.GetStateByIndex(closureIndex);
                Weight closureStateWeight = startClosure.GetStateWeightByIndex(closureIndex);

                foreach (var transition in closureState.Transitions)
                {
                    // Epsilon transitions do not emit a character, so they are skipped here.
                    if (transition.IsEpsilon)
                    {
                        continue;
                    }

                    StringAutomaton.State target = automaton.States[transition.DestinationStateIndex];
                    StringAutomaton.EpsilonClosure targetClosure =
                        new Automaton<string, char, DiscreteChar, StringManipulator, StringAutomaton>.EpsilonClosure(automaton, target);

                    // Only destinations whose epsilon closure has a non-zero end weight contribute.
                    if (targetClosure.EndWeight.IsZero)
                    {
                        continue;
                    }

                    Weight pathWeight = Weight.Product(closureStateWeight, transition.Weight, targetClosure.EndWeight);

                    // Turn the transition's character probabilities into logs before accumulating.
                    var elementLogProbs = transition.ElementDistribution.Value.GetProbs();
                    elementLogProbs.SetToFunction(elementLogProbs, Math.Log);
                    accumulatedLogProb = LogSumExp(accumulatedLogProb, elementLogProbs, pathWeight);
                }
            }

            if (accumulatedLogProb.All(double.IsNegativeInfinity))
            {
                throw new AllZeroException("An input distribution assigns zero probability to all single character strings.");
            }

            // Shift by the log-normalizer before exponentiating.
            Vector normalizedProb = PiecewiseVector.Zero(char.MaxValue + 1);
            double logNormalizer = accumulatedLogProb.LogSumExp();
            normalizedProb.SetToFunction(accumulatedLogProb, logProb => Math.Exp(logProb - logNormalizer));

            return DiscreteChar.FromVector(normalizedProb);
        }
コード例 #8
0
        public void Char()
        {
            // Case 1: a single fixed character.
            var fixedCharDist = StringDistribution.Char('a');
            StringInferenceTestUtilities.TestProbability(fixedCharDist, 1.0, "a");
            StringInferenceTestUtilities.TestProbability(fixedCharDist, 0.0, "aa", string.Empty);

            // Case 2: a character drawn from the range 'a'..'c'; each of the three gets 1/3.
            var rangeCharDist = StringDistribution.Char(DiscreteChar.InRange('a', 'c'));
            StringInferenceTestUtilities.TestProbability(rangeCharDist, 1.0 / 3.0, "a", "b", "c");
            StringInferenceTestUtilities.TestProbability(rangeCharDist, 0.0, "ab", string.Empty);

            // Case 3: a character drawn from an explicit, non-uniform probability vector.
            Vector weightedCharProbs = PiecewiseVector.Zero(char.MaxValue + 1);
            weightedCharProbs['a'] = 0.1;
            weightedCharProbs['b'] = 0.9;

            var weightedCharDist = StringDistribution.SingleElement(DiscreteChar.FromVector(weightedCharProbs));
            StringInferenceTestUtilities.TestProbability(weightedCharDist, 0.1, "a");
            StringInferenceTestUtilities.TestProbability(weightedCharDist, 0.9, "b");
            StringInferenceTestUtilities.TestProbability(weightedCharDist, 0.0, "c", "ab", string.Empty);
        }
コード例 #9
0
ファイル: SingleOp.cs プロジェクト: mesgarpour/ERMER
        /// <summary>EP message to <c>character</c>.</summary>
        /// <param name="str">Incoming message from <c>str</c>.</param>
        /// <returns>The outgoing EP message to the <c>character</c> argument.</returns>
        /// <remarks>
        ///   <para>The outgoing message is a distribution matching the moments of <c>character</c> as the random arguments are varied. The formula is <c>proj[p(character) sum_(str) p(str) factor(character,str)]/p(character)</c>.</para>
        /// </remarks>
        /// <exception cref="AllZeroException">Thrown when every character ends up with a log-probability of negative infinity.</exception>
        public static DiscreteChar CharacterAverageConditional(StringDistribution str)
        {
            Argument.CheckIfNotNull(str, "str");

            // Accumulator of per-character log-probabilities, initialised to log(0).
            Vector accumulatedLogProb = PiecewiseVector.Constant(char.MaxValue + 1, double.NegativeInfinity);
            StringAutomaton automaton = str.GetProbabilityFunction();

            StringAutomaton.EpsilonClosure startClosure = automaton.Start.GetEpsilonClosure();
            for (int closureIndex = 0; closureIndex < startClosure.Size; ++closureIndex)
            {
                StringAutomaton.State closureState = startClosure.GetStateByIndex(closureIndex);
                double closureStateLogWeight = startClosure.GetStateLogWeightByIndex(closureIndex);

                for (int t = 0; t < closureState.Transitions.Count; ++t)
                {
                    StringAutomaton.Transition candidate = closureState.Transitions[t];

                    // Epsilon transitions do not emit a character, so they are skipped here.
                    if (candidate.IsEpsilon)
                    {
                        continue;
                    }

                    StringAutomaton.State target = automaton.States[candidate.DestinationStateIndex];
                    StringAutomaton.EpsilonClosure targetClosure = target.GetEpsilonClosure();

                    // Only destinations whose epsilon closure has a finite end log-weight contribute.
                    if (double.IsNegativeInfinity(targetClosure.EndLogWeight))
                    {
                        continue;
                    }

                    double pathLogWeight = closureStateLogWeight + candidate.LogWeight + targetClosure.EndLogWeight;
                    accumulatedLogProb = LogSumExp(
                        accumulatedLogProb,
                        candidate.ElementDistribution.GetInternalDiscrete().GetLogProbs(),
                        pathLogWeight);
                }
            }

            if (accumulatedLogProb.All(double.IsNegativeInfinity))
            {
                throw new AllZeroException("An input distribution assigns zero probability to all single character strings.");
            }

            // Exponentiate the accumulated log-probabilities to obtain the probability vector.
            Vector characterProb = PiecewiseVector.Zero(char.MaxValue + 1);
            characterProb.SetToFunction(accumulatedLogProb, Math.Exp);

            return DiscreteChar.FromVector(characterProb);
        }
コード例 #10
0
        /// <summary>
        /// Builds the complement of this distribution: a distribution that is uniform over
        /// exactly those characters to which this distribution assigns zero probability.
        /// </summary>
        /// <remarks>
        /// Handy for expressing "any character that is not in this distribution",
        /// for example "not a letter" or "not a word character".
        /// </remarks>
        /// <returns>The complement distribution.</returns>
        public DiscreteChar Complement()
        {
            // The resulting vector has a non-zero common value while its explicit pieces are zero.
            // That helps when displaying the distribution (it visibly reads as a 'complement'),
            // but may have unforeseen side effects, e.g. on performance.
            // todo: consider revisiting this design.
            PiecewiseVector complementWeights;

            if (!this.IsPointMass)
            {
                // General case: weight 1 wherever this distribution has exactly zero probability.
                complementWeights = PiecewiseVector.Zero(this.Dimension);
                complementWeights.SetToFunction(this.disc.GetWorkspace(), prob => prob == 0.0 ? 1.0 : 0.0);
            }
            else
            {
                // Point mass: every character is allowed except the point itself.
                complementWeights = PiecewiseVector.Constant(this.Dimension, 1.0);
                complementWeights[this.Point] = 0;
            }

            return DiscreteChar.FromVector(complementWeights);
        }
コード例 #11
0
        /// <summary>
        /// The entry point of the motif finder. It obtains sequence data via
        /// <c>SampleMotifData</c>, defines a model over motif presence, motif position and
        /// per-position nucleobase probabilities, runs inference with the sequences observed,
        /// prints the results and then waits for a key press.
        /// </summary>
        public static void Main()
        {
            // NOTE(review): presumably fixes the random seed so the sampled data is reproducible - confirm.
            Rand.Restart(1337);

            const int    SequenceCount            = 50;
            const int    SequenceLength           = 25;
            const double MotifPresenceProbability = 0.8;

            //// Sample some data

            // One nucleobase distribution per motif position; the array length defines the motif length.
            var trueMotifNucleobaseDist = new[]
            {
                NucleobaseDist(a: 0.8, c: 0.1, g: 0.05, t: 0.05),
                NucleobaseDist(a: 0.0, c: 0.9, g: 0.05, t: 0.05),
                NucleobaseDist(a: 0.0, c: 0.0, g: 0.5, t: 0.5),
                NucleobaseDist(a: 0.25, c: 0.25, g: 0.25, t: 0.25),
                NucleobaseDist(a: 0.1, c: 0.1, g: 0.1, t: 0.7),
                NucleobaseDist(a: 0.0, c: 0.0, g: 0.9, t: 0.1),
                NucleobaseDist(a: 0.9, c: 0.05, g: 0.0, t: 0.05),
                NucleobaseDist(a: 0.5, c: 0.5, g: 0.0, t: 0.0),
            };

            int motifLength = trueMotifNucleobaseDist.Length;
            // Background characters use equal probability for all four nucleobases.
            var backgroundNucleobaseDist = NucleobaseDist(a: 0.25, c: 0.25, g: 0.25, t: 0.25);

            string[] sequenceData;
            int[]    motifPositionData;
            SampleMotifData(
                SequenceCount,
                SequenceLength,
                MotifPresenceProbability,
                trueMotifNucleobaseDist,
                backgroundNucleobaseDist,
                out sequenceData,
                out motifPositionData);

            //// Specify the model

            // Dirichlet pseudo-counts: 1e-6 for every character, overridden with 2.0 for 'A', 'C', 'G' and 'T'.
            Vector motifNucleobasePseudoCounts = PiecewiseVector.Constant(char.MaxValue + 1, 1e-6);

            motifNucleobasePseudoCounts['A'] = motifNucleobasePseudoCounts['C'] = motifNucleobasePseudoCounts['G'] = motifNucleobasePseudoCounts['T'] = 2.0;

            // One probability vector per motif position, each with the same Dirichlet prior.
            Range motifCharsRange = new Range(motifLength);
            VariableArray <Vector> motifNucleobaseProbs = Variable.Array <Vector>(motifCharsRange);

            motifNucleobaseProbs[motifCharsRange] = Variable.Dirichlet(motifNucleobasePseudoCounts).ForEach(motifCharsRange);

            var sequenceRange = new Range(SequenceCount);
            VariableArray <string> sequences = Variable.Array <string>(sequenceRange);

            VariableArray <int> motifPositions = Variable.Array <int>(sequenceRange);

            // SequenceLength - motifLength + 1 is the number of start positions at which the whole motif fits.
            motifPositions[sequenceRange] = Variable.DiscreteUniform(SequenceLength - motifLength + 1).ForEach(sequenceRange);

            VariableArray <bool> motifPresence = Variable.Array <bool>(sequenceRange);

            motifPresence[sequenceRange] = Variable.Bernoulli(MotifPresenceProbability).ForEach(sequenceRange);

            using (Variable.ForEach(sequenceRange))
            {
                // Motif present: sequence = left background + motif + right background.
                using (Variable.If(motifPresence[sequenceRange]))
                {
                    var motifChars = Variable.Array <char>(motifCharsRange);
                    motifChars[motifCharsRange] = Variable.Char(motifNucleobaseProbs[motifCharsRange]);
                    var motif = Variable.StringFromArray(motifChars);

                    // The left, motif and right lengths add up to SequenceLength.
                    var backgroundLengthRight = SequenceLength - motifLength - motifPositions[sequenceRange];
                    var backgroundLeft        = Variable.StringOfLength(motifPositions[sequenceRange], backgroundNucleobaseDist);
                    var backgroundRight       = Variable.StringOfLength(backgroundLengthRight, backgroundNucleobaseDist);

                    sequences[sequenceRange] = backgroundLeft + motif + backgroundRight;
                }

                // Motif absent: the whole sequence is background characters.
                using (Variable.IfNot(motifPresence[sequenceRange]))
                {
                    sequences[sequenceRange] = Variable.StringOfLength(SequenceLength, backgroundNucleobaseDist);
                }
            }

            //// Infer the motif from sampled data

            sequences.ObservedValue = sequenceData;

            var engine = new InferenceEngine();

            // Expectation propagation, 30 iterations, with the compiler's recommended quality set to Experimental.
            engine.Algorithm                   = new ExpectationPropagation();
            engine.NumberOfIterations          = 30;
            engine.Compiler.RecommendedQuality = QualityBand.Experimental;

            var motifNucleobaseProbsPosterior = engine.Infer <IList <Dirichlet> >(motifNucleobaseProbs);
            var motifPresencePosterior        = engine.Infer <IList <Bernoulli> >(motifPresence);
            var motifPositionPosterior        = engine.Infer <IList <Discrete> >(motifPositions);

            //// Output inference results

            // The sampled data and true distributions are passed along with the posteriors.
            PrintMotifInferenceResults(
                sequenceData,
                motifPositionData,
                trueMotifNucleobaseDist,
                motifNucleobaseProbsPosterior,
                motifPresencePosterior,
                motifPositionPosterior);

            //// Keep the application alive until the user enters a keystroke

            Console.ReadKey();
        }