Beispiel #1
0
            /// <summary>
            /// Computes the sum of the current weight function and <paramref name="weightFunction"/>.
            /// </summary>
            /// <param name="weightFunction">The weight function to add to the current one.</param>
            /// <returns>
            /// The sum. When neither operand is an automaton, the result stays in dictionary form
            /// as long as its entry count does not exceed <c>MaxDictionarySize</c>; otherwise
            /// it is converted to an automaton.
            /// </returns>
            public MultiRepresentationWeightFunction<TDictionary> Sum(MultiRepresentationWeightFunction<TDictionary> weightFunction)
            {
                // A canonic zero operand contributes nothing, so a copy of the other operand is the answer.
                // TODO: return the operand itself (no clone) when automata become immutable
                if (weightFunction.IsCanonicZero())
                {
                    return Clone();
                }

                if (IsCanonicZero())
                {
                    return weightFunction.Clone();
                }

                var left = this.weightFunction;
                var right = weightFunction.weightFunction;

                // As soon as one side is an automaton, the sum is computed on automata.
                if (right is TAutomaton rightAutomaton)
                {
                    return FromAutomaton(AsAutomaton().Sum(rightAutomaton));
                }

                if (left is TAutomaton leftAutomaton)
                {
                    return FromAutomaton(leftAutomaton.Sum(weightFunction.AsAutomaton()));
                }

                // From here on each side is either a dictionary or a point mass;
                // point masses are lifted to single-entry dictionaries.
                var leftDictionary = left as TDictionary
                    ?? DictionaryWeightFunction<TDictionary>.FromPoint(((PointMassWeightFunction)left).Point);
                var rightDictionary = right as TDictionary
                    ?? DictionaryWeightFunction<TDictionary>.FromPoint(((PointMassWeightFunction)right).Point);

                var summed = leftDictionary.Sum(rightDictionary);

                // Too many entries for the dictionary representation: switch to an automaton.
                if (summed.Dictionary.Count > MaxDictionarySize)
                {
                    return FromAutomaton(summed.AsAutomaton());
                }

                return FromDictionary(summed);
            }
Beispiel #2
0
            /// <summary>
            /// Computes the maximum difference between the current weight function and <paramref name="that"/>.
            /// </summary>
            /// <param name="that">The weight function to compare with.</param>
            /// <returns>
            /// When one operand is a canonic zero: <c>0.0</c> if the other operand reports
            /// <c>IsZero()</c>, and <see cref="Math.E"/> otherwise. In all other cases the value
            /// produced by the <c>MaxDiff</c> of the underlying representations.
            /// </returns>
            /// <exception cref="InvalidOperationException">
            /// Thrown when either operand holds a representation of an unexpected type.
            /// </exception>
            public double MaxDiff(MultiRepresentationWeightFunction<TDictionary> that)
            {
                if (IsCanonicZero())
                {
                    if (that.IsZero())
                    {
                        return 0.0;
                    }

                    return Math.E;
                }

                if (that.IsCanonicZero())
                {
                    if (IsZero())
                    {
                        return 0.0;
                    }

                    return Math.E;
                }

                var other = that.weightFunction;

                // The representation of the current function is examined first, then the other one.
                if (weightFunction is TAutomaton thisAutomaton)
                {
                    return thisAutomaton.MaxDiff(that.AsAutomaton());
                }

                if (weightFunction is TDictionary thisDictionary)
                {
                    if (other is TAutomaton automatonVsDictionary)
                    {
                        return AsAutomaton().MaxDiff(automatonVsDictionary);
                    }

                    if (other is TDictionary dictionaryVsDictionary)
                    {
                        return thisDictionary.MaxDiff(dictionaryVsDictionary);
                    }

                    if (other is PointMassWeightFunction pointVsDictionary)
                    {
                        // Lift the point mass to a single-entry dictionary to compare like with like.
                        return thisDictionary.MaxDiff(DictionaryWeightFunction<TDictionary>.FromPoint(pointVsDictionary.Point));
                    }

                    throw new InvalidOperationException("Other function has an invalid type");
                }

                if (weightFunction is PointMassWeightFunction thisPointMass)
                {
                    if (other is TAutomaton automatonVsPoint)
                    {
                        return AsAutomaton().MaxDiff(automatonVsPoint);
                    }

                    if (other is TDictionary dictionaryVsPoint)
                    {
                        // The dictionary side drives the comparison; the point mass is lifted to a dictionary.
                        return dictionaryVsPoint.MaxDiff(DictionaryWeightFunction<TDictionary>.FromPoint(thisPointMass.Point));
                    }

                    if (other is PointMassWeightFunction pointVsPoint)
                    {
                        return thisPointMass.MaxDiff(pointVsPoint);
                    }

                    throw new InvalidOperationException("Other function has an invalid type");
                }

                throw new InvalidOperationException("Current function has an invalid type");
            }
Beispiel #3
0
            /// <summary>
            /// Creates a weight function for the current weight function repeated between
            /// <paramref name="minTimes"/> and <paramref name="maxTimes"/> times.
            /// </summary>
            /// <param name="minTimes">The minimum number of repetitions. Must be non-negative. Defaults to 1.</param>
            /// <param name="maxTimes">
            /// The maximum number of repetitions, or <see langword="null"/> for no upper bound.
            /// When specified, must not be less than <paramref name="minTimes"/>.
            /// </param>
            /// <returns>
            /// A point mass or dictionary when the current representation is a point mass or a
            /// dictionary and the result is small enough; otherwise an automaton.
            /// </returns>
            public MultiRepresentationWeightFunction<TDictionary> Repeat(int minTimes = 1, int? maxTimes = null)
            {
                Argument.CheckIfInRange(minTimes >= 0, nameof(minTimes), "The minimum number of repetitions must be non-negative.");
                Argument.CheckIfValid(!maxTimes.HasValue || maxTimes.Value >= minTimes, "The maximum number of repetitions must not be less than the minimum number.");

                // Point mass with a bounded repetition count: the result has (maxTimes - minTimes + 1)
                // distinct sequences, which must fit into a dictionary.
                if (weightFunction is PointMassWeightFunction pointMass && maxTimes.HasValue && maxTimes - minTimes < MaxDictionarySize)
                {
                    var newSequenceElements = new List<TElement>(SequenceManipulator.GetLength(pointMass.Point) * maxTimes.Value);
                    for (int i = 0; i < minTimes; ++i)
                    {
                        newSequenceElements.AddRange(pointMass.Point);
                    }

                    if (minTimes == maxTimes)
                    {
                        // Exactly one possible repetition count: the result is again a point mass.
                        return FromPoint(SequenceManipulator.ToSequence(newSequenceElements));
                    }
                    else
                    {
                        // NOTE(review): every entry gets weight 1 / (maxTimes - minTimes) although there are
                        // (maxTimes - minTimes + 1) entries - confirm the intended scale against the
                        // dictionary and automaton implementations of Repeat.
                        Weight uniformWeight = Weight.FromValue(1.0 / (maxTimes.Value - minTimes));
                        Dictionary<TSequence, Weight> dict = new Dictionary<TSequence, Weight>(maxTimes.Value - minTimes + 1);
                        dict.Add(SequenceManipulator.ToSequence(newSequenceElements), uniformWeight);

                        // Keep extending the same buffer by one more copy of the point per iteration.
                        for (int i = minTimes + 1; i <= maxTimes.Value; ++i)
                        {
                            newSequenceElements.AddRange(pointMass.Point);
                            dict.Add(SequenceManipulator.ToSequence(newSequenceElements), uniformWeight);
                        }

                        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(dict));
                    }
                }

                if (weightFunction is TDictionary dictionary && maxTimes.HasValue)
                {
                    var resultSupportSize = ResultSupportSize(dictionary.Dictionary.Count, minTimes, maxTimes.Value);
                    if (resultSupportSize <= MaxDictionarySize)
                    {
                        return FromDictionary(dictionary.Repeat(minTimes, maxTimes.Value, (int)resultSupportSize + 1));
                    }
                }

                // General case: unbounded repetition, automaton representation, or a result that is too large.
                return FromAutomaton(AsAutomaton().Repeat(minTimes, maxTimes));

                // Computes sum_{k = minReps}^{maxReps} sourceSupportSize^k, i.e. the number of
                // concatenations of minReps..maxReps entries drawn from the source dictionary.
                double ResultSupportSize(int sourceSupportSize, int minReps, int maxReps)
                {
                    if (sourceSupportSize == 1)
                    {
                        // The closed-form geometric series below divides by (1 - sourceSupportSize);
                        // with a ratio of 1 that is 0 / 0 = NaN, which fails every comparison and
                        // needlessly forces the automaton fallback. Each term of the series is 1 here.
                        return (double)maxReps - minReps + 1;
                    }

                    return Math.Pow(sourceSupportSize, minReps) * (1 - Math.Pow(sourceSupportSize, maxReps - minReps + 1)) / (1 - sourceSupportSize);
                }
            }
Beispiel #4
0
            /// <summary>
            /// Appends <paramref name="weightFunction"/> to the current weight function.
            /// </summary>
            /// <param name="weightFunction">The weight function to append.</param>
            /// <param name="group">
            /// The group passed on to the automaton <c>Append</c>. The point mass and dictionary
            /// shortcuts are taken only when this is 0.
            /// </param>
            /// <returns>
            /// <c>Zero()</c> when either operand has no underlying representation; otherwise the
            /// concatenation, as a point mass, a dictionary or an automaton.
            /// </returns>
            public MultiRepresentationWeightFunction<TDictionary> Append(MultiRepresentationWeightFunction<TDictionary> weightFunction, int group = 0)
            {
                var left = this.weightFunction;
                var right = weightFunction.weightFunction;

                if (left == null || right == null)
                {
                    return Zero();
                }

                // Classify both operands once; each of these is null when the representation differs.
                var leftPoint = left as PointMassWeightFunction;
                var leftDictionary = left as TDictionary;
                var rightPoint = right as PointMassWeightFunction;
                var rightDictionary = right as TDictionary;

                if (group == 0)
                {
                    if (rightPoint != null)
                    {
                        if (leftPoint != null)
                        {
                            return FromPointMass(leftPoint.Append(rightPoint.Point));
                        }

                        if (leftDictionary != null)
                        {
                            return FromDictionary(leftDictionary.Append(rightPoint.Point));
                        }
                    }

                    if (rightDictionary != null)
                    {
                        if (leftPoint != null)
                        {
                            return FromDictionary(DictionaryWeightFunction<TDictionary>.FromPoint(leftPoint.Point).Append(rightDictionary));
                        }

                        // Dictionary-by-dictionary is only done when the product of the entry counts stays within the limit.
                        if (leftDictionary != null
                            && leftDictionary.Dictionary.Count * rightDictionary.Dictionary.Count <= MaxDictionarySize)
                        {
                            return FromDictionary(leftDictionary.Append(rightDictionary));
                        }
                    }
                }

                // Automaton fallback; a point mass on the right is appended directly as a sequence.
                if (rightPoint != null)
                {
                    return FromAutomaton(left.AsAutomaton().Append(rightPoint.Point, group));
                }

                return FromAutomaton(left.AsAutomaton().Append(right.AsAutomaton(), group));
            }
Beispiel #5
0
            /// <summary>
            /// Scales the current weight function by a factor given in the log domain.
            /// </summary>
            /// <param name="logScale">The logarithm of the scale factor.</param>
            /// <returns>
            /// <c>Zero()</c> when there is no underlying representation; a single-entry dictionary
            /// for a point mass; otherwise the scaled dictionary or automaton.
            /// </returns>
            /// <exception cref="InvalidOperationException">
            /// Thrown when the underlying representation has an unexpected type.
            /// </exception>
            public MultiRepresentationWeightFunction<TDictionary> ScaleLog(double logScale)
            {
                if (weightFunction == null)
                {
                    return Zero();
                }

                if (weightFunction is PointMassWeightFunction pointMass)
                {
                    // A point mass cannot carry a weight, so the result becomes a one-entry dictionary.
                    var scaledEntry = new KeyValuePair<TSequence, Weight>(pointMass.Point, Weight.FromLogValue(logScale));
                    return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(new[] { scaledEntry }));
                }

                if (weightFunction is TDictionary dictionary)
                {
                    return FromDictionary(dictionary.ScaleLog(logScale));
                }

                if (weightFunction is TAutomaton automaton)
                {
                    return FromAutomaton(automaton.ScaleLog(logScale));
                }

                throw new InvalidOperationException("Current function has an invalid type");
            }
Beispiel #6
0
                /// <inheritdoc/>
                public MultiRepresentationWeightFunction<TDictionary> ConstantOnSupportOfLog(double logValue, MultiRepresentationWeightFunction<TDictionary> weightFunction)
                {
                    // When the support cannot be enumerated within MaxDictionarySize,
                    // delegate to the automaton implementation.
                    if (!weightFunction.TryEnumerateSupportInternal(MaxDictionarySize, out var support))
                    {
                        return FromAutomaton(weightFunction.AsAutomaton().ConstantOnSupportLog(logValue));
                    }

                    // Empty support.
                    if (!support.Any())
                    {
                        return Zero();
                    }

                    // A single sequence with log-value 0 is represented as a point mass.
                    if (logValue == 0 && !support.Skip(1).Any())
                    {
                        return FromPoint(support.Single());
                    }

                    // Otherwise every sequence of the support gets the same weight.
                    var sharedWeight = Weight.FromLogValue(logValue);
                    var entries = support.Select(sequence => new KeyValuePair<TSequence, Weight>(sequence, sharedWeight));
                    return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(entries));
                }
Beispiel #7
0
                /// <summary>
                /// Creates a weight function from a collection of sequence/value pairs.
                /// </summary>
                /// <param name="sequenceWeightPairs">
                /// The sequences and the values associated with them. The input is enumerated at most once.
                /// </param>
                /// <returns>
                /// <c>Zero()</c> for an empty input; a point mass for a single pair whose value is
                /// exactly 1.0; a dictionary when the number of pairs does not exceed
                /// <c>MaxDictionarySize</c>; otherwise an automaton.
                /// </returns>
                public MultiRepresentationWeightFunction<TDictionary> FromValues(IEnumerable<KeyValuePair<TSequence, double>> sequenceWeightPairs)
                {
                    // Materialize once so that a lazily evaluated input is never enumerated a second time;
                    // every branch below works on this collection only.
                    var collection = sequenceWeightPairs as ICollection<KeyValuePair<TSequence, double>> ?? sequenceWeightPairs.ToList();

                    if (collection.Count == 0)
                    {
                        return Zero();
                    }

                    if (collection.Count == 1)
                    {
                        var onlyPair = collection.Single();
                        if (onlyPair.Value == 1.0)
                        {
                            return FromPoint(onlyPair.Key);
                        }
                    }

                    if (collection.Count <= MaxDictionarySize)
                    {
                        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromValues(collection));
                    }

                    return FromAutomaton(Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TAutomaton>.FromValues(collection));
                }
Beispiel #8
0
            /// <summary>
            /// Computes a weighted sum of the current weight function and
            /// <paramref name="weightFunction"/>, with both weights given in the log domain.
            /// </summary>
            /// <param name="logWeight1">The log of the weight applied to the current weight function.</param>
            /// <param name="logWeight2">The log of the weight applied to <paramref name="weightFunction"/>.</param>
            /// <param name="weightFunction">The other weight function.</param>
            /// <returns>
            /// The weighted sum. When neither operand is an automaton, the result stays in dictionary
            /// form as long as its entry count does not exceed <c>MaxDictionarySize</c>; otherwise
            /// it is converted to an automaton.
            /// </returns>
            public MultiRepresentationWeightFunction<TDictionary> SumLog(double logWeight1, double logWeight2, MultiRepresentationWeightFunction<TDictionary> weightFunction)
            {
                // When one term vanishes (canonic zero operand or a log-weight of negative infinity),
                // the result is simply the other operand scaled by its own weight.
                if (weightFunction.IsCanonicZero() || double.IsNegativeInfinity(logWeight2))
                {
                    return ScaleLog(logWeight1);
                }

                if (IsCanonicZero() || double.IsNegativeInfinity(logWeight1))
                {
                    return weightFunction.ScaleLog(logWeight2);
                }

                var left = this.weightFunction;
                var right = weightFunction.weightFunction;

                // As soon as one side is an automaton, the sum is computed on automata.
                if (right is TAutomaton rightAutomaton)
                {
                    return FromAutomaton(AsAutomaton().SumLog(logWeight1, logWeight2, rightAutomaton));
                }

                if (left is TAutomaton leftAutomaton)
                {
                    return FromAutomaton(leftAutomaton.SumLog(logWeight1, logWeight2, weightFunction.AsAutomaton()));
                }

                // From here on each side is either a dictionary or a point mass;
                // point masses are lifted to single-entry dictionaries.
                var leftDictionary = left as TDictionary
                    ?? DictionaryWeightFunction<TDictionary>.FromPoint(((PointMassWeightFunction)left).Point);
                var rightDictionary = right as TDictionary
                    ?? DictionaryWeightFunction<TDictionary>.FromPoint(((PointMassWeightFunction)right).Point);

                var summed = leftDictionary.SumLog(logWeight1, logWeight2, rightDictionary);

                // Too many entries for the dictionary representation: switch to an automaton.
                if (summed.Dictionary.Count > MaxDictionarySize)
                {
                    return FromAutomaton(summed.AsAutomaton());
                }

                return FromDictionary(summed);
            }
Beispiel #9
0
            /// <summary>
            /// Computes the product of the current weight function and <paramref name="weightFunction"/>.
            /// </summary>
            /// <param name="weightFunction">The weight function to multiply the current one by.</param>
            /// <returns>
            /// <c>Zero()</c> when either operand is a canonic zero or no sequence survives the product;
            /// a point mass or dictionary when one of the shortcuts below applies;
            /// otherwise the product of the automaton representations.
            /// </returns>
            public MultiRepresentationWeightFunction <TDictionary> Product(MultiRepresentationWeightFunction <TDictionary> weightFunction)
            {
                if (IsCanonicZero() || weightFunction.IsCanonicZero())
                {
                    return(Zero());
                }

                // Shortcut 1: one operand is a point mass. `pointMass` is that operand and
                // `other` is the remaining one; the current function is preferred when both are point masses.
                PointMassWeightFunction pointMass = null;
                IWeightFunction         other     = null;

                if (this.weightFunction is PointMassWeightFunction thisPointMass)
                {
                    pointMass = thisPointMass;
                    other     = weightFunction.weightFunction;
                }
                else if (weightFunction.weightFunction is PointMassWeightFunction otherPointMass)
                {
                    pointMass = otherPointMass;
                    other     = this.weightFunction;
                }
                // The shortcut evaluates `other` at the point, and is skipped when `other` uses groups.
                if (pointMass != null && !other.UsesGroups)
                {
                    var logValue = other.GetLogValue(pointMass.Point);
                    if (double.IsNegativeInfinity(logValue))
                    {
                        // The point has log-value negative infinity under `other`.
                        return(Zero());
                    }
                    else if (logValue == 0.0)
                    {
                        // Log-value of exactly 0: the point mass is returned as is.
                        return(FromPointMass(pointMass));
                    }
                    else
                    {
                        // Any other log-value: a one-entry dictionary carrying that weight.
                        return(FromDictionary(
                                   DictionaryWeightFunction <TDictionary> .FromDistinctWeights(
                                       new[] { new KeyValuePair <TSequence, Weight>(pointMass.Point, Weight.FromLogValue(logValue)) })));
                    }
                }

                // Shortcut 2: one operand is a dictionary. `other` is reused for the remaining operand.
                TDictionary dictionary = null;

                if (this.weightFunction is TDictionary thisDictionary)
                {
                    if (weightFunction.weightFunction is TDictionary secondDictionary)
                    {
                        // Both operands are dictionaries: delegate to the dictionary product.
                        return(FromDictionary(thisDictionary.Product(secondDictionary)));
                    }

                    dictionary = thisDictionary;
                    other      = weightFunction.weightFunction;
                }
                else if (weightFunction.weightFunction is TDictionary otherDictionary)
                {
                    dictionary = otherDictionary;
                    other      = this.weightFunction;
                }

                if (dictionary != null && !other.UsesGroups)
                {
                    // Evaluate `other` on every non-zero dictionary entry and keep the entries
                    // whose log-value under `other` is not negative infinity.
                    var resultList = new List <KeyValuePair <TSequence, Weight> >(dictionary.Dictionary.Count);
                    foreach (var kvp in dictionary.Dictionary)
                    {
                        if (!kvp.Value.IsZero)
                        {
                            var otherLogValue = other.GetLogValue(kvp.Key);
                            if (!double.IsNegativeInfinity(otherLogValue))
                            {
                                resultList.Add(new KeyValuePair <TSequence, Weight>(kvp.Key, kvp.Value * Weight.FromLogValue(otherLogValue)));
                            }
                        }
                    }
                    if (resultList.Count == 0)
                    {
                        return(Zero());
                    }
                    else if (resultList.Count == 1 && resultList[0].Value.LogValue == 0.0)
                    {
                        // A single surviving entry with log-weight exactly 0 is returned as a point mass.
                        return(FromPoint(resultList[0].Key));
                    }
                    else
                    {
                        return(FromDictionary(
                                   DictionaryWeightFunction <TDictionary> .FromDistinctWeights(resultList)));
                    }
                }

                // No shortcut applied: multiply the automaton representations.
                return(FromAutomaton(AsAutomaton().Product(weightFunction.AsAutomaton())));
            }
Beispiel #10
0
            /// <summary>
            /// Returns a weight function whose representation has been simplified where possible:
            /// a dictionary or automaton may be replaced by <c>Zero()</c>, a point mass or a dictionary.
            /// </summary>
            /// <returns>
            /// The simplified weight function, or a clone of the current one when no simplification applies.
            /// </returns>
            /// <remarks>
            /// NOTE(review): when exactly one sequence remains, a point mass is returned without
            /// inspecting the weight of that sequence - confirm callers do not depend on that weight.
            /// </remarks>
            public MultiRepresentationWeightFunction <TDictionary> NormalizeStructure()
            {
                switch (weightFunction)
                {
                case TDictionary dictionary:
                    // Only "zero, one, or more than one" non-zero entries matters, so stop after two.
                    var filteredTruncated = dictionary.Dictionary.Where(kvp => !kvp.Value.IsZero).Take(2).ToList();
                    if (filteredTruncated.Count == 0)
                    {
                        return(Zero());
                    }
                    else if (filteredTruncated.Count == 1)
                    {
                        return(FromPoint(filteredTruncated.Single().Key));
                    }
                    else
                    {
                        return(FromDictionary(dictionary.NormalizeStructure()));
                    }

                case TAutomaton automaton:
                    // Automata that use groups are never simplified here.
                    if (!automaton.UsesGroups)
                    {
                        // Try to list the support, provided no log-value override is set on the automaton.
                        if (automaton.LogValueOverride == null && automaton.TryEnumerateSupport(MaxDictionarySize, out var support, false, 4 * MaxDictionarySize, true))
                        {
                            var list = support.ToList();
                            if (list.Count == 0)
                            {
                                return(Zero());
                            }
                            else if (list.Count == 1)
                            {
                                return(FromPoint(list[0]));
                            }
                            else
                            {
                                // Create a dictionary only if we expect it to be smaller than the automaton.
                                // Approximation uses sizes corresponding to a string automaton, which is the most used one.
                                // We don't require this comparison to be always precise - most of the times is good enough.
                                var dictSizeApprox     = list.Sum(el => SequenceManipulator.GetLength(el)) * sizeof(char) + (24 + 8 + sizeof(double)) * list.Count;
                                var automatonSizeAprox =
                                    24                                                                 // header
                                    + 16 + 2 * sizeof(double)                                          // 2 double? fields
                                                                                                       // Data Container
                                    + 2 * sizeof(int)                                                  // Flags and StartStateIndex
                                    + 2 * 24                                                           // Headers of the states and transitions arrays
                                    + automaton.Data.States.Count * (2 * sizeof(int) + sizeof(double)) // states
                                    + automaton.Data.Transitions.Count * 24                            // 24 is the size of one transition w/o storage for discrete char
                                    + automaton.Data.Transitions.Count(tr => !tr.IsEpsilon) * 80;
                                // The 80 bytes per non-epsilon transition are 40 + 40:
                                // 40 is the size of a DiscreteChar filled with nulls;
                                // another 40 is the size of an array with a single char range.
                                // Any specific DiscreteChar can be larger or can be cached.
                                // 40 seems an ok approximation for the average case.
                                if (dictSizeApprox < automatonSizeAprox)
                                {
                                    // The dictionary weights are read back from the automaton for each sequence.
                                    return(FromDictionary(
                                               DictionaryWeightFunction <TDictionary> .FromDistinctWeights(
                                                   list.Select(seq => new KeyValuePair <TSequence, Weight>(seq, Weight.FromLogValue(automaton.GetLogValue(seq)))))));
                                }
                            }
                        }
                        // TryEnumerateSupport(..., maxTraversedPaths, ...) is allowed to quit early
                        // on complex automata, so we need to explicitly check for point mass
                        var point = automaton.TryComputePoint();
                        if (point != null)
                        {
                            return(FromPoint(point));
                        }
                    }
                    break;
                }

                // Nothing to simplify (point mass, no representation, or an automaton kept as is).
                return(Clone()); // TODO: replace with `this` after making automata immutable
            }
Beispiel #11
0
                /// <summary>
                /// Computes the sum of the given weight functions.
                /// </summary>
                /// <param name="weightFunctions">
                /// The weight functions to sum. The input is enumerated at most once.
                /// </param>
                /// <returns>
                /// <c>Zero()</c> when nothing was accumulated; a point mass when the sum is a single
                /// sequence with log-weight exactly 0; a dictionary when all operands are point masses
                /// or dictionaries and the result has at most <c>MaxDictionarySize</c> entries;
                /// otherwise the sum of the automaton representations.
                /// </returns>
                public MultiRepresentationWeightFunction<TDictionary> Sum(IEnumerable<MultiRepresentationWeightFunction<TDictionary>> weightFunctions)
                {
                    // The operands may be needed twice (dictionary attempt, then automaton fallback).
                    // Materialize them once so that a lazy or one-shot enumerable yields the same
                    // operands both times.
                    var functions = weightFunctions as ICollection<MultiRepresentationWeightFunction<TDictionary>> ?? weightFunctions.ToList();

                    var dictionary = new Dictionary<TSequence, Weight>(MaxDictionarySize, SequenceManipulator.SequenceEqualityComparer);
                    bool resultFitsDictionary = true;

                    foreach (var weightFunction in functions)
                    {
                        if (weightFunction.IsCanonicZero())
                        {
                            continue;
                        }

                        if (weightFunction.weightFunction is PointMassWeightFunction pointMass)
                        {
                            resultFitsDictionary = TryAccumulate(pointMass.Point, Weight.One);
                        }
                        else if (weightFunction.weightFunction is TDictionary wfDictionary)
                        {
                            foreach (var kvp in wfDictionary.Dictionary)
                            {
                                if (!TryAccumulate(kvp.Key, kvp.Value))
                                {
                                    resultFitsDictionary = false;
                                    break;
                                }
                            }
                        }
                        else
                        {
                            // Any other representation cannot be folded into the dictionary.
                            resultFitsDictionary = false;
                        }

                        if (!resultFitsDictionary)
                        {
                            break;
                        }
                    }

                    if (resultFitsDictionary)
                    {
                        if (dictionary.Count == 0)
                        {
                            return Zero();
                        }

                        if (dictionary.Count == 1)
                        {
                            var singleKvp = dictionary.Single();
                            if (singleKvp.Value.LogValue == 0.0)
                            {
                                return FromPoint(singleKvp.Key);
                            }
                        }

                        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(dictionary));
                    }

                    // Fallback: sum the automaton representations of all operands.
                    var automaton = new TAutomaton();

                    automaton.SetToSum(functions.Select(wf => wf.AsAutomaton()));
                    return FromAutomaton(automaton);

                    // Adds `weight` to the entry for `sequence`, creating the entry if there is room.
                    // Returns false when a new entry would exceed MaxDictionarySize.
                    bool TryAccumulate(TSequence sequence, Weight weight)
                    {
                        if (dictionary.TryGetValue(sequence, out Weight oldWeight))
                        {
                            dictionary[sequence] = oldWeight + weight;
                            return true;
                        }

                        if (dictionary.Count < MaxDictionarySize)
                        {
                            dictionary.Add(sequence, weight);
                            return true;
                        }

                        return false;
                    }
                }