Пример #1
0
            /// <summary>
            /// Builds the weight function obtained by repeating this one between
            /// <paramref name="minTimes"/> and <paramref name="maxTimes"/> times. A point-mass or
            /// dictionary representation is used for the result when the size checks below allow it;
            /// otherwise the automaton representation is used.
            /// </summary>
            /// <param name="minTimes">Minimum number of repetitions; must be non-negative.</param>
            /// <param name="maxTimes">
            /// Maximum number of repetitions; must not be less than <paramref name="minTimes"/>.
            /// <see langword="null"/> is forwarded unchanged to the automaton's <c>Repeat</c>
            /// (presumably meaning "no upper bound" - confirm against the automaton implementation).
            /// </param>
            /// <returns>The repeated weight function.</returns>
            public MultiRepresentationWeightFunction <TDictionary> Repeat(int minTimes = 1, int?maxTimes = null)
            {
                Argument.CheckIfInRange(minTimes >= 0, nameof(minTimes), "The minimum number of repetitions must be non-negative.");
                Argument.CheckIfValid(!maxTimes.HasValue || maxTimes.Value >= minTimes, "The maximum number of repetitions must not be less than the minimum number.");

                // Fast path: a point mass repeated a bounded number of times has at most
                // (maxTimes - minTimes + 1) distinct results, so it can be enumerated directly.
                if (weightFunction is PointMassWeightFunction pointMass && maxTimes.HasValue && maxTimes - minTimes < MaxDictionarySize)
                {
                    var pointLength = SequenceManipulator.GetLength(pointMass.Point);

                    // An empty point repeated any number of times is still the empty sequence, so a
                    // range of repetition counts would yield the same dictionary key several times.
                    // That case is left to the automaton fallback at the end of the method.
                    if (minTimes == maxTimes.Value || pointLength > 0)
                    {
                        var newSequenceElements = new List<TElement>(pointLength * maxTimes.Value);
                        for (int i = 0; i < minTimes; ++i)
                        {
                            newSequenceElements.AddRange(pointMass.Point);
                        }

                        if (minTimes == maxTimes.Value)
                        {
                            // Exactly one repetition count: the result is again a point mass.
                            return FromPoint(SequenceManipulator.ToSequence(newSequenceElements));
                        }

                        // NOTE(review): (maxTimes - minTimes + 1) entries are added below, each with
                        // weight 1 / (maxTimes - minTimes) - confirm that this normalization is intended.
                        Weight uniformWeight = Weight.FromValue(1.0 / (maxTimes.Value - minTimes));
                        var dict = new Dictionary<TSequence, Weight>(maxTimes.Value - minTimes + 1);
                        dict.Add(SequenceManipulator.ToSequence(newSequenceElements), uniformWeight);

                        // Grow the same buffer by one copy of the point per step and snapshot it.
                        for (int i = minTimes + 1; i <= maxTimes.Value; ++i)
                        {
                            newSequenceElements.AddRange(pointMass.Point);
                            dict.Add(SequenceManipulator.ToSequence(newSequenceElements), uniformWeight);
                        }

                        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(dict));
                    }
                }

                // Dictionary path: only taken when the estimated result support stays small enough.
                if (weightFunction is TDictionary dictionary && maxTimes.HasValue)
                {
                    var resultSupportSize = ResultSupportSize(dictionary.Dictionary.Count, minTimes, maxTimes.Value);
                    if (resultSupportSize <= MaxDictionarySize)
                    {
                        return FromDictionary(dictionary.Repeat(minTimes, maxTimes.Value, (int)resultSupportSize + 1));
                    }
                }

                // General fallback: delegate to the automaton representation.
                return FromAutomaton(AsAutomaton().Repeat(minTimes, maxTimes));

                // Upper bound on the number of sequences in the result:
                // sum over k in [minReps, maxReps] of sourceSupportSize^k.
                double ResultSupportSize(int sourceSupportSize, int minReps, int maxReps)
                {
                    if (sourceSupportSize == 1)
                    {
                        // The closed-form geometric sum below divides by (1 - sourceSupportSize) and
                        // would evaluate to NaN here; with ratio 1 the sum is just the number of terms.
                        return maxReps - minReps + 1;
                    }

                    return Math.Pow(sourceSupportSize, minReps) * (1 - Math.Pow(sourceSupportSize, maxReps - minReps + 1)) / (1 - sourceSupportSize);
                }
            }
Пример #2
0
            /// <summary>
            /// Tries to re-express this weight function in a simpler representation (zero, point mass
            /// or dictionary), based on the contents of the current dictionary or automaton.
            /// </summary>
            /// <returns>
            /// The re-expressed weight function, or a clone of this one when no simpler form was found.
            /// </returns>
            public MultiRepresentationWeightFunction <TDictionary> NormalizeStructure()
            {
                if (weightFunction is TDictionary dictionary)
                {
                    // Two non-zero entries are enough to tell "none", "exactly one" and "several" apart.
                    var nonZeroEntries = dictionary.Dictionary
                        .Where(entry => !entry.Value.IsZero)
                        .Take(2)
                        .ToList();

                    switch (nonZeroEntries.Count)
                    {
                        case 0:
                            return Zero();

                        case 1:
                            return FromPoint(nonZeroEntries[0].Key);

                        default:
                            return FromDictionary(dictionary.NormalizeStructure());
                    }
                }

                if (weightFunction is TAutomaton automaton && !automaton.UsesGroups)
                {
                    if (automaton.LogValueOverride == null
                        && automaton.TryEnumerateSupport(MaxDictionarySize, out var support, false, 4 * MaxDictionarySize, true))
                    {
                        var sequences = support.ToList();
                        if (sequences.Count == 0)
                        {
                            return Zero();
                        }

                        if (sequences.Count == 1)
                        {
                            return FromPoint(sequences[0]);
                        }

                        // Switch to a dictionary only when it is expected to be smaller than the automaton.
                        // The estimate assumes string-automaton sizes (the most common case) and does not
                        // need to be exact - being right most of the time is sufficient.
                        var totalElementCount = sequences.Sum(sequence => SequenceManipulator.GetLength(sequence));
                        var dictionaryBytes = totalElementCount * sizeof(char) + (24 + 8 + sizeof(double)) * sequences.Count;

                        var transitions = automaton.Data.Transitions;

                        // Fixed part: object header, two double? fields, and in the data container the
                        // Flags and StartStateIndex ints plus the headers of the states and transitions arrays.
                        const int fixedAutomatonBytes = 24 + 16 + 2 * sizeof(double) + 2 * sizeof(int) + 2 * 24;

                        // Per transition: 24 bytes without storage for the discrete char. Every non-epsilon
                        // transition adds 80 more: 40 for a DiscreteChar filled with nulls and 40 for an array
                        // holding a single char range. A particular DiscreteChar may be larger or cached;
                        // 40 is taken as a reasonable average.
                        var automatonBytes =
                            fixedAutomatonBytes
                            + automaton.Data.States.Count * (2 * sizeof(int) + sizeof(double))
                            + transitions.Count * 24
                            + transitions.Count(transition => !transition.IsEpsilon) * 80;

                        if (dictionaryBytes < automatonBytes)
                        {
                            var weightedSequences = sequences.Select(
                                sequence => new KeyValuePair<TSequence, Weight>(
                                    sequence,
                                    Weight.FromLogValue(automaton.GetLogValue(sequence))));
                            return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(weightedSequences));
                        }
                    }

                    // Support enumeration is bounded by a maximum number of traversed paths and may give up
                    // early on complex automata, so the point-mass case is checked for explicitly as well.
                    var point = automaton.TryComputePoint();
                    if (point != null)
                    {
                        return FromPoint(point);
                    }
                }

                return Clone(); // TODO: replace with `this` after making automata immutable
            }