/// <summary>
/// Computes the sum of the current weight function and <paramref name="weightFunction"/>.
/// </summary>
/// <param name="weightFunction">The weight function to add to the current one.</param>
/// <returns>
/// A clone of the non-zero operand when the other one is a canonic zero; an automaton-backed sum when
/// either operand is an automaton; otherwise a dictionary-backed sum, converted to an automaton
/// if it holds more than <c>MaxDictionarySize</c> entries.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> Sum(MultiRepresentationWeightFunction<TDictionary> weightFunction)
{
    if (weightFunction.IsCanonicZero())
    {
        // TODO: return `this` when automata become immutable
        return Clone();
    }

    if (IsCanonicZero())
    {
        // TODO: return weightFunction when automata become immutable
        return weightFunction.Clone();
    }

    // If either side is already an automaton, the whole sum is computed on automata.
    if (weightFunction.weightFunction is TAutomaton rhsAutomaton)
    {
        return FromAutomaton(AsAutomaton().Sum(rhsAutomaton));
    }

    if (this.weightFunction is TAutomaton lhsAutomaton)
    {
        return FromAutomaton(lhsAutomaton.Sum(weightFunction.AsAutomaton()));
    }

    // From here on each operand is either a point mass or a dictionary;
    // point masses are lifted to single-entry dictionaries.
    var lhsDictionary =
        this.weightFunction as TDictionary
        ?? DictionaryWeightFunction<TDictionary>.FromPoint(((PointMassWeightFunction)this.weightFunction).Point);
    var rhsDictionary =
        weightFunction.weightFunction as TDictionary
        ?? DictionaryWeightFunction<TDictionary>.FromPoint(((PointMassWeightFunction)weightFunction.weightFunction).Point);

    var summed = lhsDictionary.Sum(rhsDictionary);

    // Keep the dictionary representation only while it stays within the size limit.
    return summed.Dictionary.Count <= MaxDictionarySize
        ? FromDictionary(summed)
        : FromAutomaton(summed.AsAutomaton());
}
/// <summary>
/// Computes the maximum difference between the current weight function and <paramref name="that"/>.
/// </summary>
/// <param name="that">The weight function to compare with.</param>
/// <returns>
/// <c>0.0</c> or <see cref="Math.E"/> when one side is a canonic zero (depending on whether the other side
/// is zero as well); otherwise the value reported by the <c>MaxDiff</c> of the underlying representations.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when either weight function wraps an unexpected representation type.
/// </exception>
public double MaxDiff(MultiRepresentationWeightFunction<TDictionary> that)
{
    if (IsCanonicZero())
    {
        return that.IsZero() ? 0.0 : Math.E;
    }

    if (that.IsCanonicZero())
    {
        return IsZero() ? 0.0 : Math.E;
    }

    // Automaton on this side: compare as automata.
    if (weightFunction is TAutomaton automaton)
    {
        return automaton.MaxDiff(that.AsAutomaton());
    }

    // Dictionary on this side.
    if (weightFunction is TDictionary dictionary)
    {
        if (that.weightFunction is TAutomaton otherAutomaton)
        {
            return AsAutomaton().MaxDiff(otherAutomaton);
        }

        if (that.weightFunction is TDictionary otherDictionary)
        {
            return dictionary.MaxDiff(otherDictionary);
        }

        if (that.weightFunction is PointMassWeightFunction otherPointMass)
        {
            // Lift the point mass to a single-entry dictionary to compare like with like.
            return dictionary.MaxDiff(DictionaryWeightFunction<TDictionary>.FromPoint(otherPointMass.Point));
        }

        throw new InvalidOperationException("Other function has an invalid type");
    }

    // Point mass on this side.
    if (weightFunction is PointMassWeightFunction pointMass)
    {
        if (that.weightFunction is TAutomaton otherAutomaton)
        {
            return AsAutomaton().MaxDiff(otherAutomaton);
        }

        if (that.weightFunction is TDictionary otherDictionary)
        {
            return otherDictionary.MaxDiff(DictionaryWeightFunction<TDictionary>.FromPoint(pointMass.Point));
        }

        if (that.weightFunction is PointMassWeightFunction otherPointMass)
        {
            return pointMass.MaxDiff(otherPointMass);
        }

        throw new InvalidOperationException("Other function has an invalid type");
    }

    throw new InvalidOperationException("Current function has an invalid type");
}
/// <summary>
/// Creates a weight function for the current one repeated between <paramref name="minTimes"/>
/// and <paramref name="maxTimes"/> times.
/// </summary>
/// <param name="minTimes">The minimum number of repetitions. Must be non-negative.</param>
/// <param name="maxTimes">
/// The maximum number of repetitions, or <see langword="null"/> for no upper bound.
/// Must not be less than <paramref name="minTimes"/>.
/// </param>
/// <returns>
/// A point mass or dictionary when the result is small enough to be enumerated,
/// otherwise an automaton-backed weight function.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> Repeat(int minTimes = 1, int? maxTimes = null)
{
    Argument.CheckIfInRange(minTimes >= 0, nameof(minTimes), "The minimum number of repetitions must be non-negative.");
    Argument.CheckIfValid(!maxTimes.HasValue || maxTimes.Value >= minTimes, "The maximum number of repetitions must not be less than the minimum number.");

    if (weightFunction is PointMassWeightFunction pointMass && maxTimes.HasValue && maxTimes - minTimes < MaxDictionarySize)
    {
        var newSequenceElements = new List<TElement>(SequenceManipulator.GetLength(pointMass.Point) * maxTimes.Value);
        for (int i = 0; i < minTimes; ++i)
        {
            newSequenceElements.AddRange(pointMass.Point);
        }

        if (minTimes == maxTimes)
        {
            return FromPoint(SequenceManipulator.ToSequence(newSequenceElements));
        }

        // NOTE(review): the weight is 1 / (maxTimes - minTimes) although maxTimes - minTimes + 1
        // sequences are produced. Kept exactly as in the original; confirm the intended scaling.
        Weight uniformWeight = Weight.FromValue(1.0 / (maxTimes.Value - minTimes));
        int extraRepetitions = maxTimes.Value - minTimes;

        // Use the sequence comparer and accumulate weights instead of calling Add():
        // repeating an empty point yields the same sequence for every repetition count,
        // which previously resulted in a duplicate-key exception for value-equal sequences.
        var dict = new Dictionary<TSequence, Weight>(extraRepetitions + 1, SequenceManipulator.SequenceEqualityComparer);

        // Count-based loop: cannot overflow even when maxTimes is int.MaxValue.
        for (int k = 0; k <= extraRepetitions; ++k)
        {
            if (k > 0)
            {
                newSequenceElements.AddRange(pointMass.Point);
            }

            var sequence = SequenceManipulator.ToSequence(newSequenceElements);
            dict[sequence] = dict.TryGetValue(sequence, out Weight accumulated)
                ? accumulated + uniformWeight
                : uniformWeight;
        }

        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(dict));
    }

    if (weightFunction is TDictionary dictionary && maxTimes.HasValue)
    {
        var resultSupportSize = ResultSupportSize(dictionary.Dictionary.Count, minTimes, maxTimes.Value);
        if (resultSupportSize <= MaxDictionarySize)
        {
            return FromDictionary(dictionary.Repeat(minTimes, maxTimes.Value, (int)resultSupportSize + 1));
        }
    }

    return FromAutomaton(AsAutomaton().Repeat(minTimes, maxTimes));

    // Upper bound on the number of distinct sequences in the result:
    // the geometric sum n^min + n^(min+1) + ... + n^max for a source support of size n.
    double ResultSupportSize(int sourceSupportSize, int minReps, int maxReps)
    {
        if (sourceSupportSize == 1)
        {
            // The closed form below evaluates to 0 / 0 = NaN for a ratio of 1, which made
            // every single-entry dictionary fall back to the automaton representation.
            return (double)maxReps - minReps + 1;
        }

        return Math.Pow(sourceSupportSize, minReps) * (1 - Math.Pow(sourceSupportSize, maxReps - minReps + 1)) / (1 - sourceSupportSize);
    }
}
/// <summary>
/// Appends <paramref name="weightFunction"/> to the current weight function.
/// </summary>
/// <param name="weightFunction">The weight function to append.</param>
/// <param name="group">
/// The group value forwarded to the automaton <c>Append</c>. Only a value of <c>0</c> allows the
/// point-mass and dictionary shortcuts to be taken.
/// </param>
/// <returns>
/// Zero when either operand wraps no representation; a point mass or dictionary when the shortcut
/// for <paramref name="group"/> equal to <c>0</c> applies; otherwise an automaton-backed result.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> Append(MultiRepresentationWeightFunction<TDictionary> weightFunction, int group = 0)
{
    var lhs = this.weightFunction;
    var rhs = weightFunction.weightFunction;

    if (lhs == null || rhs == null)
    {
        return Zero();
    }

    if (group == 0)
    {
        // Shortcuts that avoid building automata. Any combination that is not listed here
        // (or that would exceed the dictionary size limit) falls through to the automaton path.
        switch (rhs)
        {
            case PointMassWeightFunction rhsPoint when lhs is PointMassWeightFunction lhsPoint:
                return FromPointMass(lhsPoint.Append(rhsPoint.Point));

            case PointMassWeightFunction rhsPoint when lhs is TDictionary lhsDictionary:
                return FromDictionary(lhsDictionary.Append(rhsPoint.Point));

            case TDictionary rhsDictionary when lhs is PointMassWeightFunction lhsPoint:
                return FromDictionary(DictionaryWeightFunction<TDictionary>.FromPoint(lhsPoint.Point).Append(rhsDictionary));

            case TDictionary rhsDictionary
                when lhs is TDictionary lhsDictionary
                     && lhsDictionary.Dictionary.Count * rhsDictionary.Dictionary.Count <= MaxDictionarySize:
                return FromDictionary(lhsDictionary.Append(rhsDictionary));
        }
    }

    // Automaton path: a point mass on the right is appended directly as a sequence.
    if (rhs is PointMassWeightFunction pointMass)
    {
        return FromAutomaton(lhs.AsAutomaton().Append(pointMass.Point, group));
    }

    return FromAutomaton(lhs.AsAutomaton().Append(rhs.AsAutomaton(), group));
}
/// <summary>
/// Scales the current weight function by a factor given in the log domain.
/// </summary>
/// <param name="logScale">The logarithm of the scale factor.</param>
/// <returns>
/// Zero when no representation is wrapped; a single-entry dictionary for a point mass;
/// otherwise the scaled dictionary or automaton.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the wrapped representation has an unexpected type.
/// </exception>
public MultiRepresentationWeightFunction<TDictionary> ScaleLog(double logScale)
{
    if (weightFunction == null)
    {
        return Zero();
    }

    if (weightFunction is PointMassWeightFunction pointMass)
    {
        // A scaled point mass is stored as a dictionary with a single weighted entry.
        var scaledEntry = new[]
        {
            new KeyValuePair<TSequence, Weight>(pointMass.Point, Weight.FromLogValue(logScale))
        };
        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(scaledEntry));
    }

    if (weightFunction is TDictionary dictionary)
    {
        return FromDictionary(dictionary.ScaleLog(logScale));
    }

    if (weightFunction is TAutomaton automaton)
    {
        return FromAutomaton(automaton.ScaleLog(logScale));
    }

    throw new InvalidOperationException("Current function has an invalid type");
}
/// <inheritdoc/>
public MultiRepresentationWeightFunction<TDictionary> ConstantOnSupportOfLog(double logValue, MultiRepresentationWeightFunction<TDictionary> weightFunction)
{
    // Note: this works on the support of the argument, not of the current instance.
    if (!weightFunction.TryEnumerateSupportInternal(MaxDictionarySize, out var support))
    {
        // The support could not be enumerated within the limit: stay with automata.
        var automaton = weightFunction.AsAutomaton().ConstantOnSupportLog(logValue);
        return FromAutomaton(automaton);
    }

    // Materialise the support once; the checks below previously re-enumerated it up to four times.
    var sequences = support.ToList();

    if (sequences.Count == 0)
    {
        return Zero();
    }

    // A single sequence with log-weight 0 is exactly a point mass.
    if (logValue == 0 && sequences.Count == 1)
    {
        return FromPoint(sequences[0]);
    }

    var weight = Weight.FromLogValue(logValue);
    return FromDictionary(
        DictionaryWeightFunction<TDictionary>.FromDistinctWeights(
            sequences.Select(sequence => new KeyValuePair<TSequence, Weight>(sequence, weight))));
}
/// <summary>
/// Creates a weight function from a collection of sequences and their weights.
/// </summary>
/// <param name="sequenceWeightPairs">The sequences paired with their weight values.</param>
/// <returns>
/// Zero for an empty collection; a point mass for a single sequence whose weight is exactly <c>1.0</c>;
/// a dictionary when the number of pairs does not exceed <c>MaxDictionarySize</c>; otherwise an automaton.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> FromValues(IEnumerable<KeyValuePair<TSequence, double>> sequenceWeightPairs)
{
    // Materialise the input once so that counting and construction see the same data.
    var collection = sequenceWeightPairs as ICollection<KeyValuePair<TSequence, double>> ?? sequenceWeightPairs.ToList();

    if (collection.Count == 0)
    {
        return Zero();
    }

    if (collection.Count == 1)
    {
        var onlyPair = collection.Single();
        if (onlyPair.Value == 1.0)
        {
            return FromPoint(onlyPair.Key);
        }
    }

    if (collection.Count <= MaxDictionarySize)
    {
        // Use the materialised collection rather than the raw enumerable: the original re-enumerated
        // `sequenceWeightPairs` here, evaluating lazy sources a second time.
        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromValues(collection));
    }

    return FromAutomaton(Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TAutomaton>.FromValues(collection));
}
/// <summary>
/// Computes the weighted sum of the current weight function and <paramref name="weightFunction"/>,
/// with both scale factors given in the log domain.
/// </summary>
/// <param name="logWeight1">The logarithm of the scale applied to the current weight function.</param>
/// <param name="logWeight2">The logarithm of the scale applied to <paramref name="weightFunction"/>.</param>
/// <param name="weightFunction">The second weight function.</param>
/// <returns>
/// The scaled remaining operand when the other term vanishes; an automaton-backed sum when either operand
/// is an automaton; otherwise a dictionary-backed sum, converted to an automaton if it holds more than
/// <c>MaxDictionarySize</c> entries.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> SumLog(double logWeight1, double logWeight2, MultiRepresentationWeightFunction<TDictionary> weightFunction)
{
    // The second term contributes nothing: only the scaled first term remains.
    if (weightFunction.IsCanonicZero() || double.IsNegativeInfinity(logWeight2))
    {
        return ScaleLog(logWeight1);
    }

    // The first term contributes nothing: only the scaled second term remains.
    if (IsCanonicZero() || double.IsNegativeInfinity(logWeight1))
    {
        return weightFunction.ScaleLog(logWeight2);
    }

    // If either side is already an automaton, the whole sum is computed on automata.
    if (weightFunction.weightFunction is TAutomaton rhsAutomaton)
    {
        return FromAutomaton(AsAutomaton().SumLog(logWeight1, logWeight2, rhsAutomaton));
    }

    if (this.weightFunction is TAutomaton lhsAutomaton)
    {
        return FromAutomaton(lhsAutomaton.SumLog(logWeight1, logWeight2, weightFunction.AsAutomaton()));
    }

    // From here on each operand is either a point mass or a dictionary;
    // point masses are lifted to single-entry dictionaries.
    var lhsDictionary =
        this.weightFunction as TDictionary
        ?? DictionaryWeightFunction<TDictionary>.FromPoint(((PointMassWeightFunction)this.weightFunction).Point);
    var rhsDictionary =
        weightFunction.weightFunction as TDictionary
        ?? DictionaryWeightFunction<TDictionary>.FromPoint(((PointMassWeightFunction)weightFunction.weightFunction).Point);

    var summed = lhsDictionary.SumLog(logWeight1, logWeight2, rhsDictionary);

    // Keep the dictionary representation only while it stays within the size limit.
    return summed.Dictionary.Count <= MaxDictionarySize
        ? FromDictionary(summed)
        : FromAutomaton(summed.AsAutomaton());
}
/// <summary>
/// Computes the product of the current weight function and <paramref name="weightFunction"/>.
/// </summary>
/// <param name="weightFunction">The weight function to multiply with.</param>
/// <returns>
/// Zero when either operand is a canonic zero or the product has no non-zero entries; a point mass or
/// dictionary when one operand is a point mass or dictionary and the other one does not use groups;
/// otherwise an automaton-backed product.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> Product(MultiRepresentationWeightFunction<TDictionary> weightFunction)
{
    if (IsCanonicZero() || weightFunction.IsCanonicZero())
    {
        return Zero();
    }

    // Step 1: if either operand is a point mass, the product is the other operand evaluated at that point.
    PointMassWeightFunction pointMass = null;
    IWeightFunction other = null;
    if (this.weightFunction is PointMassWeightFunction lhsPoint)
    {
        pointMass = lhsPoint;
        other = weightFunction.weightFunction;
    }
    else if (weightFunction.weightFunction is PointMassWeightFunction rhsPoint)
    {
        pointMass = rhsPoint;
        other = this.weightFunction;
    }

    if (pointMass != null && !other.UsesGroups)
    {
        var logValue = other.GetLogValue(pointMass.Point);
        if (double.IsNegativeInfinity(logValue))
        {
            return Zero();
        }

        if (logValue == 0.0)
        {
            return FromPointMass(pointMass);
        }

        var scaledEntry = new[]
        {
            new KeyValuePair<TSequence, Weight>(pointMass.Point, Weight.FromLogValue(logValue))
        };
        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(scaledEntry));
    }

    // Step 2: if either operand is a dictionary, evaluate the other operand on each of its entries.
    TDictionary dictionary = null;
    if (this.weightFunction is TDictionary lhsDictionary)
    {
        if (weightFunction.weightFunction is TDictionary rhsDictionary)
        {
            // Two dictionaries are multiplied directly.
            return FromDictionary(lhsDictionary.Product(rhsDictionary));
        }

        dictionary = lhsDictionary;
        other = weightFunction.weightFunction;
    }
    else if (weightFunction.weightFunction is TDictionary rhsOnlyDictionary)
    {
        dictionary = rhsOnlyDictionary;
        other = this.weightFunction;
    }

    if (dictionary != null && !other.UsesGroups)
    {
        var products = new List<KeyValuePair<TSequence, Weight>>(dictionary.Dictionary.Count);
        foreach (var entry in dictionary.Dictionary)
        {
            if (entry.Value.IsZero)
            {
                continue;
            }

            var otherLogValue = other.GetLogValue(entry.Key);
            if (double.IsNegativeInfinity(otherLogValue))
            {
                continue;
            }

            products.Add(new KeyValuePair<TSequence, Weight>(entry.Key, entry.Value * Weight.FromLogValue(otherLogValue)));
        }

        if (products.Count == 0)
        {
            return Zero();
        }

        // A single surviving entry with log-weight 0 is exactly a point mass.
        if (products.Count == 1 && products[0].Value.LogValue == 0.0)
        {
            return FromPoint(products[0].Key);
        }

        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(products));
    }

    // Step 3: general case.
    return FromAutomaton(AsAutomaton().Product(weightFunction.AsAutomaton()));
}
/// <summary>
/// Returns a weight function in the simplest representation that could be determined for the current one.
/// </summary>
/// <returns>
/// Zero or a point mass when the dictionary or the enumerated automaton support has no or exactly one
/// entry; a dictionary when the automaton support is enumerable and estimated to be smaller than the
/// automaton; otherwise a clone of the current weight function.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> NormalizeStructure()
{
    if (weightFunction is TDictionary dictionary)
    {
        // Two non-zero entries are enough to tell "empty", "single" and "many" apart.
        var nonZeroEntries = dictionary.Dictionary.Where(kvp => !kvp.Value.IsZero).Take(2).ToList();
        switch (nonZeroEntries.Count)
        {
            case 0:
                return Zero();
            case 1:
                return FromPoint(nonZeroEntries[0].Key);
            default:
                return FromDictionary(dictionary.NormalizeStructure());
        }
    }

    if (weightFunction is TAutomaton automaton && !automaton.UsesGroups)
    {
        if (automaton.LogValueOverride == null &&
            automaton.TryEnumerateSupport(MaxDictionarySize, out var support, false, 4 * MaxDictionarySize, true))
        {
            var list = support.ToList();
            if (list.Count == 0)
            {
                return Zero();
            }

            if (list.Count == 1)
            {
                return FromPoint(list[0]);
            }

            // Create a dictionary only if we expect it to be smaller than the automaton.
            // Approximation uses sizes corresponding to a string automaton, which is the most used one.
            // We don't require this comparison to be always precise - most of the times is good enough.
            var dictSizeApprox =
                list.Sum(el => SequenceManipulator.GetLength(el)) * sizeof(char)
                + (24 + 8 + sizeof(double)) * list.Count;

            // 40 is the size of a DiscreteChar filled with nulls;
            // another 40 is the size of an array with a single char range.
            // Any specific DiscreteChar can be larger or can be cached.
            // 40 seems an ok approximation for the average case.
            var automatonSizeAprox =
                24 // header
                + 16 + 2 * sizeof(double) // 2 double? fields
                // Data Container
                + 2 * sizeof(int) // Flags and StartStateIndex
                + 2 * 24 // Headers of the states and transitions arrays
                + automaton.Data.States.Count * (2 * sizeof(int) + sizeof(double)) // states
                + automaton.Data.Transitions.Count * 24 // 24 is the size of one transition w/o storage for discrete char
                + automaton.Data.Transitions.Count(tr => !tr.IsEpsilon) * 80;

            if (dictSizeApprox < automatonSizeAprox)
            {
                return FromDictionary(
                    DictionaryWeightFunction<TDictionary>.FromDistinctWeights(
                        list.Select(seq => new KeyValuePair<TSequence, Weight>(seq, Weight.FromLogValue(automaton.GetLogValue(seq))))));
            }
        }

        // TryEnumerateSupport(..., maxTraversedPaths, ...) is allowed to quit early
        // on complex automata, so we need to explicitly check for point mass
        var point = automaton.TryComputePoint();
        if (point != null)
        {
            return FromPoint(point);
        }
    }

    // TODO: replace with `this` after making automata immutable
    return Clone();
}
/// <summary>
/// Computes the sum of the given weight functions.
/// </summary>
/// <param name="weightFunctions">The weight functions to sum.</param>
/// <returns>
/// Zero when nothing was accumulated; a point mass when the sum is a single sequence with log-weight 0;
/// a dictionary while the accumulated support stays within <c>MaxDictionarySize</c>;
/// otherwise an automaton-backed sum of all operands.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> Sum(IEnumerable<MultiRepresentationWeightFunction<TDictionary>> weightFunctions)
{
    // Materialise once: the operands are walked here and, if the result does not fit a dictionary,
    // a second time for the automaton fallback. Lazy sources were previously evaluated twice.
    var operands = weightFunctions as ICollection<MultiRepresentationWeightFunction<TDictionary>> ?? weightFunctions.ToList();

    int maxEntries = MaxDictionarySize;
    var dictionary = new Dictionary<TSequence, Weight>(maxEntries, SequenceManipulator.SequenceEqualityComparer);

    bool resultFitsDictionary = true;
    foreach (var operand in operands)
    {
        if (operand.IsCanonicZero())
        {
            continue;
        }

        if (operand.weightFunction is PointMassWeightFunction pointMass)
        {
            resultFitsDictionary = TryAccumulate(pointMass.Point, Weight.One);
        }
        else if (operand.weightFunction is TDictionary operandDictionary)
        {
            foreach (var kvp in operandDictionary.Dictionary)
            {
                if (!TryAccumulate(kvp.Key, kvp.Value))
                {
                    resultFitsDictionary = false;
                    break;
                }
            }
        }
        else
        {
            // Automata (or anything else) cannot be merged into the dictionary.
            resultFitsDictionary = false;
        }

        if (!resultFitsDictionary)
        {
            break;
        }
    }

    if (resultFitsDictionary)
    {
        if (dictionary.Count == 0)
        {
            return Zero();
        }

        if (dictionary.Count == 1)
        {
            var singleKvp = dictionary.Single();
            if (singleKvp.Value.LogValue == 0.0)
            {
                return FromPoint(singleKvp.Key);
            }
        }

        return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(dictionary));
    }

    var automaton = new TAutomaton();
    automaton.SetToSum(operands.Select(wf => wf.AsAutomaton()));
    return FromAutomaton(automaton);

    // Adds the weight to the entry for the sequence, creating the entry if there is still room.
    // Returns false when a new entry would exceed the size limit. Captures locals only.
    bool TryAccumulate(TSequence sequence, Weight weight)
    {
        if (dictionary.TryGetValue(sequence, out Weight oldWeight))
        {
            dictionary[sequence] = oldWeight + weight;
            return true;
        }

        if (dictionary.Count < maxEntries)
        {
            dictionary.Add(sequence, weight);
            return true;
        }

        return false;
    }
}