/// <summary>
/// Assigns a score to an instantiation path based on the predicted likelihood that it contains a matching loop.
/// </summary>
/// <param name="instantiationPath">The path to score.</param>
/// <param name="eliminatePrefix">
/// If true, the instantiations preceding the first longest run of non-eliminatable instantiations are ignored.
/// </param>
/// <param name="eliminatePostfix">
/// If true, the instantiations following the last longest run of non-eliminatable instantiations are ignored.
/// </param>
/// <returns>
/// Higher values indicate a higher likelihood of the path containing a matching loop.
/// -1 is returned if the path is empty or no instantiations remain after elimination.
/// </returns>
/// <remarks>All constants were determined experimentally.</remarks>
private static double InstantiationPathScoreFunction(InstantiationPath instantiationPath, bool eliminatePrefix, bool eliminatePostfix)
{
    // Materialize the path once; it is indexed, sliced and searched several times below.
    var pathInstantiations = instantiationPath.getInstantiations().ToList();
    if (pathInstantiations.Count == 0)
    {
        // Nothing to score; same result as when elimination leaves no instantiations.
        return -1;
    }

    // There may be some "outliers" before or after a matching loop.
    // We first identify quantifiers that occur at most outlierThreshold times as often as the most common
    // quantifier in the path (these are the eliminatable ones; everything above the threshold is kept)...
    var statistics = instantiationPath.Statistics();
    var hasStatistics = statistics != null && statistics.Any();
    var eliminationThreshold = hasStatistics ? Math.Max(statistics.Max(dp => dp.Item2) * outlierThreshold, 1) : 1;
    // Without statistics no quantifier can be classified as non-eliminatable, so the set stays empty
    // (previously a null statistics object passed the check above but crashed here).
    var nonEliminatableQuantifiers = hasStatistics
        ? new HashSet<Tuple<Quantifier, Term, Term>>(statistics
            .Where(dp => dp.Item2 > eliminationThreshold)
            .Select(dp => dp.Item1))
        : new HashSet<Tuple<Quantifier, Term, Term>>();

    // ...find the longest contiguous subsequence that does not contain eliminatable quantifiers...
    // Entry i describes the step from path element i to path element i + 1, i.e. it belongs to element i + 1.
    // A step that is not a pattern match is represented by (quantifier, null, null); otherwise there is one
    // fingerprint per binding whose bound term occurs in the previous instantiation's concrete body.
    var instantiations = pathInstantiations.Zip(pathInstantiations.Skip(1), (prev, next) =>
        !next.bindingInfo.IsPatternMatch()
            ? Enumerable.Repeat(Tuple.Create<Quantifier, Term, Term>(next.Quant, null, null), 1)
            : next.bindingInfo.bindings
                .Where(kv => prev.concreteBody.isSubterm(kv.Value.Item2.id))
                .Select(kv => Tuple.Create(next.Quant, next.bindingInfo.fullPattern, kv.Key))).ToArray();

    // All start indices below are indices into pathInstantiations.
    var maxStartIndex = 0;      // start of the first run that has the maximal length
    var maxLength = 0;          // length of the longest run seen so far
    var lastMaxStartIndex = 0;  // start of the last run that has the maximal length

    // The first path element has no predecessor, so it is classified by quantifier and full pattern only.
    var firstInstantiation = pathInstantiations[0];
    var firstKept = nonEliminatableQuantifiers.Any(q =>
        q.Item1 == firstInstantiation.Quant && q.Item2 == firstInstantiation.bindingInfo.fullPattern);
    var curStartIndex = firstKept ? 0 : 1;
    var curLength = firstKept ? 1 : 0;

    // The extra iteration (i == instantiations.Length) acts as a sentinel that closes the final run.
    for (var i = 0; i <= instantiations.Length; ++i)
    {
        var extendsRun = i < instantiations.Length
            && instantiations[i].Any(q => nonEliminatableQuantifiers.Contains(q));
        if (extendsRun)
        {
            ++curLength;
            continue;
        }

        // The current run ends here: record it if it is a new maximum or ties the maximum.
        if (curLength > maxLength)
        {
            maxStartIndex = curStartIndex;
            lastMaxStartIndex = curStartIndex;
            maxLength = curLength;
        }
        else if (curLength == maxLength)
        {
            lastMaxStartIndex = curStartIndex;
        }

        // Step i belongs to path element i + 1, which is eliminatable, so the next run starts at i + 2.
        curStartIndex = i + 2;
        curLength = 0;
    }

    // ...and eliminate the prefix/postfix of that subsequence.
    var remainingStart = eliminatePrefix ? maxStartIndex : 0;
    // NOTE(review): when eliminatePostfix is false the end bound is the number of steps (path length - 1),
    // so the last instantiation of the path is not included. This looks like a possible off-by-one, but the
    // behaviour is kept unchanged because the scoring constants were tuned against it - TODO confirm.
    var remainingEnd = eliminatePostfix ? lastMaxStartIndex + maxLength : instantiations.Length;
    var remainingInstantiations = pathInstantiations.GetRange(remainingStart, remainingEnd - remainingStart);
    if (remainingInstantiations.Count == 0)
    {
        return -1;
    }

    var remainingPath = new InstantiationPath();
    foreach (var inst in remainingInstantiations)
    {
        remainingPath.append(inst);
    }

    /* We count the number of incoming edges (responsible instantiations that are not part of the path) and penalize them.
     * This ensures that we choose the best path. E.g. in the triangular case (A -> B, A -> C, B -> C) we want to choose A -> B -> C
     * and not A -> B which would have an incoming edge (B -> C).
     * Membership is checked against the full original path, not only the remaining part. */
    var numberIncomingEdges = 0;
    foreach (var inst in remainingInstantiations)
    {
        numberIncomingEdges += inst.ResponsibleInstantiations.Count(responsible => !pathInstantiations.Contains(responsible));
    }

    /* The score is given by the number of remaining instantiations divided by the number of remaining quantifiers,
     * which is an approximation for the number of repetitions of a matching loop occurring in that path. */
    return (remainingPath.Length() - numberIncomingEdges * incomingEdgePenalizationFactor) / remainingPath.NumberOfDistinctQuantifierFingerprints();
}