/// <summary>
/// Builds the unicode-aware transition graph for the outgoing transitions of
/// <paramref name="state"/>.
/// </summary>
/// <remarks>
/// Every transition requirement is broken down against the full check of all outgoing
/// transitions. The breakdown yields a remainder set, a series of full unicode categories
/// and a series of partial unicode categories. A non-empty remainder is added to the result
/// directly; the categories are added to the unicode target of the transition's destination.
/// </remarks>
/// <param name="state">The state whose outgoing transitions are examined.</param>
/// <returns>
/// The resulting graph. When <paramref name="state"/> has no outgoing transitions the graph is
/// returned right after construction, with only its <c>UnicodeGraph</c> assigned.
/// </returns>
public static RegularLanguageDFAUnicodeGraph BuildUnicodeGraph(this RegularLanguageDFAState state)
{
    var graph = new RegularLanguageDFAUnicodeGraph() { UnicodeGraph = new UnicodeTargetGraph() };
    if (state.OutTransitions.Count == 0)
    {
        return graph;
    }

    var everything = state.OutTransitions.FullCheck;
    foreach (var edge in state.OutTransitions)
    {
        var destination = edge.Value;

        // Hand the requirement of this transition, together with the full set, to the
        // breakdown so the unicode subsets contained within it are separated out.
        var pieces = Breakdown(edge.Key, everything);

        // Whatever did not overlap enough of a category to be included in one is kept
        // as a plain remainder on the graph itself.
        var leftover = pieces.Item1;
        if (leftover != null && !leftover.IsEmpty)
        {
            graph.Add(leftover, destination);
        }

        var wholeCategories = pieces.Item2;
        var partialCategories = pieces.Item3;
        bool hasCategories = wholeCategories.Length > 0 || partialCategories.Count > 0;
        if (!hasCategories)
        {
            continue;
        }

        // Full and/or partial unicode sets exist: route them to the unicode target of the
        // destination state, creating that target on first use.
        IUnicodeTarget unicodeTarget = null;
        if (!graph.UnicodeGraph.TryGetValue(destination, out unicodeTarget))
        {
            unicodeTarget = graph.UnicodeGraph.Add(destination, destination == state);
        }

        // Full categories need no further qualification.
        foreach (var wholeCategory in wholeCategories)
        {
            unicodeTarget.Add(wholeCategory);
        }

        // Partial categories may carry a remainder set.
        //
        // A missing remainder means the original remainder was consumed by the overall
        // checks that occur before it. Example: for category Ll, if other paths already
        // use a-z, the check used to construct the breakdown would note this, but the full
        // set passed to the breakdown negates the negative set (once a-z has been checked
        // there is no need to verify the character is -not- in that range).
        foreach (var partialCategory in partialCategories.Keys)
        {
            var categoryRemainder = partialCategories[partialCategory];
            if (categoryRemainder == null)
            {
                unicodeTarget.Add(partialCategory);
            }
            else
            {
                unicodeTarget.Add(partialCategory, categoryRemainder);
            }
        }
    }

    return graph;
}
/// <summary>
/// Decides, for every target state found in <paramref name="dfaTransitionTable"/>, whether it
/// is handled as a local jump, a global jump or inline.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item><description>States that are keys of <paramref name="multitargetLookup"/> and are reached
/// either through the unicode graph or through the transition table become
/// <see cref="RegularLanguageDFAHandlingType.GlobalJump"/>.</description></item>
/// <item><description>Unicode graph targets that are not keys of <paramref name="multitargetLookup"/>
/// become <see cref="RegularLanguageDFAHandlingType.LocalJump"/>.</description></item>
/// <item><description>Every other transition target starts as
/// <see cref="RegularLanguageDFAHandlingType.Inline"/>, and is demoted to
/// <see cref="RegularLanguageDFAHandlingType.LocalJump"/> when the union of the sets leading to it is
/// not itself a key of the table, or when that union reports inefficient switch cases.</description></item>
/// </list>
/// </remarks>
/// <param name="multitargetLookup">Lookup whose keys mark the states treated as global jump targets.</param>
/// <param name="dfaTransitionTable">The transition table, with an optional (possibly null)
/// <c>UnicodeGraph</c>, whose target states are classified.</param>
/// <returns>A dictionary mapping each classified state to its handling type.</returns>
public static Dictionary<RegularLanguageDFAState, RegularLanguageDFAHandlingType> DetermineStateHandlingTypes(
    Dictionary<RegularLanguageDFAState, RegularLanguageDFAStateJumpData> multitargetLookup,
    RegularLanguageDFAUnicodeGraph dfaTransitionTable)
{
    // Materialized once so the unicode graph is not re-enumerated by later queries.
    var unicodeTargetStates =
        ((dfaTransitionTable.UnicodeGraph == null)
            ? Enumerable.Empty<IUnicodeTarget>()
            : (IEnumerable<IUnicodeTarget>)dfaTransitionTable.UnicodeGraph.Values)
        .Select(k => k.Target)
        .ToArray();

    // Transition targets that are also unicode targets are already covered by the leading
    // sequence, so only the multi-target transition targets need to be appended before
    // duplicates are removed.
    var setToIterate = unicodeTargetStates
        .Concat(dfaTransitionTable.Values.Where(target => multitargetLookup.ContainsKey(target)))
        .Distinct()
        .ToArray();

    var jumpTargetStates = new HashSet<RegularLanguageDFAState>(
        setToIterate.Where(target => !multitargetLookup.ContainsKey(target)));
    var externalJumpTargets = new HashSet<RegularLanguageDFAState>(
        setToIterate.Where(target => multitargetLookup.ContainsKey(target)));
    var inlineableTargets = new HashSet<RegularLanguageDFAState>(
        dfaTransitionTable.Values.Except(jumpTargetStates.Concat(externalJumpTargets)));

    var falsePositiveInlineableTargets = new HashSet<RegularLanguageDFAState>();
    foreach (var inlineElement in inlineableTargets)
    {
        // Union of every set that transitions into this target.
        var check = dfaTransitionTable
            .Where(kvp => kvp.Value == inlineElement)
            .Select(kvp => kvp.Key)
            .Aggregate(RegularLanguageSet.UnionAggregateDelegate);

        // Not inlineable when the unioned set is not itself a key of the table, or when it
        // has inefficient switch cases: large groupings of characters could be quicker as
        // an if statement across the range, i.e. 'a' <= nextChar && nextChar <= 'z'
        // versus 26 separate jump notations.
        // Any() stops at the first element instead of counting the whole sequence.
        if (!dfaTransitionTable.ContainsKey(check) || check.GetInefficientSwitchCases().Any())
        {
            falsePositiveInlineableTargets.Add(inlineElement);
        }
    }

    if (falsePositiveInlineableTargets.Count > 0)
    {
        // Demote the rejected inline candidates to local jump targets.
        inlineableTargets = new HashSet<RegularLanguageDFAState>(
            inlineableTargets.Except(falsePositiveInlineableTargets));
        jumpTargetStates = new HashSet<RegularLanguageDFAState>(
            jumpTargetStates.Concat(falsePositiveInlineableTargets));
    }

    var allStates = new HashSet<RegularLanguageDFAState>(
        jumpTargetStates
            .Concat(externalJumpTargets)
            .Concat(inlineableTargets));

    // Local jump takes precedence over global jump, which takes precedence over inline.
    var dfaHandlingMechanisms = allStates.ToDictionary(
        state => state,
        state => jumpTargetStates.Contains(state)
            ? RegularLanguageDFAHandlingType.LocalJump
            : externalJumpTargets.Contains(state)
                ? RegularLanguageDFAHandlingType.GlobalJump
                : inlineableTargets.Contains(state)
                    ? RegularLanguageDFAHandlingType.Inline
                    : RegularLanguageDFAHandlingType.Unknown);

    return dfaHandlingMechanisms;
}