//private Dictionary<int, FiniteState> MergeLoopReferences(Dictionary<int, FiniteState> states)
//{
//}

/// <summary>
/// Builds the finite automaton states from the defined tokens and the
/// unclassified terminals, then appends a dedicated state for single delimiters.
/// </summary>
/// <returns>All created states; the root state is stored at index 0.</returns>
private List<FiniteState> GetFiniteAutomatonStates()
{
    var root = new FiniteState();
    var automaton = new List<FiniteState> { root };

    // The root state is the first element of the list, hence the start index 0.
    startState = 0;
    root.Links = new NextFiniteStates();

    // Every defined token that is also a medium contributes its own states.
    var mediumTokens = nodes.OfType<IDefinedToken>().OfType<IMedium>();
    foreach (IMedium mediumToken in mediumTokens)
    {
        CreateStatesForToken(mediumToken, automaton, root);
    }

    // Unclassified terminals: the terminal text itself is passed as the token name,
    // and the shared unclassified class name is passed as the token class.
    foreach (string terminal in unclassifiedTerminals)
    {
        CreateStatesForUnclassifiedTerminals(
            terminal,
            automaton,
            root,
            terminal,
            unclassifiedTokenClassName);
    }

    // Dedicated state for single delimiters.
    AddStateForSingleDelimiter(automaton, root);

    // Finally repair the states whose links refer to modified classes.
    FixLinksWithModifiedClasses(automaton);

    return automaton;
}
/// <summary>
/// Appends the state that represents single delimiters and links it from the root state.
/// </summary>
/// <param name="states">State list that receives the newly created state.</param>
/// <param name="rootState">State that gets the <c>"singleDelimiter"</c> link to the new state.</param>
private void AddStateForSingleDelimiter(List<FiniteState> states, FiniteState rootState)
{
    // Both the token name and the token class are the unclassified class name.
    var delimiterState = new FiniteState
    {
        TokenName = unclassifiedTokenClassName,
        TokenClass = unclassifiedTokenClassName,
    };

    // The state is appended, so its index equals the list size before the append.
    int delimiterStateIndex = states.Count;
    states.Add(delimiterState);

    rootState.Links["singleDelimiter"] = delimiterStateIndex;
}
/// <summary>
/// Appends the state that represents single delimiters and links it from the root state.
/// </summary>
/// <param name="states">State list that receives the newly created state.</param>
/// <param name="rootState">State that gets the <c>"singleDelimiter"</c> link to the new state.</param>
private void AddStateForSingleDelimiter(List<FiniteState> states, FiniteState rootState)
{
    // NOTE(review): the name id is looked up from the unclassified class name while
    // the class id is looked up from the undefined class name - confirm this
    // asymmetry is intended.
    var delimiterState = new FiniteState
    {
        TokenNameId = TokenClassIndices.GetIndex(ClassTable.UnclassifiedTokenClassName),
        TokenClassId = TokenClassIndices.GetIndex(ClassTable.UndefinedTokenClassName),
    };

    // The state is appended, so its index equals the list size before the append.
    int delimiterStateIndex = states.Count;
    states.Add(delimiterState);

    rootState.Links["singleDelimiter"] = delimiterStateIndex;
}
/// <summary>
/// Creates the automaton states for a single token.
/// </summary>
/// <remarks>
/// Loop points met while processing are gathered in a reference list, and the
/// links obtained for each processed node are gathered in a parsed-nodes map.
/// Once processing has finished, every loop point is resolved by copying the
/// links of the node it refers to into the looped state.
/// </remarks>
/// <param name="token">Token to be processed.</param>
/// <param name="states">List of all states created so far.</param>
/// <param name="rootState">Parent state of all states, a.k.a. the 0th state.</param>
/// <exception cref="InvalidOperationException">
/// The token is recursive (<c>token.Recursion</c> is not null), or a looped state
/// already defines a transition with a key that has to be copied into it.
/// </exception>
private void CreateStatesForToken(IMedium token, List<FiniteState> states, FiniteState rootState)
{
    // A token with a recursive case cannot be converted into a finite automaton.
    if (token.Recursion != null)
    {
        throw new InvalidOperationException(
            "Finite automaton cannot be built on left-recursive token. " +
            $"{token}::={(IFactor)token}");
    }

    // Loop points: the state that loops back, and the node it loops back to.
    var references = new List<(int stateId, IMedium referencedNode)>();

    // Parsed nodes and the links obtained while parsing each of them.
    // For example, after parsing a node we may have these next states:
    //   a->state1
    //   b->state2
    //   c->state3
    //   d->state4
    // Part of them were already present from the root state and part were
    // created while processing the node. Assuming the a and b links belong to
    // the node, they are saved as the list of links for this node and are
    // later used to resolve loop points.
    var parsedNodes = new Dictionary<IMedium, NextFiniteStates>();

    parsedNodes[token] = CreateFiniteStatesForTokenFromFactor(
        token,
        token,
        states,
        rootState,
        references,
        parsedNodes,
        token as IDefinedToken);

    // Resolve the loop points gathered above.
    foreach ((int stateId, IMedium referencedNode) in references)
    {
        // Work on a copy of the referenced node's links, so the enumeration stays
        // valid in case the looped state shares that very same link collection.
        var nextStatesToInsert = parsedNodes[referencedNode].ToDictionary(n => n.Key, n => n.Value);

        foreach (var newNext in nextStatesToInsert)
        {
            var loopedLinks = states[stateId].Links;

            // An existing transition must never be silently overwritten.
            if (loopedLinks.ContainsKey(newNext.Key))
            {
                throw new InvalidOperationException(
                    $"Looped state {stateId} already defines transition with given key {newNext.Key}.");
            }

            loopedLinks[newNext.Key] = newNext.Value;
        }
    }
}
/// <summary>
/// Builds the finite automaton states from the tokens and the unclassified
/// terminals, then appends a dedicated state for single delimiters.
/// </summary>
/// <returns>All created states; the root state is stored at index 0.</returns>
private List<FiniteState> GetFiniteAutomatonStates()
{
    // The root state carries the "undefined" index as both its class id and name id.
    var undefinedId = TokenClassIndices.GetIndex(ClassTable.UndefinedTokenClassName);
    var root = new FiniteState
    {
        TokenClassId = undefinedId,
        TokenNameId = undefinedId,
    };

    var automaton = new List<FiniteState> { root };

    // The root state is the first element of the list, hence the start index 0.
    startState = 0;
    root.Links = new Dictionary<string, int>();

    // Every token that is also a medium contributes its own states.
    var mediumTokens = Nodes.Tokens.OfType<IMedium>();
    foreach (IMedium mediumToken in mediumTokens)
    {
        CreateStatesForToken(mediumToken, automaton, root);
    }

    // States for the unclassified terminals.
    foreach (string terminal in UnclassifiedTerminals)
    {
        CreateStatesForUnclassifiedTerminals(
            terminal,
            automaton,
            root,
            undefinedTokenNameId,
            unclassifiedTokenNameId);
    }

    // Dedicated state for single delimiters.
    AddStateForSingleDelimiter(automaton, root);

    // Finally repair the states whose links refer to modified classes.
    FixLinksWithModifiedClasses(automaton);

    return automaton;
}
/// <summary> /// Creates finite automaton states for tokens. /// </summary> /// <remarks> /// Also addes token terminal symbols to separate classes. /// </remarks> /// <param name="node">Node to which factor belongs.</param> /// <param name="factor">Factor to be processed.</param> /// <param name="states">List of all states.</param> /// <param name="currentState">State to which links will be added.</param> /// <param name="references">List of loop points that refer to one of parsed nodes.</param> /// <param name="parsedNodes">List of nodes which have been processed.</param> /// <param name="tokenName">Token name.</param> /// <returns>List of links.</returns> private Dictionary <string, int> CreateFiniteStatesForTokenFromFactor( IMedium node, IFactor factor, List <FiniteState> states, FiniteState currentState, List <(int stateId, IMedium referencedNode)> references,
/// <summary>
/// Creates states for an unclassified terminal.
/// </summary>
/// <remarks>
/// Every symbol of the terminal is converted to a class name and becomes one
/// transition. Starting at <paramref name="currentState"/>, an existing transition
/// for the next class is followed; when there is none, a new state is appended to
/// <paramref name="states"/> and linked. The state reached after the last symbol
/// receives the token name and the token class.
/// </remarks>
/// <param name="terminal">The terminal to be converted.</param>
/// <param name="states">List of all states created so far.</param>
/// <param name="currentState">The state where the terminal starts its way through other states.</param>
/// <param name="tokenName">Token name to be assigned once the bottom is hit.</param>
/// <param name="tokenClassName">Token class to be assigned together with the token name.</param>
/// <exception cref="InvalidOperationException">
/// The terminal yields no symbol classes, or the final state already carries a
/// token name that differs from <paramref name="tokenName"/>.
/// </exception>
private void CreateStatesForUnclassifiedTerminals(
    string terminal,
    List<FiniteState> states,
    FiniteState currentState,
    string tokenName,
    string tokenClassName)
{
    // Each character of the terminal is treated as a separate class,
    // so the terminal is converted to a list of classes.
    List<string> terminalClassNames = GetClassesOfTerminalSymbols(terminal);

    // A terminal without symbols has no path through the automaton; without this
    // guard the token name would be written onto the starting state itself.
    if (!terminalClassNames.Any())
    {
        throw new InvalidOperationException("Terminal has no symbols to create states for.");
    }

    // Walk the path of the terminal: reuse the states that already lie on it and
    // create the missing ones. A freshly created state has no links yet, so once
    // the walk leaves the existing path every following symbol creates a state.
    FiniteState terminalState = currentState;
    foreach (string @class in terminalClassNames)
    {
        if (terminalState.Links.ContainsKey(@class))
        {
            terminalState = states[terminalState.Links[@class]];
        }
        else
        {
            (FiniteState createdState, int createdIndex) = CreateState();
            terminalState.Links[@class] = createdIndex;
            terminalState = createdState;
        }
    }

    // If a token name is already assigned and differs from the current one, this is
    // an attempt to assign two different token names to one state.
    if (terminalState.TokenName != null && terminalState.TokenName != tokenName)
    {
        throw new InvalidOperationException("Attempted assignment of second tokenName to one state.");
    }

    terminalState.TokenName = tokenName;
    terminalState.TokenClass = tokenClassName;

    // Appends a new state with an empty link set and returns it with its index.
    (FiniteState nextState, int stateIn) CreateState()
    {
        int stateIn = states.Count;
        FiniteState nextState = new FiniteState();
        nextState.Links = new NextFiniteStates();
        states.Add(nextState);
        return (nextState, stateIn);
    }
}