/// <summary>
/// Parses with the negation of Ch(MatchedChar) and expects the parse to fail
/// while the scanner stays at offset 0.
/// </summary>
public void NegateFailParse()
{
    IScanner scanner = Provider.NewScanner;
    NegatableParser parser = ~Prims.Ch(MatchedChar);

    ParserMatch m = parser.Parse(scanner);

    Assert.IsFalse(m.Success);
    // Expected value goes first so a failure message labels expected/actual correctly.
    Assert.AreEqual(0, scanner.Offset);
}
/// <summary>
/// Parses with Ch(NonMatchedChar) and expects the parse to fail
/// while the scanner stays at offset 0.
/// </summary>
public void FailParse()
{
    IScanner scanner = Provider.NewScanner;
    CharParser parser = Prims.Ch(NonMatchedChar);

    ParserMatch m = parser.Parse(scanner);

    Assert.IsFalse(m.Success);
    // Expected value goes first so a failure message labels expected/actual correctly.
    Assert.AreEqual(0, scanner.Offset);
}
/// <summary>
/// Parses with the negation of Ch(NonMatchedChar) and expects a successful match
/// starting at offset 0 with length 1, leaving the scanner at offset 1.
/// </summary>
public void NegateSuccessParse()
{
    IScanner scanner = Provider.NewScanner;
    NegatableParser parser = ~Prims.Ch(NonMatchedChar);

    ParserMatch m = parser.Parse(scanner);

    Assert.IsTrue(m.Success);
    // Expected value goes first so a failure message labels expected/actual correctly.
    Assert.AreEqual(0, m.Offset);
    Assert.AreEqual(1, m.Length);
    Assert.AreEqual(1, scanner.Offset);
}
// Exercises a small arithmetic grammar taken from this forum thread:
// http://www.codeproject.com/csharp/spart.asp?df=100&forumid=30315&select=797678#xx797678xx
// Both a number without leading digits (".99") and one with them ("5.99")
// must parse inside a parenthesised sum.
public void KleeneStar()
{
    Rule integerRule = new Rule("integer");
    Rule numberRule = new Rule("number");
    Rule groupRule = new Rule("group");
    Rule termRule = new Rule("term");
    Rule expressionRule = new Rule("expression");

    // Built before termRule has a parser assigned; it only references the rule.
    Parser addition = Ops.Sequence('+', termRule);

    integerRule.Parser = Ops.Sequence(Prims.Digit, Ops.ZeroOrMore(Prims.Digit));
    numberRule.Parser = Ops.Sequence(
        Ops.Optional(integerRule),
        Prims.Ch('.'),
        integerRule);
    groupRule.Parser = Ops.Sequence('(', expressionRule, ')');
    termRule.Parser = groupRule | numberRule | integerRule;
    expressionRule.Parser = Ops.Sequence(termRule, Ops.ZeroOrMore(addition));

    ParserMatch withoutLeadingDigits = expressionRule.Parse(new StringScanner("(.99+100)"));
    Assert.IsTrue(withoutLeadingDigits.Success);

    ParserMatch withLeadingDigits = expressionRule.Parse(new StringScanner("(5.99+100)"));
    Assert.IsTrue(withLeadingDigits.Success);
}
// Exercises a comma-separated number list grammar taken from this forum thread:
// http://www.codeproject.com/csharp/spart.asp?df=100&forumid=30315&select=797847#xx797847xx
// The per-assertion remarks below restate the problems noted for each input.
public void List()
{
    Parser realNumber = Ops.Sequence(
        Ops.OneOrMore(Prims.Digit),
        Ops.Optional(Ops.Sequence('.', Ops.OneOrMore(Prims.Digit))));

    Rule numberList = new Rule();
    numberList.Parser = Ops.Sequence(
        realNumber,
        Ops.ZeroOrMore(Ops.Sequence(Prims.Ch(','), realNumber)),
        Prims.End);

    // Noted problem: this input threw an index exception.
    ParserMatch singleNumber = numberList.Parse(new StringScanner("100"));
    Assert.IsTrue(singleNumber.Success);

    // Noted problem: this input parsed successfully although it must be rejected.
    ParserMatch nonNumericItem = numberList.Parse(new StringScanner("88,d,88,9.090,"));
    Assert.IsFalse(nonNumericItem.Success);

    // Noted problem: this input parsed successfully although it must be rejected.
    ParserMatch trailingComma = numberList.Parse(new StringScanner("88,88,9.090,"));
    Assert.IsFalse(trailingComma.Success);
}
/// <summary>
/// Builds the grammar for a comma-separated list of attributes, where each
/// attribute is a name optionally followed by '=' and a symbol, scalar,
/// integer or quoted text value, and hooks each value rule up to its handler.
/// </summary>
public AttributeParser()
{
    // Names and symbolic values share one identifier grammar but get separate
    // rules so that each can raise its own action.
    Parser identifier = Ops.Seq(Prims.Letter | '_', Ops.Star(Prims.LetterOrDigit | '_'));
    m_NameRule = new Rule(identifier);
    m_NameRule.Act += OnName;
    m_SymbolRule = new Rule(identifier);
    m_SymbolRule.Act += OnSymbol;

    // Numeric values: integers and floating point scalars.
    Parser digits = Ops.Plus(Prims.Digit);
    Parser signedInteger = Ops.Seq(Ops.Optional('-'), digits);
    Parser floatingPoint = Ops.Seq(
        Ops.Optional('-'),
        Ops.Star(Prims.Digit),
        '.',
        digits,
        Ops.Optional(Ops.Seq('e', signedInteger)));
    m_IntegerRule = new Rule(signedInteger);
    m_IntegerRule.Act += OnInteger;
    m_ScalarRule = new Rule(floatingPoint);
    m_ScalarRule.Act += OnScalar;

    // Quoted text: the escaped-quote alternative is listed before the
    // "anything but a quote" alternative.
    Parser escapedQuote = Ops.Seq('\\', '\"');
    NegatableParser quoteChar = Prims.Ch('\"');
    Parser quotedText = Ops.Seq('\"', Ops.Star(escapedQuote | ~quoteChar), '\"');
    m_TextRule = new Rule(quotedText);
    m_TextRule.Act += OnText;

    // A single attribute: name, optionally followed by "= value".
    Parser spaces = Ops.Star(Prims.WhiteSpace);
    Parser assignment = Ops.Seq(
        spaces,
        '=',
        spaces,
        m_SymbolRule | m_ScalarRule | m_IntegerRule | m_TextRule);
    m_AttributeRule = new Rule(Ops.Seq(m_NameRule, Ops.Optional(assignment)));

    // The complete attribute list: one attribute followed by any number of
    // comma-separated attributes.
    m_AttributesParser = Ops.Seq(
        spaces,
        m_AttributeRule,
        Ops.Star(Ops.Seq(spaces, ',', spaces, m_AttributeRule)));
}
/// <summary>
/// Runs the shared helper against a parser for the literal 'a', handing it
/// that same character and a space (presumably as the matching and the
/// non-matching input; confirm against Helper.Test).
/// </summary>
public void LitteralCharTest()
{
    const char literal = 'a';
    const char other = ' ';

    Helper.Test(Prims.Ch(literal), literal, other);
}
// Builds the Spart grammar for ICU collation rules and stores every production
// in its corresponding field, ending with _icuRules as the top-level rule.
// Productions are assigned in dependency order: each field is set before any
// later production that references it, so the statement order must be kept.
// When _useDebugger is set, a Spart debugger writing to Console.Out is created
// and attached to a selection of the named rules.
// NOTE(review): judging by the EBNF comments, the unary '!' applied to a parser
// below builds an optional ('?') parser - confirm against the Spart operators.
private void DefineParsingRules()
{
    // someWhiteSpace ::= WS+
    // optionalWhiteSpace ::= WS*
    _someWhiteSpace = Ops.OneOrMore(Prims.WhiteSpace);
    _optionalWhiteSpace = Ops.ZeroOrMore(Prims.WhiteSpace);

    // Valid escaping formats (from http://www.icu-project.org/userguide/Collate_Customization.html )
    //
    // Most of the characters can be used as parts of rules.
    // However, whitespace characters will be skipped over,
    // and all ASCII characters that are not digits or letters
    // are considered to be part of syntax. In order to use
    // these characters in rules, they need to be escaped.
    // Escaping can be done in several ways:
    // * Single characters can be escaped using backslash \ (U+005C).
    // * Strings can be escaped by putting them between single quotes 'like this'.
    // * Single quote can be quoted using two single quotes ''.

    // Because Unicode escape sequences are allowed in LDML, we need to handle those also.
    // escapeSequence ::= '\' U[A-F0-9]{8} | u[A-F0-9]{4} | anyChar
    // The catch-all alternative is listed last, after the two Unicode escape forms.
    _escapeSequence = Ops.Choice(new Parser[] {
        Ops.Sequence('\\', Ops.Sequence('U', Prims.HexDigit, Prims.HexDigit, Prims.HexDigit, Prims.HexDigit,
            Prims.HexDigit, Prims.HexDigit, Prims.HexDigit, Prims.HexDigit)),
        Ops.Sequence('\\', Ops.Sequence('u', Prims.HexDigit, Prims.HexDigit, Prims.HexDigit, Prims.HexDigit)),
        Ops.Sequence('\\', Ops.Expect("icu0002", "Invalid escape sequence.", Prims.AnyChar))
    });

    // singleQuoteLiteral ::= "''"
    // quotedStringCharacter ::= AllChars - "'"
    // quotedString ::= "'" (singleQuoteLiteral | quotedStringCharacter)+ "'"
    _singleQuoteLiteral = Prims.Str("''");
    _quotedStringCharacter = Prims.AnyChar - '\'';
    _quotedString = Ops.Sequence('\'',
        Ops.OneOrMore(_singleQuoteLiteral | _quotedStringCharacter),
        Ops.Expect("icu0003", "Quoted string without matching end-quote.", '\''));

    // Any alphanumeric ASCII character and all characters above the ASCII range are valid data characters
    // normalCharacter ::= [A-Za-z0-9] | [U+0080-U+1FFFFF]
    // dataCharacter ::= normalCharacter | singleQuoteLiteral | escapeSequence
    // dataString ::= (dataCharacter | quotedString) (WS? (dataCharacter | quotedString))*
    // NOTE(review): the range below ends at char.MaxValue (U+FFFF), the largest UTF-16 code unit,
    // not at the upper bound written in the production above - confirm this is intended.
    _normalCharacter = Prims.LetterOrDigit | Prims.Range('\u0080', char.MaxValue);
    _dataCharacter = _normalCharacter | _singleQuoteLiteral | _escapeSequence;
    _dataString = new Spart.Parsers.NonTerminal.Rule(Ops.List(_dataCharacter | _quotedString, _optionalWhiteSpace));

    // firstOrLast ::= 'first' | 'last'
    // primarySecondaryTertiary ::= 'primary' | 'secondary' | 'tertiary'
    // indirectOption ::= (primarySecondaryTertiary WS 'ignorable') | 'variable' | 'regular' | 'implicit' | 'trailing'
    // indirectPosition ::= '[' WS? firstOrLast WS indirectOption WS? ']'
    // According to the LDML spec, "implicit" should not be allowed in a reset element, but we're not going to check that
    _firstOrLast = Ops.Choice("first", "last");
    _primarySecondaryTertiary = Ops.Choice("primary", "secondary", "tertiary");
    _indirectOption = Ops.Choice(Ops.Sequence(_primarySecondaryTertiary, _someWhiteSpace, "ignorable"),
        "variable", "regular", "implicit", "trailing");
    _indirectPosition = new Spart.Parsers.NonTerminal.Rule(Ops.Sequence('[', _optionalWhiteSpace,
        Ops.Expect("icu0004", "Invalid indirect position specifier: unknown option",
            Ops.Sequence(_firstOrLast, _someWhiteSpace, _indirectOption)), _optionalWhiteSpace,
        Ops.Expect("icu0005", "Indirect position specifier missing closing ']'", ']')));

    // top ::= '[' WS? 'top' WS? ']'
    // [top] is a deprecated element in ICU and should be replaced by indirect positioning.
    _top = Ops.Sequence('[', _optionalWhiteSpace, "top", _optionalWhiteSpace, ']');

    // simpleElement ::= indirectPosition | dataString
    _simpleElement = new Spart.Parsers.NonTerminal.Rule("simpleElement", _indirectPosition | _dataString);

    // expansion ::= WS? '/' WS? simpleElement
    _expansion = new Spart.Parsers.NonTerminal.Rule("extend", Ops.Sequence(_optionalWhiteSpace, '/', _optionalWhiteSpace,
        Ops.Expect("icu0007", "Invalid expansion: Data missing after '/'", _simpleElement)));

    // prefix ::= simpleElement WS? '|' WS?
    _prefix = new Spart.Parsers.NonTerminal.Rule("context", Ops.Sequence(_simpleElement, _optionalWhiteSpace, '|', _optionalWhiteSpace));

    // extendedElement ::= (prefix simpleElement expansion?) | (prefix? simpleElement expansion)
    _extendedElement = Ops.Sequence(_prefix, _simpleElement, !_expansion) |
        Ops.Sequence(!_prefix, _simpleElement, _expansion);

    // beforeOption ::= '1' | '2' | '3'
    // beforeSpecifier ::= '[' WS? 'before' WS beforeOption WS? ']'
    _beforeOption = Ops.Choice('1', '2', '3');
    _beforeSpecifier = Ops.Sequence('[', _optionalWhiteSpace, "before", _someWhiteSpace,
        Ops.Expect("icu0010", "Invalid 'before' specifier: Invalid or missing option", _beforeOption), _optionalWhiteSpace,
        Ops.Expect("icu0011", "Invalid 'before' specifier: Missing closing ']'", ']'));

    // The difference operator initially caused some problems with parsing. The spart library doesn't
    // handle situations where the first choice is the beginning of the second choice.
    // Ex: differenceOperator = "<" | "<<" | "<<<" | "=" DOES NOT WORK!
    // That will fail to parse both the << and <<< operators because it always thinks it should match <.
    // However, differenceOperator = "<<<" | "<<" | "<" | "=" will work because it tries to match <<< first.
    // I'm using this strange production with the optional '<' characters because it also works and doesn't
    // depend on order. It is less likely for someone to change it and unknowingly mess it up.
    // differenceOperator ::= ('<' '<'? '<'?) | '='
    _differenceOperator = Ops.Sequence('<', !Prims.Ch('<'), !Prims.Ch('<')) | Prims.Ch('=');

    // simpleDifference ::= differenceOperator WS? simpleElement
    // extendedDifference ::= differenceOperator WS? extendedElement
    // difference ::= simpleDifference | extendedDifference
    // NOTE: Due to the implementation of the parser, extendedDifference MUST COME BEFORE simpleDifference in the difference definition
    _simpleDifference = new Spart.Parsers.NonTerminal.Rule("simpleDifference", Ops.Sequence(_differenceOperator,
        _optionalWhiteSpace, _simpleElement));
    _extendedDifference = new Spart.Parsers.NonTerminal.Rule("x", Ops.Sequence(_differenceOperator,
        _optionalWhiteSpace, _extendedElement));
    _difference = _extendedDifference | _simpleDifference;

    // reset ::= '&' WS? ((beforeSpecifier? WS? simpleElement) | top)
    // In the code the 'top' alternative is tried first.
    _reset = new Spart.Parsers.NonTerminal.Rule("reset", Ops.Sequence('&', _optionalWhiteSpace,
        _top | Ops.Sequence(!_beforeSpecifier, _optionalWhiteSpace, _simpleElement)));

    // This option is a weird one, as it can come at any place in a rule and sets the preceding
    // dataString as the variable top option in the settings element. So, it has to look at the
    // data for the preceding element to know its own value, but leaves the preceding and any
    // succeeding elements as if the variable top option wasn't there. Go figure.
    // Also, it's really probably only valid following a simpleDifference or reset with a dataString
    // and not an indirect position, but checking for all that in the grammar would be very convoluted, so
    // we'll do it in the semantic action and throw. Yuck.
    // optionVariableTop ::= '<' WS? '[' WS? 'variable' WS? 'top' WS? ']'
    _optionVariableTop = Ops.Sequence('<', _optionalWhiteSpace, '[', _optionalWhiteSpace, "variable",
        _optionalWhiteSpace, "top", _optionalWhiteSpace, ']');

    // oneRule ::= reset (WS? (optionVariableTop | difference))*
    _oneRule = new Spart.Parsers.NonTerminal.Rule("oneRule", Ops.Sequence(_reset, Ops.ZeroOrMore(Ops.Sequence(_optionalWhiteSpace,
        _optionVariableTop | _difference))));

    // Option notes:
    // * The 'strength' option is specified in ICU as having valid values 1-4 and 'I'. In the LDML spec, it
    //   seems to indicate that valid values in ICU are 1-5, so I am accepting both and treating 'I' and '5'
    //   as the same. I'm also accepting 'I' and 'i', although my approach is, in general, to be case-sensitive.
    // * The 'numeric' option is not mentioned on the ICU website, but it is implied as being acceptable ICU
    //   in the LDML spec, so I am supporting it here.
    // * There are LDML options 'match-boundaries' and 'match-style' that are not in ICU, so they are not listed here.
    // * The UCA spec seems to indicate that there is a 'locale' option which is not mentioned in either the
    //   LDML or ICU specs, so I am not supporting it here. It could be referring to the 'base' element that
    //   is an optional part of the 'collation' element in LDML.
    // optionOnOff ::= 'on' | 'off'
    // optionAlternate ::= 'alternate' WS ('non-ignorable' | 'shifted')
    // optionBackwards ::= 'backwards' WS ('1' | '2')
    // optionNormalization ::= 'normalization' WS optionOnOff
    // optionCaseLevel ::= 'caseLevel' WS optionOnOff
    // optionCaseFirst ::= 'caseFirst' WS ('off' | 'upper' | 'lower')
    // optionStrength ::= 'strength' WS ('1' | '2' | '3' | '4' | 'I' | 'i' | '5')
    // optionHiraganaQ ::= 'hiraganaQ' WS optionOnOff
    // optionNumeric ::= 'numeric' WS optionOnOff
    // characterSet ::= '[' (AnyChar - ']')* ']'
    // optionSuppressContractions ::= 'suppress' WS 'contractions' WS characterSet
    // optionOptimize ::= 'optimize' WS characterSet
    // option ::= '[' WS? (optionAlternate | optionBackwards | optionNormalization | optionCaseLevel
    //            | optionCaseFirst | optionStrength | optionHiraganaQ | optionNumeric
    //            | optionSuppressContractions | optionOptimize) WS? ']'
    _optionOnOff = Ops.Choice("on", "off");
    _optionAlternate = Ops.Sequence("alternate", _someWhiteSpace, Ops.Choice("non-ignorable", "shifted"));
    _optionBackwards = Ops.Sequence("backwards", _someWhiteSpace, Ops.Choice('1', '2'));
    _optionNormalization = Ops.Sequence("normalization", _someWhiteSpace, _optionOnOff);
    _optionCaseLevel = Ops.Sequence("caseLevel", _someWhiteSpace, _optionOnOff);
    _optionCaseFirst = Ops.Sequence("caseFirst", _someWhiteSpace, Ops.Choice("off", "upper", "lower"));
    _optionStrength = Ops.Sequence("strength", _someWhiteSpace, Ops.Choice('1', '2', '3', '4', 'I', 'i', '5'));
    _optionHiraganaQ = Ops.Sequence("hiraganaQ", _someWhiteSpace, _optionOnOff);
    _optionNumeric = Ops.Sequence("numeric", _someWhiteSpace, _optionOnOff);
    _characterSet = Ops.Sequence('[', Ops.ZeroOrMore(Prims.AnyChar - ']'), ']');
    _optionSuppressContractions = Ops.Sequence("suppress", _someWhiteSpace, "contractions", _someWhiteSpace,
        _characterSet);
    _optionOptimize = Ops.Sequence("optimize", _someWhiteSpace, _characterSet);
    _option = new Spart.Parsers.NonTerminal.Rule("option", Ops.Sequence('[', _optionalWhiteSpace,
        _optionAlternate | _optionBackwards | _optionNormalization | _optionCaseLevel | _optionCaseFirst |
        _optionStrength | _optionHiraganaQ | _optionNumeric | _optionSuppressContractions | _optionOptimize,
        _optionalWhiteSpace, ']'));

    // I don't know if ICU requires all options first (it's unclear), but I am. :)
    // icuRules ::= WS? (option WS?)* (oneRule WS?)* EOF
    _icuRules = new Spart.Parsers.NonTerminal.Rule("icuRules", Ops.Sequence(_optionalWhiteSpace,
        Ops.ZeroOrMore(Ops.Sequence(_option, _optionalWhiteSpace)),
        Ops.ZeroOrMore(Ops.Sequence(_oneRule, _optionalWhiteSpace)), Ops.Expect("icu0015", "Invalid ICU rules.", Prims.End)));

    // Optional tracing: attach a debugger that writes to the console to selected rules.
    if (_useDebugger)
    {
        _debugger = new Spart.Debug.Debugger(Console.Out);
        _debugger += _option;
        _debugger += _oneRule;
        _debugger += _reset;
        _debugger += _simpleElement;
        _debugger += _simpleDifference;
        _debugger += _extendedDifference;
        _debugger += _dataString;
    }
}