/// <summary>
/// Smoke test: builds a <c>RegexParser&lt;BankEvent&gt;</c> from a purchase-notification
/// pattern and runs it once against a sample message.
/// </summary>
private static async Task TestParser()
{
    // The decimal separator inside the "delta" group is escaped (\.) just like the one in
    // "available"; an unescaped dot would accept any character between the two digit runs.
    const string purchasePattern =
        @"Pokupka, (?<agent>.*), karta \*(?<cardId>\d+), (?<date>\d+\.\d+\.\d+ \d+:\d+), (?<delta>\d+\.\d+) rub. Dostupno = (?<available>\d+\.\d+)\s* rub";
    const string pokupka = "Pokupka, MARKET PLACE, karta *000883, 10.02.16 15:09, 223.00 rub. Dostupno = 12882.48 rub";

    var parser = new RegexParser<BankEvent>(purchasePattern);

    // The result is not inspected here; the local only exists so it can be examined in a debugger.
    var bankEvents = await parser.InvokeAsync(pokupka, CancellationToken.None);
}
/// <summary>
/// Creates a crossword with a <paramref name="size"/> x <paramref name="size"/> character field
/// and one question per supplied expression.
/// </summary>
/// <param name="size">Length of each dimension of the character field.</param>
/// <param name="horizontalExpressions">Expression sources for the horizontal questions, in order.</param>
/// <param name="verticalExpressions">Expression sources for the vertical questions, in order.</param>
public MatrixCrossword(int size, IEnumerable<string> horizontalExpressions, IEnumerable<string> verticalExpressions)
{
    _size = size;
    _field = new char[size, size];

    // A single parser instance serves both directions.
    var expressionParser = new RegexParser();

    _horizontalQuestions =
        (from expression in horizontalExpressions
         select new CrosswordQuestion(expressionParser.Parse(expression), expression)).ToArray();

    _verticalQuestions =
        (from expression in verticalExpressions
         select new CrosswordQuestion(expressionParser.Parse(expression), expression)).ToArray();
}
/// <summary>
/// Validates the arguments, then initializes this instance either from a cached entry
/// or by parsing <paramref name="pattern"/> and writing its code.
/// </summary>
/// <param name="pattern">The regular expression pattern.</param>
/// <param name="options">Options that modify the pattern.</param>
/// <param name="matchTimeout">Match timeout; checked by <c>ValidateMatchTimeout</c>.</param>
/// <param name="useCache">When true, a freshly parsed pattern is stored via <c>CacheCode</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="options"/> is negative, has bits beyond <c>MaxOptionShift</c>, or combines
/// ECMAScript with an option outside the permitted set.
/// </exception>
private Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, bool useCache)
{
    if (pattern == null)
    {
        throw new ArgumentNullException(nameof(pattern));
    }

    bool optionsOutOfRange = options < RegexOptions.None || (((int)options) >> MaxOptionShift) != 0;
    if (optionsOutOfRange)
    {
        throw new ArgumentOutOfRangeException(nameof(options));
    }

    // Options that are allowed to accompany RegexOptions.ECMAScript.
    RegexOptions ecmaCompatible = RegexOptions.ECMAScript
                                  | RegexOptions.IgnoreCase
                                  | RegexOptions.Multiline
                                  | RegexOptions.Compiled
                                  | RegexOptions.CultureInvariant;
#if DEBUG
    ecmaCompatible |= RegexOptions.Debug;
#endif
    if ((options & RegexOptions.ECMAScript) != 0 && (options & ~ecmaCompatible) != 0)
    {
        throw new ArgumentOutOfRangeException(nameof(options));
    }

    ValidateMatchTimeout(matchTimeout);

    // The cache is consulted regardless of useCache; useCache only decides whether a new
    // entry gets stored after parsing.
    CultureInfo keyCulture = (options & RegexOptions.CultureInvariant) != 0
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;
    var key = new CachedCodeEntryKey(options, keyCulture.ToString(), pattern);
    CachedCodeEntry hit = LookupCachedAndUpdate(key);

    this.pattern = pattern;
    roptions = options;
    internalMatchTimeout = matchTimeout;

    if (hit != null)
    {
        // Cache hit: copy everything from the cached entry.
        _code = hit._code;
        caps = hit._caps;
        capnames = hit._capnames;
        capslist = hit._capslist;
        capsize = hit._capsize;
        _runnerref = hit._runnerref;
        _replref = hit._replref;
        _refsInitialized = true;
        return;
    }

    // Cache miss: parse the pattern and extract the relevant information.
    RegexTree parsed = RegexParser.Parse(pattern, roptions);
    capnames = parsed._capnames;
    capslist = parsed._capslist;
    _code = RegexWriter.Write(parsed);
    caps = _code._caps;
    capsize = _code._capsize;
    InitializeReferences();

    if (useCache)
    {
        CacheCode(key);
    }
}
/// <summary>
/// Validates the arguments, initializes this instance from a cached entry or by parsing
/// <paramref name="pattern"/>, and, when the compile option is set, compiles the code and
/// creates the runner reference.
/// </summary>
/// <param name="pattern">The regular expression pattern.</param>
/// <param name="options">Options that modify the pattern.</param>
/// <param name="useCache">When true, new entries and compiled factories are stored in the cache.</param>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="options"/> is negative, has bits beyond <c>MaxOptionShift</c>, or combines
/// ECMAScript with an option outside the permitted set.
/// </exception>
private Regex(String pattern, RegexOptions options, bool useCache)
{
    if (pattern == null)
    {
        throw new ArgumentNullException("pattern");
    }

    bool optionsOutOfRange = options < RegexOptions.None || (((int)options) >> MaxOptionShift) != 0;
    if (optionsOutOfRange)
    {
        throw new ArgumentOutOfRangeException("options");
    }

    // Options that are allowed to accompany RegexOptions.ECMAScript.
    RegexOptions ecmaCompatible = RegexOptions.ECMAScript
                                  | RegexOptions.IgnoreCase
                                  | RegexOptions.Multiline
                                  | RegexOptions.Compiled
                                  | RegexOptions.CultureInvariant
                                  | RegexOptions.Timeboxed;
#if DBG
    ecmaCompatible |= RegexOptions.Debug;
#endif
    if ((options & RegexOptions.ECMAScript) != 0 && (options & ~ecmaCompatible) != 0)
    {
        throw new ArgumentOutOfRangeException("options");
    }

    // The cache is consulted regardless of useCache. The key has the form
    // "<options as int>:<three-letter Windows language name>:<pattern>".
    CultureInfo keyCulture = (options & RegexOptions.CultureInvariant) != 0
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;
    String optionsPart = ((int)options).ToString(NumberFormatInfo.InvariantInfo);
    String key = optionsPart + ":" + keyCulture.ThreeLetterWindowsLanguageName + ":" + pattern;
    CachedCodeEntry entry = LookupCachedAndUpdate(key);

    this.pattern = pattern;
    this.roptions = options;

    if (entry != null)
    {
        // Cache hit: copy everything from the cached entry.
        code = entry._code;
        factory = entry._factory;
        caps = entry._caps;
        capnames = entry._capnames;
        capslist = entry._capslist;
        capsize = entry._capsize;
        runnerref = entry._runnerref;
        replref = entry._replref;
        refsInitialized = true;
    }
    else
    {
        // Cache miss: parse the pattern and extract the relevant information.
        RegexTree parsed = RegexParser.Parse(pattern, roptions);
        capnames = parsed._capnames;
        capslist = parsed._capslist;
        code = RegexWriter.Write(parsed);
        caps = code._caps;
        capsize = code._capsize;
        InitializeReferences();

        if (useCache)
        {
            entry = CacheCode(key);
        }
    }

    if (UseOptionC())
    {
        // Compile only when no factory came from the cache.
        if (factory == null)
        {
            factory = Compile(code, roptions);
            if (useCache && entry != null)
            {
                entry.AddCompiled(factory);
            }
            code = null;
        }

        runnerref = new ExclusiveReference();
        runnerref.Release(factory.CreateInstance());
    }
}
/// <summary>
/// Checks that <c>RegexParser.ParseOptionsInPattern</c>, called with
/// <see cref="RegexOptions.None"/>, yields <paramref name="expectedOptions"/> for
/// <paramref name="pattern"/>.
/// </summary>
/// <param name="pattern">Pattern handed to the parser.</param>
/// <param name="expectedOptions">Options the parser is expected to report.</param>
public void FoundOptionsInPatternIsCorrect(string pattern, RegexOptions expectedOptions)
{
    Assert.Equal(expectedOptions, RegexParser.ParseOptionsInPattern(pattern, RegexOptions.None));
}
/// <summary>
/// Validates the arguments, then initializes this instance either from a cached entry or by
/// parsing <paramref name="pattern"/>; under FEATURE_COMPILED it also compiles the code when
/// the compile option is set and no factory is available yet.
/// </summary>
/// <param name="pattern">The regular expression pattern.</param>
/// <param name="options">Options that modify the pattern.</param>
/// <param name="matchTimeout">Match timeout; checked by <c>ValidateMatchTimeout</c>.</param>
/// <param name="addToCache">When true, new entries and compiled factories are stored in the cache.</param>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="options"/> is negative, has bits beyond <c>MaxOptionShift</c>, or combines
/// ECMAScript with an option outside the permitted set.
/// </exception>
private Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, bool addToCache)
{
    if (pattern == null)
    {
        throw new ArgumentNullException(nameof(pattern));
    }

    bool optionsOutOfRange = options < RegexOptions.None || (((int)options) >> MaxOptionShift) != 0;
    if (optionsOutOfRange)
    {
        throw new ArgumentOutOfRangeException(nameof(options));
    }

    // Options that are allowed to accompany RegexOptions.ECMAScript.
    RegexOptions ecmaCompatible = RegexOptions.ECMAScript
                                  | RegexOptions.IgnoreCase
                                  | RegexOptions.Multiline
                                  | RegexOptions.Compiled
                                  | RegexOptions.CultureInvariant;
#if DEBUG
    ecmaCompatible |= RegexOptions.Debug;
#endif
    if ((options & RegexOptions.ECMAScript) != 0 && (options & ~ecmaCompatible) != 0)
    {
        throw new ArgumentOutOfRangeException(nameof(options));
    }

    ValidateMatchTimeout(matchTimeout);

    // Look the pattern up in the cache; the culture name is part of the key.
    CultureInfo keyCulture = (options & RegexOptions.CultureInvariant) != 0
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;
    var key = new CachedCodeEntryKey(options, keyCulture.ToString(), pattern);
    CachedCodeEntry entry = LookupCachedAndUpdate(key);

    this.pattern = pattern;
    roptions = options;
    internalMatchTimeout = matchTimeout;

    if (entry != null)
    {
        // Cache hit: copy everything from the cached entry.
        _code = entry._code;
#if FEATURE_COMPILED
        factory = entry._factory;
#endif
        caps = entry._caps;
        capnames = entry._capnames;
        capslist = entry._capslist;
        capsize = entry._capsize;
        _runnerref = entry._runnerref;
        _replref = entry._replref;
        _refsInitialized = true;
    }
    else
    {
        // Cache miss: parse the pattern and extract the relevant information.
        RegexTree parsed = RegexParser.Parse(pattern, roptions);
        capnames = parsed._capnames;
        capslist = parsed._capslist;
        _code = RegexWriter.Write(parsed);
        caps = _code._caps;
        capsize = _code._capsize;
        InitializeReferences();

        if (addToCache)
        {
            entry = CacheCode(key);
        }
    }

#if FEATURE_COMPILED
    // If the compile option is set, compile the code unless a factory already exists.
    if (UseOptionC() && factory == null)
    {
        factory = Compile(_code, roptions);
        if (addToCache && entry != null)
        {
            entry.AddCompiled(factory);
        }
        _code = null;
    }
#endif
}
/// <summary>
/// Parses <paramref name="input"/> with a new <c>RegexParser</c> and transforms the result
/// with a new <c>ThompsonConstructionAlgorithm</c>.
/// </summary>
/// <param name="input">Text handed to the parser.</param>
/// <returns>The value returned by <c>ThompsonConstructionAlgorithm.Transform</c>.</returns>
private static INfa CreateNfa(string input)
{
    // Parse first, construct the algorithm afterwards (same order as a single chained expression).
    var parsed = new RegexParser().Parse(input);
    var thompson = new ThompsonConstructionAlgorithm();
    return thompson.Transform(parsed);
}
/// <summary>
/// Creates a new <c>RegexInterpreter</c> over the stored code, using the culture that
/// <c>RegexParser.GetTargetCulture</c> returns for the code's options.
/// </summary>
/// <returns>A new interpreter instance.</returns>
protected internal override RegexRunner CreateInstance()
{
    var targetCulture = RegexParser.GetTargetCulture(_code.Options);
    return new RegexInterpreter(_code, targetCulture);
}
/// <summary>
/// Parses a regular expression from the given source reader.
/// </summary>
/// <param name="reader">Source reader that supplies the regular expression.</param>
/// <param name="option">Options for the regular expression.</param>
/// <returns>The parsed regular expression.</returns>
public static Regex Parse(SourceReader reader, RegexOptions option)
{
    // No regular-expression definitions are supplied (last argument is null).
    return RegexParser.ParseRegex(reader, option, null);
}
/// <summary>
/// Parses a regular expression from the given source reader using <c>RegexOptions.None</c>.
/// </summary>
/// <param name="reader">Source reader that supplies the regular expression.</param>
/// <returns>The parsed regular expression.</returns>
public static Regex Parse(SourceReader reader)
{
    // Default options, and no regular-expression definitions (last argument is null).
    return RegexParser.ParseRegex(reader, RegexOptions.None, null);
}
/// <summary>
/// Gets a <see cref="RegexCharClass"/> object from a regular-expression character-class pattern.
/// </summary>
/// <param name="pattern">The character-class pattern.</param>
/// <param name="option">Options for the regular expression.</param>
/// <returns>The <see cref="RegexCharClass"/> object.</returns>
public static RegexCharClass ParsePattern(string pattern, RegexOptions option)
{
    return RegexParser.ParseCharClass(pattern, option, false);
}
/// <summary>
/// Manual test driver: runs the currently enabled hand-built regex cases, then parser cases,
/// and finally an interactive loop that reads a pattern and a text from the console.
/// </summary>
/// <remarks>
/// Most cases are commented out and are enabled by hand. The interactive loop ends when the
/// console input reaches end-of-stream.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    //--------------------------------- single-pattern match-at-start tests --------------------------------------------
    const string text = "aaabbbbTTTXYZAAA123456789";
    var list = new List<Tuple<string, Regex>>();
    //list.Add(Tuple.Create("Literal: match", Regex.Make().Literal("aaa")));
    //list.Add(Tuple.Create("Literal: match", Regex.Make().Literal("aab")));
    //list.Add(Tuple.Create("Any: match", Regex.Make().To(new Any())));
    //list.Add(Tuple.Create("Any: match many times", Regex.Make().To(new Any()).To(new Any()).To(new Any()).To(new Any()).To(new Any())));
    //list.Add(Tuple.Create("Not: unmatch", Regex.Make().To(new Not("a"))));
    //list.Add(Tuple.Create("Not: unmatch", Regex.Make().To(new Not("aaa"))));
    //list.Add(Tuple.Create("Not: match", Regex.Make().To(new Not("aab"))));
    //list.Add(Tuple.Create("Or: match on second arg", Regex.Make().To(new Or("xxxxxx", "aaa", "eeeee"))));
    //list.Add(Tuple.Create("Head: match", Regex.Make().To(new Head())));
    //list.Add(Tuple.Create("Head: unmatch", Regex.Make().Literal("aaa").To(new Head())));
    //list.Add(Tuple.Create("Tail: unmatch", Regex.Make().To(new Tail())));
    //list.Add(Tuple.Create("Tail: match", Regex.Make().Literal(text).To(new Tail())));
    //list.Add(Tuple.Create("?: match one", Regex.Make().To(new ZeroOrOne("a"))));
    //list.Add(Tuple.Create("?: match zero", Regex.Make().To(new ZeroOrOne("x"))));
    //list.Add(Tuple.Create("*: match zero", Regex.Make().To(new ZeroOrMore("x"))));
    //list.Add(Tuple.Create("*: match three", Regex.Make().To(new ZeroOrMore("a"))));
    //list.Add(Tuple.Create("+: unmatch", Regex.Make().To(new OneOrMore("x"))));
    //list.Add(Tuple.Create("+: match three", Regex.Make().To(new OneOrMore("a"))));
    //list.Add(Tuple.Create("+: match six", Regex.Make().To(new OneOrMore(new OneOrMore("a")))));//GREAT!
    //list.Add(Tuple.Create("(?=): unmatch", Regex.Make().To(new PositiveLookahead("aaa", "a"))));
    //list.Add(Tuple.Create("(?=): match", Regex.Make().To(new PositiveLookahead("aaa", "b"))));
    //list.Add(Tuple.Create("(?!): unmatch", Regex.Make().To(new NegativeLookahead("aaa", "a"))));
    //list.Add(Tuple.Create("(?!): match", Regex.Make().To(new NegativeLookahead("aaa", "b"))));
    //list.Add(Tuple.Create("(?!): match", Regex.Make().To(new PositiveLookbehind("bbb", "a"))));
    //list.Add(Tuple.Create("(?!): unmatch", Regex.Make().To(new PositiveLookbehind("bbb", "x"))));
    //list.Add(Tuple.Create("(?!): 1 match", Regex.Make().To(new NegativeLookbehind("bbb", "a"))));
    //list.Add(Tuple.Create("(?!): 2 match", Regex.Make().To(new NegativeLookbehind("bbb", "x"))));
    for (int i = 0; i < list.Count; i++)
    {
        ShowLog(list[i].Item1, text, list[i].Item2);
    }

    //--------------------------------- combined-pattern match-at-start tests --------------------------------------------
    var list2 = new List<Tuple<string, Regex, string>>();
    var rgx1 = Regex.Make()
               .Literal("r")
               .To(new ZeroOrOne("e"))
               .To(new Any())
               .To(new OrInvert('g'))
               .To(new Or("e", "x"))
               .To(new Or("e"));
    //list2.Add(Tuple.Create("re?.[^g][ex][e]: match", rgx1, "rgrxe"));
    //list2.Add(Tuple.Create("re?.[^g][ex][e]: match", rgx1, "rexee"));
    //list2.Add(Tuple.Create("re?.[^g][ex][e]: match", rgx1, "rekvee"));
    //list2.Add(Tuple.Create("re?.[^g][ex][e]: unmatch", rgx1, "rekverrr"));
    foreach (var tuple in list2)
    {
        ShowLog(tuple.Item1, tuple.Item3, tuple.Item2);
    }

    //--------------------------------- whole-input match tests --------------------------------------------
    // Many of the locals below are only referenced by the disabled cases further down.
    var list3 = new List<Tuple<string, Regex>>();
    var regg = new Capture(new OneOrMore(new Or(new OrInvert('|'), new Escaped(new Char('|'))))); // the part enclosed between '|' characters
    var a = new ZeroOrMore(new Or(regg, new UnEscaped('|'))); // the contents of [...]
    var aa = new UnEscaped('[').To(a).To(new UnEscaped(']'));
    var aasaa = new OneOrMore(regg.To(new Literal("a"))).To(regg);
    var b0 = new OrInvert('\\', '[', ']'); // anything that is not a backslash (or a bracket)
    var b1 = new Literal(@"\").To(new Any());
    var b2 = new OneOrMore(new Capture(new Or(b0, b1)));
    var b3 = new UnEscaped('[');
    var b4 = new UnEscaped(']');
    var b = b3.To(b2);
    //list3.Add(Tuple.Create(@"as\[asd[a\d\]d\ds]", new UnEscapedOrBrace() as Regex));
    //list3.Add(Tuple.Create("ffabtabeaabbab", Regex.Make().To(new OneOrMore(new Literal("a").To(new Literal("b"))))));
    //list3.Add(Tuple.Create("aatestatest", Regex.Make().Literal("test")));
    //list3.Add(Tuple.Create("aatestatesttasttust", Regex.Make().Literal("t").To(new Capture(new Any())).Literal("st")));
    //list3.Add(Tuple.Create("aatestteaatestesaates", Regex.Make().To(new Named("Label",new Literal("a"))).To(new Reference("Label"))));
    var rg = Regex.Make().Literal("(").To(new ZeroOrMore(new Any())).Literal(")"); // simple parentheses: ok
    var rg2 = Regex.Make().To(new ZeroOrMore(new OrInvert('(', ')'))); // run of non-parenthesis characters; slightly off but ok
    var rg3 = Regex.Make().To(new Or("()", new OneOrMore(new OrInvert('(', ')')))); // ok
    var rgx4 = Regex.Make().To(new Named("kakko", new UnEscaped('(').To(new ZeroOrMore(new Or(new OrInvert('(', ')'), new Reference("kakko")))).Literal(")"))); // parentheses captured!
    //var rg5 = new UnEscaped(new Literal("(")); // unescaped opening parenthesis
    var escapedB = new PositiveLookbehind(new Literal("("), new Or(new Head(), new OrInvert('\\')).To(new Literal(@"\")).To(new ZeroOrMore(new Literal(@"\\"))));
    var escapedB2 = new PositiveLookbehind(new Literal(")"), new Or(new Head(), new OrInvert('\\')).To(new Literal(@"\")).To(new ZeroOrMore(new Literal(@"\\"))));
    var independentPatern = new Or(new Literal(@"\"), new UnEscapedBraces(), new OrInvert(new MetaChar()), ".", "^", "$");
    var patern = new ZeroOrOne(new LookBehindSyntax()).To(independentPatern).To(new ZeroOrOne(new Or(new LookAheadSyntax(), new Repeater().To(new ZeroOrOne("?"))))); // by design, at most one prefix and one suffix can be taken
    var patterns = new OneOrMore(patern);
    var regexPattern = new Named("RGP", new Capture(patterns).To(new ZeroOrOne(new Literal("|").To(new Reference("RGP")))));
    //var rgx44 = Regex.Make().To(new Named("kakko", new UnEscaped(new Literal("(")).To(new ZeroOrMore(new Or(new OrInvert('(', ')'),escapedB,escapedB2, new Reference("kakko")))).To(new UnEscaped(new Literal(")"))))); // parentheses captured!
    //list3.Add(Tuple.Create("aaa(ddd)fff", rg));//ok
    //list3.Add(Tuple.Create("a.a?a*a+(ddd)f+f*f", rgrg2 as Regex));//ok
    //list3.Add(Tuple.Create("aa(?<=a)aa", rgrg2 as Regex));//ok
    //list3.Add(Tuple.Create("aa(?<!a)aa", rgrg2 as Regex));//ok
    //list3.Add(Tuple.Create("aa(?=a)aa", rgrg2 as Regex));//ok
    //list3.Add(Tuple.Create("aa(?!a)aa", rgrg2 as Regex));//ok
    //list3.Add(Tuple.Create("aa(?<=a)a(?=a)a", rgrg2 as Regex));//ok
    //list3.Add(Tuple.Create("aa(?!a)aa(?=a)a", rgrg2 as Regex));//ok
    //list3.Add(Tuple.Create("aa(?!a)aa*a", rgrg2 as Regex));//ok
    //list3.Add(Tuple.Create("aa(?!a)a|a*a|a", regexPattern as Regex));//ok
    //list3.Add(Tuple.Create("aa(?!a)a++a", rgrg2 as Regex));//ok
    //list3.Add(Tuple.Create("{1,3a}", new CountRepeaterSyntax() as Regex));//ok
    //list3.Add(Tuple.Create("{1, }", new CountRepeaterSyntax() as Regex));//ok
    //list3.Add(Tuple.Create("{14}", new CountRepeaterSyntax() as Regex));//ok
    //list3.Add(Tuple.Create("aaa(ddd)f(f)f", rg));//ok
    //list3.Add(Tuple.Create("aaa(d(d)d)f(f)f", rg));//ok
    //list3.Add(Tuple.Create("aatestatest", Regex.Make().To(new OneOrMore(new Literal("a")))));
    //list3.Add(Tuple.Create("xy", Regex.Make().To(new ZeroOrMore("a"))));
    //list3.Add(Tuple.Create("aaasd(dsff)fsdf()(sdf)sd((dfg(df)A(A)S()F(A",rg2));
    //list3.Add(Tuple.Create("fsdf()(sdf))A(A)S()F(A",rg3));
    //list3.Add(Tuple.Create("()",rgx4));
    //list3.Add(Tuple.Create("(a)",rgx4));
    //list3.Add(Tuple.Create("(aa)",rgx4));
    //list3.Add(Tuple.Create("(a(a))",rgx4));
    //list3.Add(Tuple.Create("bb(a(a))", rgx4));
    //list3.Add(Tuple.Create("nn(a(a)nn", rgx4));
    //list3.Add(Tuple.Create("a", new CountRepeater("a",0,1) as Regex));
    //list3.Add(Tuple.Create("a,aa,aaa,aaaa,aaaaa", new CountRepeater("a",1,1) as Regex));
    //list3.Add(Tuple.Create("a,aa,aaa,aaaa,aaaaa", new CountRepeater("a",2,2) as Regex));
    //list3.Add(Tuple.Create("a,aa,aaa,aaaa,aaaaa", new CountRepeater("a",1,2) as Regex));
    //list3.Add(Tuple.Create("a,aa,aaa,aaaa,aaaaa", new CountRepeater("a",2,3) as Regex));
    //list3.Add(Tuple.Create("nn(ann", RegexParser.RegexPattern));
    //list3.Add(Tuple.Create("123", Regex.Make().To(new Digit())));
    //list3.Add(Tuple.Create("123", Regex.Make().To(new OneOrMore(new Digit()))));
    //list3.Add(Tuple.Create(@"\\(()a\()aa(\\\(a)", new UnEscapedBraces() as Regex));
    //list3.Add(Tuple.Create(@"\\a\))\da(a\\s\(", new NegativeLookBehindSyntax(true).To(new Capture(independentPatern.To(new ZeroOrOne(new Or(new LookAheadSyntax(), new Repeater().To(new ZeroOrOne("?"))))))) as Regex));
    foreach (var tuple in list3)
    {
        ShowLog("3rdTest::", tuple.Item1, tuple.Item2);
        Console.ReadLine(); // pause until Enter is pressed
    }
    //Console.ReadLine();

    // Patterns to run through RegexParser.Parse and print as a structure.
    var strList = new List<string>();
    //strList.Add(@"aaaaaaaaaaa"); // simple literal
    //strList.Add(@"\\aaa\\ss\\sss"); // escaped literal
    //strList.Add(@"\\a\daa\\s\d\ds\\\dsss"); // escape
    //strList.Add(@"\\a\))\da(a\\s\("); // escape
    //strList.Add(@"\\d"); // escape
    //strList.Add(@"\\(a)\d"); // escape
    //strList.Add(@"\\a(\))a)(as\("); // escape
    //strList.Add(@"\\a(\))\da)(a\\s\("); // escape
    //strList.Add(@"\\ddf(gh(df)gh)(df)gh"); // escape
    //strList.Add(@"aaaaa(bbbb)ccc"); // escape
    //strList.Add(@"aaaaa(?:bbbb)ccc"); // escape
    //strList.Add(@"aaaaa(?=bbbb)ccc"); // escape
    //strList.Add(@"aaaaa(?!bbbb)ccc"); // escape
    //strList.Add(@"aaaaa(?<=bbbb)ccc"); // escape
    //strList.Add(@"aaaa(?=a(??bbbb)c)cc"); // escape
    //strList.Add(@"aaaa(?=a(??bbbb)c)cc"); // escape
    //strList.Add(@"a+bc"); // escape
    //strList.Add(@"a?bc"); // escape
    //strList.Add(@"a*bc"); // escape
    //strList.Add(@"a.bc"); // escape
    //strList.Add(@"a^bc"); // escape
    //strList.Add(@"a\$\[$b]c"); // escape
    //strList.Add(@"a[abcde]b"); // escape
    //strList.Add(@"a[^abcde]b"); // escape
    //strList.Add(@"a("); // escape
    int count = 0;
    foreach (var regStr in strList)
    {
        Console.WriteLine("------------" + (count++) + "----------------");
        Console.WriteLine("\n@@@ParseProccess@@@");
        var reg = RegexParser.Parse(regStr);
        Console.WriteLine("\n@@@Structure@@@");
        Console.WriteLine(reg.ToStructureString());
        Console.ReadLine(); // pause until Enter is pressed
    }

    // (pattern, text) pairs: the pattern is parsed and then matched against the text.
    var parseList = new List<Tuple<string, string>>();
    //parseList.Add(Tuple.Create(@"\d\d\d-\d\d\d\d", "00000000"));
    //parseList.Add(Tuple.Create(@"\d\d\d-\d\d\d\d", "000-0000"));
    //parseList.Add(Tuple.Create(@"b.k", "bak"));
    //parseList.Add(Tuple.Create(@"b.k", "btk"));
    //parseList.Add(Tuple.Create(@"b.k", "btrk"));
    //parseList.Add(Tuple.Create(@"b.+k", "btrk"));
    //parseList.Add(Tuple.Create(@"b.+k", "bk"));
    //parseList.Add(Tuple.Create(@"b.+k", "btssrkss"));
    //parseList.Add(Tuple.Create(@"b.*k", "btrk"));
    //parseList.Add(Tuple.Create(@"b.*k", "bk"));
    //parseList.Add(Tuple.Create(@"[13579]", "4"));
    //parseList.Add(Tuple.Create(@"[13579]", "4123445678"));
    foreach (var tuple in parseList)
    {
        var reg = RegexParser.Parse(tuple.Item1);
        ShowLog(tuple.Item1 + " ===> " + tuple.Item2, tuple.Item2, reg);
        Console.ReadLine(); // pause until Enter is pressed
    }

    // Interactive loop: read a pattern and a text, parse the pattern, show the result.
    while (true)
    {
        Console.Write("input [Rr]egex: \t>");
        string reg = Console.ReadLine();
        if (reg == null)
        {
            // Console.ReadLine returns null once input is exhausted (redirected or closed
            // stdin); stop instead of handing null to the parser on every iteration.
            break;
        }

        Console.Write("input [Tt]ext: \t>");
        string te = Console.ReadLine();
        if (te == null)
        {
            break;
        }

        var regex = RegexParser.Parse(reg);
        ShowLog(reg + " ===> " + te, te, regex);
        Console.WriteLine("\n\n");
    }
}
/// <summary>
/// Parses <paramref name="pattern"/> with <see cref="RegexOptions.None"/> and the invariant
/// culture, then runs <c>RegexTreeAnalyzer.Analyze</c> on the resulting tree.
/// </summary>
/// <param name="pattern">The pattern to parse.</param>
/// <returns>The parsed tree together with its analysis results.</returns>
private static (RegexTree Tree, AnalysisResults Analysis) Analyze(string pattern)
{
    RegexTree parsed = RegexParser.Parse(pattern, RegexOptions.None, CultureInfo.InvariantCulture);
    AnalysisResults results = RegexTreeAnalyzer.Analyze(parsed);
    return (parsed, results);
}
// Inspects the method that the generator context points at and decides what to emit for it.
// Returns null if there is nothing to do, a Diagnostic if there is an error to report, or the
// RegexMethod built at the end when the method was analyzed successfully.
// The cancellationToken parameter is not referenced anywhere in the visible body.
private static object?GetSemanticTargetForGeneration(
    GeneratorAttributeSyntaxContext context, CancellationToken cancellationToken)
{
    var methodSyntax = (MethodDeclarationSyntax)context.TargetNode;
    SemanticModel sm = context.SemanticModel;

    Compilation compilation = sm.Compilation;
    INamedTypeSymbol?regexSymbol = compilation.GetBestTypeByMetadataName(RegexName);
    INamedTypeSymbol?generatedRegexAttributeSymbol = compilation.GetBestTypeByMetadataName(GeneratedRegexAttributeName);

    if (regexSymbol is null || generatedRegexAttributeSymbol is null)
    {
        // Required types aren't available
        return(null);
    }

    // Only methods whose parent is a type declaration are handled.
    TypeDeclarationSyntax?typeDec = methodSyntax.Parent as TypeDeclarationSyntax;
    if (typeDec is null)
    {
        return(null);
    }

    IMethodSymbol regexMethodSymbol = context.TargetSymbol as IMethodSymbol;
    if (regexMethodSymbol is null)
    {
        return(null);
    }

    ImmutableArray <AttributeData>?boundAttributes = regexMethodSymbol.GetAttributes();
    if (boundAttributes is null || boundAttributes.Value.Length == 0)
    {
        return(null);
    }

    // Values collected from the generated-regex attribute's constructor arguments.
    bool attributeFound = false;
    string?pattern = null;
    int? options = null;
    int?
    matchTimeout = null;
    string?cultureName = string.Empty;
    foreach (AttributeData attributeData in boundAttributes)
    {
        // Skip every attribute that is not the generated-regex attribute.
        if (!SymbolEqualityComparer.Default.Equals(attributeData.AttributeClass, generatedRegexAttributeSymbol))
        {
            continue;
        }

        if (attributeData.ConstructorArguments.Any(ca => ca.Kind == TypedConstantKind.Error))
        {
            return(Diagnostic.Create(DiagnosticDescriptors.InvalidGeneratedRegexAttribute, methodSyntax.GetLocation()));
        }

        // A pattern captured by an earlier iteration means the attribute appears more than once.
        if (pattern is not null)
        {
            return(Diagnostic.Create(DiagnosticDescriptors.MultipleGeneratedRegexAttributes, methodSyntax.GetLocation()));
        }

        ImmutableArray <TypedConstant> items = attributeData.ConstructorArguments;
        if (items.Length == 0 || items.Length > 4)
        {
            return(Diagnostic.Create(DiagnosticDescriptors.InvalidGeneratedRegexAttribute, methodSyntax.GetLocation()));
        }

        attributeFound = true;
        pattern = items[0].Value as string;
        if (items.Length >= 2)
        {
            options = items[1].Value as int?;
            if (items.Length == 4)
            {
                matchTimeout = items[2].Value as int?;
                cultureName = items[3].Value as string;
            }
            // If there are 3 parameters, we need to check if the third argument is
            // int matchTimeoutMilliseconds, or string cultureName.
            else if (items.Length == 3)
            {
                if (items[2].Type.SpecialType == SpecialType.System_Int32)
                {
                    matchTimeout = items[2].Value as int?;
                }
                else
                {
                    cultureName = items[2].Value as string;
                }
            }
        }
    }

    if (!attributeFound)
    {
        return(null);
    }

    if (pattern is null || cultureName is null)
    {
        return(Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "(null)"));
    }

    // The method must be a non-abstract, non-generic, parameterless partial definition whose
    // return type is the regex type looked up above.
    if (!regexMethodSymbol.IsPartialDefinition ||
        regexMethodSymbol.IsAbstract ||
        regexMethodSymbol.Parameters.Length != 0 ||
        regexMethodSymbol.Arity != 0 ||
        !SymbolEqualityComparer.Default.Equals(regexMethodSymbol.ReturnType, regexSymbol))
    {
        return(Diagnostic.Create(DiagnosticDescriptors.RegexMethodMustHaveValidSignature, methodSyntax.GetLocation()));
    }

    RegexOptions regexOptions = options is not null ?
    (RegexOptions)options : RegexOptions.None;

    // If RegexOptions.IgnoreCase was specified or the inline ignore case option `(?i)` is present in the pattern, then we will (in priority order):
    // - If a culture name was passed in:
    //   - If RegexOptions.CultureInvariant was also passed in, then we emit a diagnostic due to the explicit conflict.
    //   - We try to initialize a culture using the passed in culture name to be used for case-sensitive comparisons. If
    //     the culture name is invalid, we'll emit a diagnostic.
    // - Default to use Invariant Culture if no culture name was passed in.
    CultureInfo culture = CultureInfo.InvariantCulture;
    RegexOptions regexOptionsWithPatternOptions;
    try
    {
        regexOptionsWithPatternOptions = regexOptions | RegexParser.ParseOptionsInPattern(pattern, regexOptions);
    }
    catch (Exception e)
    {
        // Any exception thrown while scanning the pattern is reported as a diagnostic carrying its message.
        return(Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), e.Message));
    }

    if ((regexOptionsWithPatternOptions & RegexOptions.IgnoreCase) != 0 && !string.IsNullOrEmpty(cultureName))
    {
        if ((regexOptions & RegexOptions.CultureInvariant) != 0)
        {
            // User passed in both a culture name and set RegexOptions.CultureInvariant which causes an explicit conflict.
            return(Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "cultureName"));
        }

        try
        {
            culture = CultureInfo.GetCultureInfo(cultureName);
        }
        catch (CultureNotFoundException)
        {
            return(Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "cultureName"));
        }
    }

    // Validate the options
    const RegexOptions SupportedOptions =
        RegexOptions.Compiled |
        RegexOptions.CultureInvariant |
        RegexOptions.ECMAScript |
        RegexOptions.ExplicitCapture |
        RegexOptions.IgnoreCase |
        RegexOptions.IgnorePatternWhitespace |
        RegexOptions.Multiline |
        RegexOptions.NonBacktracking |
        RegexOptions.RightToLeft |
        RegexOptions.Singleline;
    if ((regexOptions & ~SupportedOptions) != 0)
    {
        return(Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "options"));
    }

    // Validate the timeout: 0 and anything below -1 are rejected; null and -1 pass this check.
    if (matchTimeout is 0 or < -1)
    {
        return(Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "matchTimeout"));
    }

    // Parse the input pattern
    RegexTree regexTree;
    AnalysisResults analysis;
    try
    {
        regexTree = RegexParser.Parse(pattern, regexOptions | RegexOptions.Compiled, culture); // make sure Compiled is included to get all optimizations applied to it
        analysis = RegexTreeAnalyzer.Analyze(regexTree);
    }
    catch (Exception e)
    {
        return(Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), e.Message));
    }

    // Determine the namespace the class is declared in, if any
    string?ns = regexMethodSymbol.ContainingType?.ContainingNamespace?.ToDisplayString(
        SymbolDisplayFormat.FullyQualifiedFormat.WithGlobalNamespaceStyle(SymbolDisplayGlobalNamespaceStyle.Omitted));

    // For a record declaration the keyword text is followed by the record's class/struct keyword.
    var regexType = new RegexType(
        typeDec is RecordDeclarationSyntax rds ? $"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText,
        ns ??
        string.Empty,
        $"{typeDec.Identifier}{typeDec.TypeParameterList}");

    var regexMethod = new RegexMethod(
        regexType,
        methodSyntax,
        regexMethodSymbol.Name,
        methodSyntax.Modifiers.ToString(),
        pattern,
        regexOptions,
        matchTimeout,
        regexTree,
        analysis);

    // Walk outward through the enclosing type declarations, chaining each one as the Parent of
    // the previous RegexType, for as long as IsAllowedKind accepts the declaration kind.
    RegexType current = regexType;
    var parent = typeDec.Parent as TypeDeclarationSyntax;

    while (parent is not null && IsAllowedKind(parent.Kind()))
    {
        current.Parent = new RegexType(
            parent is RecordDeclarationSyntax rds2 ? $"{parent.Keyword.ValueText} {rds2.ClassOrStructKeyword}" : parent.Keyword.ValueText,
            ns ?? string.Empty,
            $"{parent.Identifier}{parent.TypeParameterList}");

        current = current.Parent;
        parent = parent.Parent as TypeDeclarationSyntax;
    }

    return(regexMethod);
/// <summary>
/// Parses <paramref name="regex"/> with <see cref="RegexOptions.RightToLeft"/> removed from
/// <paramref name="options"/> and converts the root of the parsed tree.
/// </summary>
/// <param name="regex">The pattern to parse.</param>
/// <param name="options">Parsing options; the RightToLeft flag is removed before parsing.</param>
/// <returns>The result of <c>ConvertNode</c> for the root node.</returns>
public SymbolicFiniteAutomaton<TConstraint> Convert(string regex, RegexOptions options)
{
    var withoutRightToLeft = options.RemoveFlags(RegexOptions.RightToLeft);
    var parsedTree = RegexParser.Parse(regex, withoutRightToLeft);
    return ConvertNode(parsedTree.Root, 0, true, true);
}
/// <summary>
/// Parses a regular expression from the given source reader.
/// </summary>
/// <param name="reader">Source reader that supplies the regular expression.</param>
/// <param name="option">Options for the regular expression.</param>
/// <param name="regexDef">Regular-expression definitions handed to the parser.</param>
/// <returns>The parsed regular expression.</returns>
public static Regex Parse(SourceReader reader, RegexOptions option, IDictionary<string, Regex> regexDef)
{
    return RegexParser.ParseRegex(reader, option, regexDef);
}
/// <summary>
/// Parses <paramref name="set"/>, takes the first child of the tree's root, and checks that
/// <c>RegexCharClass.DescribeSet</c> of that node's string equals <paramref name="expected"/>.
/// </summary>
/// <param name="set">Pattern text to parse.</param>
/// <param name="expected">Expected description.</param>
public void DescribeSet(string set, string expected)
{
    var tree = RegexParser.Parse($"{set}", RegexOptions.None, CultureInfo.InvariantCulture);
    RegexNode setNode = tree.Root.Child(0);

    var description = RegexCharClass.DescribeSet(setNode.Str!);

    Assert.Equal(expected, description);
}
/// <summary>
/// Parses a regular expression from the given string.
/// </summary>
/// <param name="pattern">Pattern string of the regular expression.</param>
/// <param name="option">Options for the regular expression.</param>
/// <returns>The parsed regular expression.</returns>
public static Regex Parse(string pattern, RegexOptions option)
{
    // No regular-expression definitions are supplied (last argument is null).
    return RegexParser.ParseRegex(pattern, option, null);
}
/// <summary>
/// Parses a regular expression from the given string using <c>RegexOptions.None</c>.
/// </summary>
/// <param name="pattern">Pattern string of the regular expression.</param>
/// <param name="regexDef">Regular-expression definitions handed to the parser.</param>
/// <returns>The parsed regular expression.</returns>
public static Regex Parse(string pattern, IDictionary<string, Regex> regexDef)
{
    return RegexParser.ParseRegex(pattern, RegexOptions.None, regexDef);
}
/// <summary>
/// Appends the parser's pattern to <paramref name="state"/>, wrapped in forward slashes.
/// </summary>
/// <param name="p">Parser whose <c>Pattern</c> is written.</param>
/// <param name="state">Visitor that receives the text.</param>
/// <returns>Always <c>true</c>.</returns>
private bool Accept(RegexParser p, BnfStringifyVisitor state)
{
    const string delimiter = "/";
    state.Append(delimiter, p.Pattern, delimiter);
    return true;
}
/// <summary>
/// Validates the arguments, initializes this instance from a cached entry or by parsing
/// <paramref name="pattern"/>, and compiles the code when the compile option is set and no
/// factory is available yet.
/// </summary>
/// <param name="pattern">The regular expression pattern.</param>
/// <param name="options">Options that modify the pattern.</param>
/// <param name="useCache">When true, new entries and compiled factories are stored in the cache.</param>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="options"/> is negative, has bits at position 10 or above, or combines
/// ECMAScript with an option outside the permitted set.
/// </exception>
private Regex(string pattern, RegexOptions options, bool useCache)
{
    if (pattern == null)
    {
        throw new ArgumentNullException("pattern");
    }

    bool optionsOutOfRange = (options < RegexOptions.None) || ((((int)options) >> 10) != 0);
    if (optionsOutOfRange)
    {
        throw new ArgumentOutOfRangeException("options");
    }

    // Options that are allowed to accompany RegexOptions.ECMAScript.
    const RegexOptions ecmaCompatible = RegexOptions.CultureInvariant
                                        | RegexOptions.ECMAScript
                                        | RegexOptions.Compiled
                                        | RegexOptions.Multiline
                                        | RegexOptions.IgnoreCase;
    if (((options & RegexOptions.ECMAScript) != RegexOptions.None)
        && ((options & ~ecmaCompatible) != RegexOptions.None))
    {
        throw new ArgumentOutOfRangeException("options");
    }

    // Cache key has the form "<options as int>:<culture name>:<pattern>".
    CultureInfo keyCulture = (options & RegexOptions.CultureInvariant) != RegexOptions.None
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;
    string cultureName = keyCulture.ToString();
    string key = ((int)options).ToString(NumberFormatInfo.InvariantInfo) + ":" + cultureName + ":" + pattern;
    CachedCodeEntry entry = LookupCachedAndUpdate(key);

    this.pattern = pattern;
    this.roptions = options;

    if (entry != null)
    {
        // Cache hit: copy everything from the cached entry.
        this.code = entry._code;
        this.factory = entry._factory;
        this.caps = entry._caps;
        this.capnames = entry._capnames;
        this.capslist = entry._capslist;
        this.capsize = entry._capsize;
        this.runnerref = entry._runnerref;
        this.replref = entry._replref;
        this.refsInitialized = true;
    }
    else
    {
        // Cache miss: parse the pattern and extract the relevant information.
        RegexTree parsed = RegexParser.Parse(pattern, this.roptions);
        this.capnames = parsed._capnames;
        this.capslist = parsed._capslist;
        this.code = RegexWriter.Write(parsed);
        this.caps = this.code._caps;
        this.capsize = this.code._capsize;
        this.InitializeReferences();

        if (useCache)
        {
            entry = this.CacheCode(key);
        }
    }

    // Compile only when requested and no factory came from the cache.
    if (this.UseOptionC() && (this.factory == null))
    {
        this.factory = this.Compile(this.code, this.roptions);
        if (useCache && (entry != null))
        {
            entry.AddCompiled(this.factory);
        }
        this.code = null;
    }
}
/// <summary>
/// Defines a terminal symbol.
/// </summary>
/// <param name="id">Identifier of the terminal symbol.</param>
/// <param name="regex">Regular expression that corresponds to the terminal symbol.</param>
/// <param name="action">Action of the terminal symbol.</param>
private void InternalDefineSymbol(T id, string regex, Action<ReaderController<T>> action)
{
    // ParseContexts receives regex by reference and therefore has to run before the text is
    // parsed: whatever it leaves in regex is what gets parsed below.
    IEnumerable<string> contexts = ParseContexts(ref regex);
    var parsedRegex = RegexParser.ParseRegex(regex, RegexOptions.None, this.regexs);
    InternalDefineSymbol(id, parsedRegex, action, contexts);
}
/// <summary>
/// Downloads the comment pages of <paramref name="tweet"/> one after another and appends every
/// parsed comment to <c>tweet.Comments</c>.
/// </summary>
/// <remarks>
/// Best effort: the method never throws. It stops when a page other than the first does not
/// report the expected page number, when no response could be obtained for a page, or when any
/// step fails; comments collected up to that point stay on the tweet.
/// </remarks>
/// <param name="tweet">Tweet whose comments are fetched; nothing happens when <c>tweet.Comment</c> is 0.</param>
/// <param name="site">Site entity handed to the request processor.</param>
private void FillTweetComment(Tweet tweet, SiteEntity site)
{
    if (tweet.Comment == 0)
    {
        return;
    }

    const int MaxAttempts = 5;
    int currentPage = 1;
    string mid = tweet.Mid;

    try
    {
        while (true)
        {
            string url = string.Format(CommentUrlFormat, mid, currentPage);
            var request = BuildRequest(url);

            CrawlResponse response = null;
            for (int attempt = 0; attempt < MaxAttempts; attempt++)
            {
                try
                {
                    response = GeckoRequestProcessor.DoRequest(request, site, null, null);
                    AggrSum();
                }
                catch (Exception ex)
                {
                    // A failed attempt must not end the retry loop; record it and try again.
                    Logger.Info("Comment request failed: Url = " + url + ", " + ex.Message);
                }

                if (response == null)
                {
                    // Nothing came back from this attempt. Dereferencing response here used to
                    // throw, and the outer handler swallowed it, so no retry ever took place.
                    continue;
                }

                if (response.Status != Enums.CrawlResult.Succ)
                {
                    Logger.Info("访问页面错误:Url = " + response.Url);
                }
                else
                {
                    break;
                }
            }

            if (response == null)
            {
                // Every attempt threw; there is nothing to parse for this page.
                Logger.Info("Comment request gave no response after " + MaxAttempts + " attempts: Url = " + url);
                return;
            }

            CommentJsonResponse tmpResult = JsonConvert.DeserializeObject<CommentJsonResponse>(response.Content.Trim("</pre>".ToArray()));
            response.Content = HttpUtility.HtmlDecode(tmpResult.data.html);

            // From the second page on, stop unless the page reports the page number that was requested.
            var pageMatch = Regex.Match(response.Content, RegexCommentPage, RegexOptions.IgnoreCase | RegexOptions.Multiline);
            if (currentPage != 1 && (!pageMatch.Success || pageMatch.Groups["CurrentPageNum"].Value != currentPage.ToString(CultureInfo.InvariantCulture)))
            {
                return;
            }

            // Fill the tweet with the comments found on this page.
            var matches = Regex.Matches(response.Content, RegexComment, RegexOptions.IgnoreCase | RegexOptions.Multiline);
            foreach (Match match in matches)
            {
                Comment comment = new Comment();
                comment.Author = match.Groups["Author"].Value;
                comment.AuthorUrl = RegexParser.AbsoluteUrl(match.Groups["AuthorUrl"].Value, tweet.Url, true);
                comment.Content = TextCleaner.FullClean(match.Groups["Content"].Value);
                comment.PubDate = DateTimeParser.Parser(match.Groups["PubDate"].Value) ?? DateTime.MinValue;
                tweet.Comments.Add(comment);
            }

            currentPage++;
        }
    }
    catch (Exception ex)
    {
        // Still best effort, but the reason for stopping is no longer lost.
        Logger.Info("Comment crawl aborted: Mid = " + mid + ", page = " + currentPage + ", " + ex.Message);
    }
}
/// <summary>
/// Defines a regular expression with the specified name.
/// </summary>
/// <param name="name">Name of the regular expression.</param>
/// <param name="regex">The regular expression being defined.</param>
public void DefineRegex(string name, string regex)
{
    // The definitions collected so far are passed to the parser.
    var definition = RegexParser.ParseRegex(regex, RegexOptions.None, regexs);
    regexs[name] = definition;
}
/// <summary>
/// Converts a regex pattern into an equivalent symbolic regex.
/// </summary>
/// <param name="regex">The .NET regex pattern to convert.</param>
/// <param name="options">Regular expression options used when parsing the pattern.</param>
/// <param name="keepAnchors">If false (the default), anchors are replaced by equivalent regexes.</param>
/// <returns>The symbolic regex built from the root of the parsed pattern tree.</returns>
public SymbolicRegexNode<S> ConvertToSymbolicRegex(string regex, RegexOptions options, bool keepAnchors = false)
{
    var root = RegexParser.Parse(regex, options)._root;
    return ConvertToSymbolicRegex(root, keepAnchors);
}
/// <summary>
/// Parses <paramref name="input"/> and asserts that rendering the parse result
/// with <paramref name="outputDefn"/> yields <paramref name="expected"/>.
/// </summary>
/// <param name="outputDefn">Pattern parser used to render the parsed input.</param>
/// <param name="input">Pattern text to parse.</param>
/// <param name="expected">Expected rendered output.</param>
private void TestRegExpToSql(PatternParser outputDefn, string input, string expected)
{
    var rendered = outputDefn.Render(RegexParser.Parse(input));
    Assert.AreEqual(expected, rendered);
}
// Returns a Regex object corresponding to the given pattern, compiled with
// the specified options.
/// <include file='doc\Regex.uex' path='docs/doc[@for="Regex.Regex1"]/*' />
/// <devdoc>
///    <para>
///       Creates and compiles a regular expression object for the
///       specified regular expression
///       with options that modify the pattern.
///    </para>
/// </devdoc>
public Regex(String pattern, RegexOptions options) {
    RegexTree tree;
    CachedCodeEntry cached;

    // Argument validation: the pattern must be non-null ...
    if (pattern == null) {
        throw new ArgumentNullException("pattern");
    }

    // ... the options value must be non-negative with no bits set at or above MaxOptionShift ...
    if (options < RegexOptions.None || (((int)options) >> MaxOptionShift) != 0) {
        throw new ArgumentOutOfRangeException("options");
    }

    // ... and ECMAScript may only be combined with the options listed here
    // (Debug is additionally accepted when DBG is defined).
    if ((options & RegexOptions.ECMAScript) != 0 && (options & ~(RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Compiled
#if DBG
        | RegexOptions.Debug
#endif
        )) != 0) {
        throw new ArgumentOutOfRangeException("options");
    }

    // The cache key is the numeric options value and the pattern, separated by ':'.
    String key = ((int)options).ToString(NumberFormatInfo.InvariantInfo) + ":" + pattern;
    cached = LookupCached(key);

    this.pattern = pattern;
    this.roptions = options;

    if (cached == null) {
        // Parse the input
        tree = RegexParser.Parse(pattern, roptions);

        // Extract the relevant information
        capnames = tree._capnames;
        capslist = tree._capslist;
        code = RegexWriter.Write(tree);
        caps = code._caps;
        capsize = code._capsize;

        InitializeReferences();

        // The parse tree is not needed once the code has been written.
        tree = null;
        cachedentry = CacheCode(key);
    } else {
        // Cache hit: reuse everything stored in the cached entry instead of parsing again.
        caps = cached._caps;
        capnames = cached._capnames;
        capslist = cached._capslist;
        capsize = cached._capsize;
        code = cached._code;
        factory = cached._factory;
        runnerref = cached._runnerref;
        replref = cached._replref;
        refsInitialized = true;
        cachedentry = cached;
    }

    // if the compile option is set, then compile the code if it's not already
    // NOTE(review): UseOptionC() presumably tests RegexOptions.Compiled - confirm.
    if (UseOptionC() && factory == null) {
        factory = Compile(code, roptions);
        // Store the compiled factory on the cache entry as well.
        cachedentry.AddCompiled(factory);
        code = null;
    }
}
/// <summary>
/// Parses <paramref name="regex"/> for the given symbol, converts the result to its
/// invariant form and registers it under <paramref name="symbol"/>.
/// </summary>
/// <param name="symbol">Symbol the expression is registered for; also passed to the parser.</param>
/// <param name="regex">Regular expression text to parse.</param>
/// <param name="caseSensitive">Forwarded to the invariant conversion; defaults to true.</param>
public void Add(SymbolId symbol, string regex, bool caseSensitive = true)
{
    var parsed = RegexParser.Parse(regex, symbol);
    var invariant = parsed.ToInvariant(this.mapper, this.provider, caseSensitive);
    this.Add(symbol, invariant);
}
/// <summary>
/// Parses <paramref name="pattern"/> with the invariant culture and builds the
/// find optimizations for the root of the resulting tree.
/// </summary>
/// <param name="pattern">Regular expression pattern to parse.</param>
/// <param name="options">Options used both for parsing and for the optimizations.</param>
/// <returns>The optimizations computed for the parsed pattern.</returns>
private static RegexFindOptimizations ComputeOptimizations(string pattern, RegexOptions options)
{
    CultureInfo culture = CultureInfo.InvariantCulture;
    var root = RegexParser.Parse(pattern, options, culture).Root;
    return new RegexFindOptimizations(root, options, culture);
}
// regex
/// <summary>
/// Overload of <c>match</c> that takes a <see cref="Regex"/>: the pattern is wrapped
/// with <c>RegexParser.Create&lt;T&gt;</c> and the call is forwarded to the overload
/// that accepts the created value.
/// </summary>
/// <param name="self">Grammar analysis the rule is added to.</param>
/// <param name="pattern">Regular expression to wrap.</param>
/// <param name="children">Optional callback forwarded unchanged.</param>
/// <param name="then">Optional callback forwarded unchanged.</param>
/// <returns>Whatever the forwarded <c>match</c> overload returns.</returns>
public static IIndentationGrammarAnalysis<SyntaxToken, SyntaxNode, GNode> match<GNode, TParent, T>(
    this IIndentationGrammarAnalysis<SyntaxToken, SyntaxNode, GNode> self,
    Regex pattern,
    Action<IIndentationGrammarAnalysis<SyntaxToken, SyntaxNode, GNode>> children = null,
    Action<TParent, T> then = null)
    where T : GNode, new()
{
    var regexParser = RegexParser.Create<T>(pattern);
    return match<GNode, TParent, T>(self, regexParser, children, then);
}
/// <summary>
/// Reads a lexer definition file and returns the generated lexer source text.
/// </summary>
/// <remarks>
/// The file is processed line by line after trimming; empty lines are skipped.
/// The first line is a dotted name: everything before the last '.' becomes the
/// namespace and the rest is the base name used for the lexer, token and
/// token-type names. Each following line is interpreted according to the
/// current file mode:
/// <list type="bullet">
/// <item>Normal: an identifier selects a lexer mode; the keyword or code delimiter
/// switches file mode; a line of the form <c>regex tokenType [newMode]</c> registers
/// every string the regex expands to.</item>
/// <item>Code: lines are collected until the next code delimiter and attached to the
/// pending lexeme, or to the table when there is none.</item>
/// <item>Keyword: every non-empty line is a keyword; a code delimiter starts a keyword
/// code block.</item>
/// <item>KeywordDefault: the first non-empty collected block becomes the keyword default
/// code, a later one the keyword tail code.</item>
/// </list>
/// </remarks>
/// <param name="file">Path of the lexer definition file.</param>
/// <returns>
/// The generator output with the <c>{Lexer}</c>, <c>{Token}</c>, <c>{TokenType}</c> and
/// <c>{LexerNamespace}</c> placeholders replaced.
/// </returns>
/// <exception cref="InvalidOperationException">The file contains no non-empty line.</exception>
/// <exception cref="ArgumentOutOfRangeException">The first line contains no '.'.</exception>
public static string Interpret(string file)
{
    var tokenTable = new TokenTable();
    var lexFileMode = LexFileMode.Normal;
    var llText = File.ReadAllText(file);
    var modeRegex = new Regex(@"^[a-zA-Z_][0-9a-zA-Z_-]*$");
    var tokenRegex = new Regex(@"^([^\s]+)\s+(([a-zA-Z_][0-9a-zA-Z_-]*)|(%%))(\s+([a-zA-Z_][0-9a-zA-Z_-]*))?$");
    var lines = llText
        .Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Select(x => x.Trim());

    var lexerMode = 0;
    var lexerModes = new Dictionary<string, int>();
    var tokenTypes = new List<string>();
    var keywords = new List<string>();

    // The first line names the lexer as "<namespace>.<name>".
    string llName = lines.First();
    lines = lines.Skip(1);
    var lastDot = llName.LastIndexOf('.');
    string ns = llName.Remove(lastDot);
    string name = llName.Substring(lastDot + 1);
    string lexerName = name + "Lexer";
    string tokenName = name + "Token";
    string tokenTypeName = name + "TokenType";

    string codeLexeme = null;
    string code = null;
    string buffer = null;
    string keywordDefault = "";
    string keywordTail = "";

    // Assigns the next free mode number to a mode name seen for the first time.
    Action<string> registerMode = x =>
    {
        if (!lexerModes.ContainsKey(x))
            lexerModes.Add(x, lexerMode++);
    };

    foreach (var line in lines)
    {
        if (lexFileMode == LexFileMode.Normal)
        {
            if (modeRegex.IsMatch(line))
            {
                registerMode(line);
                tokenTable.SetMode(lexerModes[line]);
            }
            else if (line == KeywordDelimiter)
            {
                lexFileMode = LexFileMode.Keyword;
                continue;
            }
            else if (line == CodeDelimiter)
            {
                lexFileMode = LexFileMode.Code;
                codeLexeme = null;
                code = "";
                continue;
            }
            else
            {
                // Regex.Match never returns null, so a failed match has to be
                // detected through Success; lines that are not token rules are ignored.
                Match m = tokenRegex.Match(line);
                if (m.Success)
                {
                    var regex = m.Groups[1].Value;
                    var tokenType = m.Groups[2].Value;
                    var newMode = m.Groups[6].Value;

                    var regexLexer = new RegexLexer(regex);
                    var tokens = regexLexer.GetTokens();
                    var parser = new RegexParser(tokens.ToArray());
                    var ast = parser.Parse();
                    var compiler = new RegexCompiler(ast);
                    var strings = compiler.ExpandRegex();

                    foreach (var lexeme in strings)
                    {
                        if (tokenType == CodeDelimiter)
                        {
                            // The rule's token type is the code delimiter: the lines
                            // that follow are code attached to this lexeme.
                            codeLexeme = lexeme;
                            code = "";
                            lexFileMode = LexFileMode.Code;
                            continue;
                        }
                        else if (!tokenTypes.Contains(tokenType))
                            tokenTypes.Add(tokenType);

                        if (!string.IsNullOrEmpty(newMode))
                        {
                            registerMode(newMode);
                            tokenTable.Add(lexeme, tokenType, lexerModes[newMode]);
                        }
                        else
                            tokenTable.Add(lexeme, tokenType);
                    }
                }
            }
        }
        else if (lexFileMode == LexFileMode.Code)
        {
            if (line == CodeDelimiter)
            {
                if (codeLexeme != null)
                    tokenTable.AddLexemeCode(codeLexeme, code);
                else
                    tokenTable.AddCode(code);
                lexFileMode = LexFileMode.Normal;
                continue;
            }
            else
                code += line + "\r\n";
        }
        else if (lexFileMode == LexFileMode.Keyword)
        {
            if (line == KeywordDelimiter)
            {
                lexFileMode = LexFileMode.Normal;
                continue;
            }
            else if (line == CodeDelimiter)
            {
                lexFileMode = LexFileMode.KeywordDefault;
                continue;
            }
            else if (line != "")
            {
                keywords.Add(line);
                tokenTable.AddKeyword(line);
            }
        }
        else if (lexFileMode == LexFileMode.KeywordDefault)
        {
            if (line == CodeDelimiter)
            {
                // The first non-empty block is the default code, a later one the tail code.
                if (string.IsNullOrEmpty(keywordDefault))
                {
                    keywordDefault = buffer;
                }
                else
                {
                    keywordTail = buffer;
                }
                buffer = "";
                lexFileMode = LexFileMode.Keyword;
                continue;
            }
            else
            {
                buffer += line + "\r\n";
            }
        }
    }

    foreach (var keyword in keywords)
    {
        var t = keyword + "Keyword";
        // keywordTail starts out as "", so a null test alone would always take the
        // code branch; only use the tail code when one was actually supplied.
        if (!string.IsNullOrEmpty(keywordTail))
        {
            tokenTable.AddLexemeCode(keyword, keywordTail.Replace("{Keyword}", t));
        }
        else
        {
            tokenTable.Add(keyword, t);
        }
    }

    if (!string.IsNullOrEmpty(keywordDefault))
    {
        // Every proper, non-empty prefix of every keyword gets the default code,
        // unless some list already contains that exact lexeme.
        var k = keywords
            .SelectMany(x => Enumerable
                .Range(1, x.Length - 1)
                .Select(y => x.Remove(y))
                .ToArray())
            .Distinct()
            .ToArray();
        foreach (var i in k)
        {
            if (tokenTable.Lists.Any(x => x.Value.Any(y => y.Lexeme == i)))
            {
                continue;
            }
            tokenTable.AddLexemeCode(i, keywordDefault);
        }
    }

    var generator = new LexerGenerator(tokenTable);
    var lexer = generator.Generate();
    return lexer
        .Replace("{Lexer}", lexerName)
        .Replace("{Token}", tokenName)
        .Replace("{TokenType}", tokenTypeName)
        .Replace("{LexerNamespace}", ns);
}
/// <summary>
/// Verifies that a pattern which does not match the one-line sample string
/// produces no results, so the first-or-default result is null.
/// </summary>
public void MatchGroupsToString_NoMatches_MatchNothing()
{
    var results = RegexParser.MatchGroupsToString(oneLineString, @"John went to the (.*)$", "[0]");

    Assert.Null(results.FirstOrDefault());
}