/// <summary>
/// Parses a pattern string into a <see cref="PatternNode"/> tree.
/// The input is first split on every <c>|</c> that is not nested inside parentheses.
/// When that yields more than one alternative, the returned node gets
/// <c>Releation.Or</c> and one child per alternative, each of which is then passed to
/// <c>ProcessAndReleation</c>. Otherwise the returned node itself is passed to
/// <c>ProcessAndReleation</c> with its full text.
/// </summary>
/// <param name="input">The pattern text to parse.</param>
/// <returns>The root node created from <paramref name="input"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <exception cref="ApplicationException">
/// The parentheses are unbalanced, including a <c>)</c> that appears before its <c>(</c>.
/// </exception>
public static PatternNode Parse(string input)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    PatternNode resultNode = new PatternNode(input);
    if (input.Length == 0)
    {
        // An empty pattern has nothing to split; hand it back as a childless node.
        return resultNode;
    }

    int depth = 0;
    List<string> alternatives = new List<string>();
    StringBuilder segment = new StringBuilder();

    for (int index = 0; index < input.Length; index++)
    {
        char currentChar = input[index];
        switch (currentChar)
        {
            case '|':
                if (depth != 0)
                {
                    // A '|' inside parentheses belongs to the nested group.
                    segment.Append(currentChar);
                }
                else if (segment.Length > 0)
                {
                    // Top-level '|': close the current alternative (empty ones are dropped).
                    alternatives.Add(segment.ToString());
                    segment.Length = 0;
                }
                break;

            case '(':
                depth++;
                segment.Append(currentChar);
                break;

            case ')':
                depth--;
                if (depth < 0)
                {
                    // A ')' without a preceding '(' would otherwise be cancelled out by a
                    // later '(' and slip past the balance check after the loop.
                    throw new ApplicationException("括号不匹配");
                }
                segment.Append(currentChar);
                if (index + 1 < input.Length && input[index + 1] == '*')
                {
                    // Keep a '*' that directly follows ')' attached to the group.
                    segment.Append('*');
                    index++;
                }
                break;

            default:
                segment.Append(currentChar);
                break;
        }
    }

    if (depth != 0)
    {
        throw new ApplicationException("括号不匹配");
    }

    if (segment.Length > 0)
    {
        alternatives.Add(segment.ToString());
    }

    if (alternatives.Count > 1)
    {
        // This level has an OR relation, e.g. "a|b".
        foreach (string alternative in alternatives)
        {
            resultNode.Nodes.Add(new PatternNode(alternative));
        }
        resultNode.Releation = Releation.Or;
        foreach (var child in resultNode.Nodes)
        {
            ProcessAndReleation(child);
        }
    }
    else
    {
        ProcessAndReleation(resultNode);
    }

    return resultNode;
}
/// <summary>
/// Adds the children of <paramref name="pattNode"/> to <paramref name="coll"/> as tree nodes.
/// A node without children is added as a single entry showing only its text.
/// Otherwise each child is added with the label <c>Text:Releation:OneOrMore</c>, and any
/// child that has children of its own is expanded recursively under its tree node.
/// </summary>
/// <param name="pattNode">The pattern node whose content is displayed.</param>
/// <param name="coll">The tree node collection that receives the new entries.</param>
private void showParseNode(PatternNode pattNode, TreeNodeCollection coll)
{
    bool hasChildren = pattNode.Nodes.Count != 0;
    if (!hasChildren)
    {
        coll.Add(pattNode.Text);
        return;
    }

    foreach (var child in pattNode.Nodes)
    {
        string label = string.Format("{0}:{1}:{2}", child.Text, child.Releation.ToString(), child.OneOrMore);
        TreeNode childTreeNode = new TreeNode(label);
        coll.Add(childTreeNode);

        // Only descend when there is something below; leaves are already fully shown.
        if (child.Nodes.Count != 0)
        {
            showParseNode(child, childTreeNode.Nodes);
        }
    }
}
/// <summary>
/// Builds a <see cref="RegexNode"/> tree that mirrors <paramref name="rootPatt"/>.
/// Each created node copies the relation and text of its pattern node and receives a
/// <c>Parse</c> value from <c>ParseFuncFactory</c>, chosen by whether the pattern node
/// is a leaf, which relation it has and whether <c>OneOrMore</c> is set.
/// </summary>
/// <remarks>
/// When the node has children and its relation is neither <c>Releation.And</c> nor
/// <c>Releation.Or</c>, <c>Parse</c> is not assigned by this method.
/// NOTE(review): a non-repeating leaf passes its text to <c>MaxMatch</c> unchanged, so any
/// surrounding parentheses are kept, while a repeating leaf has them trimmed - confirm
/// that this asymmetry is intended.
/// </remarks>
/// <param name="rootPatt">The pattern node to convert.</param>
/// <returns>The node created for <paramref name="rootPatt"/>, with its converted children attached.</returns>
public static RegexNode GetParseNode(PatternNode rootPatt)
{
    RegexNode root = new RegexNode();
    root.Releation = rootPatt.Releation;
    root.Text = rootPatt.Text;

    // Leaf: match the text itself.
    if (rootPatt.Nodes.Count == 0)
    {
        if (!rootPatt.OneOrMore)
        {
            root.Parse = ParseFuncFactory.MaxMatch(rootPatt.Text);
            return root;
        }

        // Strip trailing '*' / ')' characters and leading '(' characters before matching.
        string toMatch = rootPatt.Text.TrimEnd('*', ')').TrimStart('(');
        root.Parse = ParseFuncFactory.OneOrMoreMaxMatch(toMatch);
        return root;
    }

    // Inner node: convert the children first, then combine them.
    for (int i = 0; i < rootPatt.Nodes.Count; i++)
    {
        RegexNode converted = GetParseNode(rootPatt.Nodes[i]);
        root.Nodes.Add(converted);
    }

    if (rootPatt.Releation == Releation.And)
    {
        if (rootPatt.OneOrMore)
        {
            root.Parse = ParseFuncFactory.MatchOneOrMoreWithAnd(root.Nodes);
        }
        else
        {
            root.Parse = ParseFuncFactory.MatchAnd(root.Nodes);
        }
        return root;
    }

    if (rootPatt.Releation == Releation.Or)
    {
        if (rootPatt.OneOrMore)
        {
            root.Parse = ParseFuncFactory.MatchOneOrMoreWithOr(root.Nodes);
        }
        else
        {
            root.Parse = ParseFuncFactory.MatchOr(root.Nodes);
        }
    }

    return root;
}
/// <summary>
/// Splits the text of <paramref name="pattNode"/> into the pieces that are concatenated at
/// its top level and fills in the node accordingly.
/// With more than one piece (e.g. <c>"a(b|c)d"</c> gives <c>a</c>, <c>(b|c)</c>, <c>d</c>)
/// the node gets <c>Releation.And</c> and one child per piece; every child that contains a
/// parenthesis is parsed further through <c>Parse</c>.
/// With a single piece that is one parenthesised group (e.g. <c>"(a|b)"</c> or
/// <c>"(ab)*"</c>) the outer parentheses and an optional trailing <c>*</c> are stripped, the
/// inner text is parsed through <c>Parse</c>, and the result's children and relation are
/// copied onto the node. A single piece without a leading parenthesis (e.g. <c>"abc"</c>)
/// leaves the node untouched. Empty text is likewise left untouched.
/// </summary>
/// <param name="pattNode">The node whose text is analysed; it is modified in place.</param>
/// <exception cref="ApplicationException">
/// The parentheses are unbalanced, including a <c>)</c> that appears before its <c>(</c>.
/// </exception>
private static void ProcessAndReleation(PatternNode pattNode)
{
    string input = pattNode.Text;
    if (input.Length == 0)
    {
        // Nothing to split (reached for an empty pattern or an empty group "()").
        // Without this guard the single-piece branch below would index an empty list.
        return;
    }

    int depth = 0;
    List<string> pieces = new List<string>();
    StringBuilder piece = new StringBuilder();

    for (int index = 0; index < input.Length; index++)
    {
        char currentChar = input[index];
        switch (currentChar)
        {
            case '(':
                // For "abc(de(fg))" emit "abc" before the top-level group starts.
                if (depth == 0 && piece.Length > 0)
                {
                    pieces.Add(piece.ToString());
                    piece.Length = 0;
                }
                depth++;
                piece.Append(currentChar);
                break;

            case ')':
                depth--;
                if (depth < 0)
                {
                    // A ')' without a preceding '(' would otherwise be cancelled out by a
                    // later '(' and slip past the balance check after the loop.
                    throw new ApplicationException("括号不匹配");
                }
                piece.Append(currentChar);
                if (index + 1 < input.Length && input[index + 1] == '*')
                {
                    // Keep a '*' that directly follows ')' attached to the group.
                    piece.Append('*');
                    index++;
                }
                // Only a closing top-level parenthesis ends a piece:
                // "abc(de(fg))" yields "(de(fg))", never "(fg)" on its own.
                if (depth == 0 && piece.Length > 0)
                {
                    pieces.Add(piece.ToString());
                    piece.Length = 0;
                }
                break;

            default:
                piece.Append(currentChar);
                break;
        }
    }

    if (depth != 0)
    {
        throw new ApplicationException("括号不匹配");
    }

    if (piece.Length > 0)
    {
        pieces.Add(piece.ToString());
    }

    if (pieces.Count > 1)
    {
        // This level has an AND relation, e.g. "a(b|c)d" splits into a, (b|c), d.
        pattNode.Releation = Releation.And;
        foreach (string text in pieces)
        {
            pattNode.Nodes.Add(new PatternNode(text));
        }

        foreach (var child in pattNode.Nodes)
        {
            if (child.Text.IndexOf('(') < 0)
            {
                // Plain string such as "a": recursion ends here.
                continue;
            }

            // A group such as "(b|c)" may contain further OR / AND relations.
            var parsedChild = Parse(child.Text);
            child.Nodes = parsedChild.Nodes;
            child.Releation = parsedChild.Releation;
            child.OneOrMore = child.Text.EndsWith("*", StringComparison.Ordinal);
        }
        return;
    }

    if (pieces[0] != input)
    {
        // Should be impossible: the pieces always concatenate back to the full text,
        // so a single piece is the text itself.
        System.Diagnostics.Debugger.Break();
        return;
    }

    // No AND relation at this level, e.g. "(b|c)" or "cd".
    if (input[0] != '(')
    {
        // Plain string such as "abc": recursion ends here.
        return;
    }

    // "(ab)" or "(a|b)", optionally followed by '*': strip the outer parentheses
    // (and the '*') and parse what is inside.
    bool repeated = input.EndsWith("*", StringComparison.Ordinal);
    int innerLength = input.Length - 2;
    if (repeated)
    {
        innerLength--;
    }

    var innerNode = Parse(input.Substring(1, innerLength));
    pattNode.Nodes = innerNode.Nodes;
    pattNode.Releation = innerNode.Releation;
    pattNode.OneOrMore = repeated;
}