/// <summary>
/// Builds a "relabel" operation over <paramref name="child"/>.
/// </summary>
/// <remarks>
/// The specification in <paramref name="newLabel"/> is tried against two patterns
/// declared outside this constructor:
/// <list type="bullet">
/// <item><c>substPattern</c>: on a match the node works in <c>Regex</c> mode; group 1 is
/// compiled into <c>labelRegex</c> and group 2 becomes the replacement string.</item>
/// <item><c>regexPattern</c>: on a match the node works in <c>Fixed</c> mode and group 1,
/// passed through <c>RemoveEscapeSlashes</c>, becomes the new label.</item>
/// </list>
/// If neither matches, the node works in <c>Fixed</c> mode with
/// <paramref name="newLabel"/> used verbatim.
/// NOTE(review): both patterns presumably recognise text delimited by '/', with escaped
/// '/' allowed inside - confirm against their declarations.
/// </remarks>
/// <param name="child">The single child pattern handed to the base constructor.</param>
/// <param name="newLabel">The relabel specification.</param>
public RelabelNode(TsurgeonPattern child, string newLabel)
    : base("relabel", new TsurgeonPattern[] { child })
{
    Java.Util.Regex.Matcher substitution = substPattern.Matcher(newLabel);

    if (!substitution.Matches())
    {
        // Not a substitution: relabel to a fixed string.
        mode = RelabelNode.RelabelMode.Fixed;
        Java.Util.Regex.Matcher slashed = regexPattern.Matcher(newLabel);

        // A fixed label may still be wrapped in regex slashes; strip the escapes then.
        // Otherwise the argument is just the node name to relabel to.
        this.newLabel = slashed.Matches()
            ? RemoveEscapeSlashes(slashed.Group(1))
            : newLabel;

        this.replacementString = null;
        this.replacementPieces = null;
        this.labelRegex = null;
        return;
    }

    mode = RelabelNode.RelabelMode.Regex;
    this.labelRegex = Pattern.Compile(substitution.Group(1));
    this.replacementString = substitution.Group(2);
    replacementPieces = new List<string>();

    // Cut the replacement string into alternating literal stretches and the
    // tokens recognised by oneGeneralReplacementPattern, preserving their order.
    Java.Util.Regex.Matcher tokens = oneGeneralReplacementPattern.Matcher(substitution.Group(2));
    int cursor = 0;
    while (tokens.Find())
    {
        int tokenStart = tokens.Start();
        if (tokenStart > cursor)
        {
            // Literal text sitting between the previous token and this one.
            replacementPieces.Add(Sharpen.Runtime.Substring(replacementString, cursor, tokenStart));
        }
        cursor = tokens.End();

        string token = tokens.Group();
        if (!token.Equals(string.Empty))
        {
            replacementPieces.Add(token);
        }
    }

    if (cursor < replacementString.Length)
    {
        // Trailing literal text after the last token.
        replacementPieces.Add(Sharpen.Runtime.Substring(replacementString, cursor));
    }

    this.newLabel = null;
}
/// <summary>
/// Creates a description pattern and chooses how <paramref name="desc"/> is represented.
/// </summary>
/// <remarks>
/// The first applicable rule wins:
/// <list type="number">
/// <item><c>__</c>, <c>/.*/</c> or <c>/^.*$/</c>: <c>Anything</c> mode.</item>
/// <item><c>SingleWordPattern</c> matches: <c>Exact</c> mode on its first captured group.</item>
/// <item><c>MultiWordPattern</c>, <c>CaseInsensitivePattern</c> or <c>PrefixPattern</c>
/// matches: the captured group is split on '|'. More than <c>MaxStringMatcherSize</c>
/// alternatives gives <c>Pattern</c> mode (the text between the delimiting slashes is
/// compiled); otherwise <c>Strings</c> mode with an exact, case-insensitive or prefix
/// <c>ArrayStringFilter</c> respectively.</item>
/// <item>Any other <c>/.../</c> description: <c>Pattern</c> mode.</item>
/// <item>A bare description containing '|': <c>Strings</c> mode (exact) when it has at
/// most <c>MaxStringMatcherSize</c> alternatives, otherwise <c>Pattern</c> mode
/// anchored as <c>^(?:desc)$</c>.</item>
/// <item>Anything else: <c>Exact</c> mode on <paramref name="desc"/> itself.</item>
/// </list>
/// When <paramref name="desc"/> is null the mode and all matchers are null and the
/// stored description is a single space.
/// </remarks>
/// <param name="rel">Relation stored on the pattern.</param>
/// <param name="negDesc">Negation flag stored on the pattern.</param>
/// <param name="desc">Node description; may be null when the pattern only names or links a variable.</param>
/// <param name="name">Name stored on the pattern; may be null.</param>
/// <param name="useBasicCat">When false, <paramref name="basicCatFunction"/> is discarded and null is stored.</param>
/// <param name="basicCatFunction">Function stored only when <paramref name="useBasicCat"/> is true.</param>
/// <param name="variableGroups">Stored as given. NOTE(review): presumably the regex groups captured as matcher-global string variables - confirm against the field declaration.</param>
/// <param name="isLink">Link flag stored on the pattern.</param>
/// <param name="linkedName">Linked name stored on the pattern; may be null.</param>
/// <exception cref="AssertionError">
/// <paramref name="desc"/>, <paramref name="name"/> and <paramref name="linkedName"/> are all null.
/// </exception>
public DescriptionPattern(Relation rel, bool negDesc, string desc, string name, bool useBasicCat, IFunction<string, string> basicCatFunction, IList<Pair<int, string>> variableGroups, bool isLink, string linkedName)
{
    this.rel = rel;
    this.negDesc = negDesc;
    this.isLink = isLink;
    this.linkedName = linkedName;

    // Each branch below picks a mode and fills in at most one of these three.
    descPattern = null;
    exactMatch = null;
    stringFilter = null;

    if (desc == null)
    {
        if (name == null && linkedName == null)
        {
            throw new AssertionError("Illegal description pattern. Does not describe a node or link/name a variable");
        }
        stringDesc = " ";
        descriptionMode = null;
    }
    else
    {
        stringDesc = desc;

        string singleWord;
        string[] words;
        ArrayStringFilter.Mode filterMode;

        if (desc.Equals("__") || desc.Equals("/.*/") || desc.Equals("/^.*$/"))
        {
            descriptionMode = DescriptionPattern.DescriptionMode.Anything;
        }
        else if (TryMatchFirstGroup(SingleWordPattern.Matcher(desc), out singleWord))
        {
            // Such expressions are written as regexes only to get special characters
            // into the matcher; a plain string comparison is cheaper than a regex.
            descriptionMode = DescriptionPattern.DescriptionMode.Exact;
            exactMatch = singleWord;
        }
        else if (TryParseRegexDisjunction(desc, out words, out filterMode))
        {
            if (words.Length > MaxStringMatcherSize)
            {
                // Too many alternatives for the string matcher: keep the regex.
                descriptionMode = DescriptionPattern.DescriptionMode.Pattern;
                descPattern = Pattern.Compile(StripRegexDelimiters(desc));
            }
            else
            {
                descriptionMode = DescriptionPattern.DescriptionMode.Strings;
                stringFilter = new ArrayStringFilter(filterMode, words);
            }
        }
        else if (desc.Matches("/.*/"))
        {
            descriptionMode = DescriptionPattern.DescriptionMode.Pattern;
            descPattern = Pattern.Compile(StripRegexDelimiters(desc));
        }
        else if (desc.IndexOf('|') >= 0)
        {
            // Bare descriptions containing ORs are a special case: short disjunctions
            // become a string matcher, long ones are promoted to an anchored regex.
            words = desc.Split("[|]");
            if (words.Length <= MaxStringMatcherSize)
            {
                descriptionMode = DescriptionPattern.DescriptionMode.Strings;
                stringFilter = new ArrayStringFilter(ArrayStringFilter.Mode.Exact, words);
            }
            else
            {
                descriptionMode = DescriptionPattern.DescriptionMode.Pattern;
                descPattern = Pattern.Compile("^(?:" + desc + ")$");
            }
        }
        else
        {
            // Raw description.
            descriptionMode = DescriptionPattern.DescriptionMode.Exact;
            exactMatch = desc;
        }
    }

    this.name = name;
    SetChild(null);
    this.basicCatFunction = (useBasicCat ? basicCatFunction : null);
    this.variableGroups = variableGroups;
}

/// <summary>
/// Runs a whole-input match on <paramref name="matcher"/> and, on success, yields the
/// first capture group (from index 1) that is not null.
/// </summary>
/// <param name="matcher">Matcher already bound to the text to test.</param>
/// <param name="group">
/// The first non-null capture group; null when the match failed or when no group
/// took part in the match.
/// </param>
/// <returns>True when the whole input matched.</returns>
private static bool TryMatchFirstGroup(Java.Util.Regex.Matcher matcher, out string group)
{
    group = null;
    if (!matcher.Matches())
    {
        return false;
    }
    for (int i = 1; i <= matcher.GroupCount(); ++i)
    {
        string candidate = matcher.Group(i);
        if (candidate != null)
        {
            group = candidate;
            break;
        }
    }
    return true;
}

/// <summary>
/// Recognises the regex forms that stand for a plain disjunction of words and splits
/// them into alternatives. The multi-word form is tried first, then the
/// case-insensitive form, then the prefix form.
/// </summary>
/// <param name="desc">Description to analyse; must not be null.</param>
/// <param name="words">The alternatives, or null when no form matched.</param>
/// <param name="filterMode">The filter mode that goes with the form that matched.</param>
/// <returns>True when one of the three forms matched.</returns>
private static bool TryParseRegexDisjunction(string desc, out string[] words, out ArrayStringFilter.Mode filterMode)
{
    string group;

    if (TryMatchFirstGroup(MultiWordPattern.Matcher(desc), out group))
    {
        // Backslashes are removed from the captured text before it is split.
        words = group.ReplaceAll("\\\\", string.Empty).Split("[|]");
        filterMode = ArrayStringFilter.Mode.Exact;
        return true;
    }

    if (TryMatchFirstGroup(CaseInsensitivePattern.Matcher(desc), out group))
    {
        words = group.ReplaceAll("\\\\", string.Empty).Split("[|]");
        filterMode = ArrayStringFilter.Mode.CaseInsensitive;
        return true;
    }

    if (TryMatchFirstGroup(PrefixPattern.Matcher(desc), out group))
    {
        // Unlike the two forms above, the prefix form is split as captured.
        words = group.Split("[|]");
        filterMode = ArrayStringFilter.Mode.Prefix;
        return true;
    }

    words = null;
    filterMode = ArrayStringFilter.Mode.Exact;
    return false;
}

/// <summary>
/// Returns <paramref name="desc"/> without its first and last character, i.e. the text
/// between the delimiting slashes of a <c>/.../</c> description.
/// </summary>
private static string StripRegexDelimiters(string desc)
{
    return Sharpen.Runtime.Substring(desc, 1, desc.Length - 1);
}