/// <summary>
/// Runs <see cref="RegularExFinder"/> over the content of every grandchild node of
/// <paramref name="root"/> (the children of each of its children) and gathers all hits
/// into a single list.
/// </summary>
/// <param name="KeyWord">Regular-expression feature passed through unchanged to <see cref="RegularExFinder"/>.</param>
/// <param name="root">Root node whose children are walked; each child's own children supply the text to search.</param>
/// <returns>All hits, in traversal order; empty when nothing matched.</returns>
public static List<LocAndValue<String>> FindRegularExpressLoc(struRegularExpressFeature KeyWord, MyRootHtmlNode root)
{
    var found = new List<LocAndValue<String>>();
    foreach (var paragraph in root.Children)
    {
        // Collect from the content of each paragraph. The content already includes the
        // embedded lists, so those are not processed a second time here.
        foreach (var node in paragraph.Children)
        {
            var hits = RegularExFinder(node.PositionId, node.Content, KeyWord);
            found.AddRange(hits);
        }
    }
    return found;
}
/// <summary>
/// Regular-expression search (leading word, regex match, trailing word).
/// Each match returned by <c>RegularTool.GetRegular</c> is kept only if it is immediately
/// preceded by one of the leading words (when a leading list is supplied) and immediately
/// followed by one of the trailing words (when a trailing list is supplied).
/// </summary>
/// <param name="loc">Value copied into the <c>Loc</c> member of every result.</param>
/// <param name="OrgString">Text that is searched and from which the adjacent words are read.</param>
/// <param name="regularfeature">
/// Supplies <c>RegularExpress</c> plus the optional <c>LeadingWordList</c> and
/// <c>TrailingWordList</c>; a null list disables the corresponding filter.
/// </param>
/// <param name="SplitChar">
/// Separator placed between the leading word, the match and the trailing word in the
/// result's <c>Value</c>. It is emitted on both sides of the match even when the
/// leading or trailing word is empty.
/// </param>
/// <returns>
/// One entry per accepted match; <c>StartIdx</c> is the match index moved back by the
/// length of the matched leading word.
/// </returns>
public static List<LocAndValue<String>> RegularExFinder(int loc, string OrgString, struRegularExpressFeature regularfeature, string SplitChar = "")
{
    var results = new List<LocAndValue<String>>();

    foreach (var match in RegularTool.GetRegular(OrgString, regularfeature.RegularExpress))
    {
        // Leading-word filter: the first list entry found directly before the match wins.
        string prefix = "";
        if (regularfeature.LeadingWordList != null)
        {
            prefix = null;
            foreach (var candidate in regularfeature.LeadingWordList)
            {
                // Skip candidates that would start before the beginning of the text.
                if (match.Index - candidate.Length < 0)
                {
                    continue;
                }

                var preceding = OrgString.Substring(match.Index - candidate.Length, candidate.Length);
                if (preceding.Equals(candidate))
                {
                    prefix = candidate;
                    break;
                }
            }

            // A leading list was given but none of its words precedes this match.
            if (prefix == null)
            {
                continue;
            }
        }

        // Trailing-word filter: the first list entry found directly after the match wins.
        string suffix = "";
        if (regularfeature.TrailingWordList != null)
        {
            suffix = null;
            foreach (var candidate in regularfeature.TrailingWordList)
            {
                // Skip candidates that would run past the end of the text.
                if (match.Index + match.Length + candidate.Length > OrgString.Length)
                {
                    continue;
                }

                var following = OrgString.Substring(match.Index + match.Length, candidate.Length);
                if (following.Equals(candidate))
                {
                    suffix = candidate;
                    break;
                }
            }

            // A trailing list was given but none of its words follows this match.
            if (suffix == null)
            {
                continue;
            }
        }

        results.Add(new LocAndValue<String>()
        {
            Value = prefix + SplitChar + match.RawData + SplitChar + suffix,
            StartIdx = match.Index - prefix.Length,
            Loc = loc
        });
    }

    return results;
}