/// <summary>
/// Finds where a keyword occurs in the non-table content nodes of a document.
/// </summary>
/// <remarks>
/// Every child of <paramref name="root"/> is treated as a paragraph and every child of a
/// paragraph as a content node. Only nodes whose <c>TableId</c> is -1 are searched, and only
/// the first occurrence inside each node is reported. The search is ordinal (exact code-unit
/// match), so the result does not depend on the current culture.
/// </remarks>
/// <param name="KeyWord">Text to look for.</param>
/// <param name="root">Root node whose grandchildren are scanned.</param>
/// <returns>
/// One entry per matching content node, carrying the keyword as <c>Value</c>, the node's
/// <c>PositionId</c> as <c>Loc</c>, and the match offset as <c>StartIdx</c>.
/// </returns>
public static List<LocAndValue<String>> FindWordLoc(string KeyWord, MyRootHtmlNode root)
{
    var matches = new List<LocAndValue<String>>();
    foreach (var paragraph in root.Children)
    {
        // Original author's note: paragraph content already includes the embedded lists,
        // so they are not visited a second time here.
        foreach (var contentNode in paragraph.Children)
        {
            if (contentNode.TableId != -1)
            {
                continue;
            }

            // A node without text cannot contain the keyword; skip it instead of throwing.
            if (contentNode.Content == null)
            {
                continue;
            }

            // Ordinal search: a literal keyword lookup must not be affected by
            // culture-specific collation rules.
            var idx = contentNode.Content.IndexOf(KeyWord, StringComparison.Ordinal);
            if (idx == -1)
            {
                continue;
            }

            matches.Add(new LocAndValue<String>()
            {
                Value = KeyWord,
                Loc = contentNode.PositionId,
                StartIdx = idx,
            });
        }
    }
    return matches;
}
/// <summary>
/// Accepts a candidate unless its value contains the text "招标".
/// </summary>
/// <param name="x">Candidate whose <c>Value</c> is inspected.</param>
/// <returns><c>false</c> when <c>x.Value</c> contains "招标"; otherwise <c>true</c>.</returns>
bool JiaFangValidator(LocAndValue<string> x)
{
    return !x.Value.Contains("招标");
}
/// <summary>
/// Checks whether a located value satisfies a word rule.
/// </summary>
/// <remarks>
/// A rule list that is null or empty imposes no constraint. When <c>rule.Description</c> is
/// populated, it must contain <c>evaluate.Description</c>; when <c>rule.Word</c> is populated,
/// it must contain the string form of <c>evaluate.Value</c>. The word check runs only after
/// the description check has passed.
/// </remarks>
/// <param name="rule">Rule providing the allowed descriptions and words.</param>
/// <param name="evaluate">Located value being tested.</param>
/// <returns><c>true</c> when every populated constraint is satisfied.</returns>
static bool IsMatch<T>(WordRule rule, LocAndValue<T> evaluate)
{
    bool descriptionFilterActive = rule.Description != null && rule.Description.Count != 0;
    if (descriptionFilterActive && !rule.Description.Contains(evaluate.Description))
    {
        return false;
    }

    bool wordFilterActive = rule.Word != null && rule.Word.Count != 0;
    if (wordFilterActive && !rule.Word.Contains(evaluate.Value.ToString()))
    {
        return false;
    }

    return true;
}
/// <summary>
/// Distance from this item to another one (positive when the other item comes later).
/// </summary>
/// <remarks>
/// Each item is mapped to a linear position computed as <c>Loc * 1000 + StartIdx</c>.
/// For non-string values the result is simply the difference of the two positions.
/// For string values the length of one of the two values is folded in, chosen by comparing
/// the two <c>StartIdx</c> fields: this value's length is subtracted when the other item
/// starts at a larger index, otherwise the other value's length is added.
/// NOTE(review): the branch is selected from <c>StartIdx</c> alone and ignores <c>Loc</c>;
/// presumably callers compare items with the same <c>Loc</c> — confirm against call sites.
/// </remarks>
/// <param name="other">Item to measure the distance to.</param>
/// <returns>Signed distance as described above.</returns>
public int Distance(LocAndValue<T> other)
{
    int selfPosition = Loc * 1000 + StartIdx;
    int otherPosition = other.Loc * 1000 + other.StartIdx;

    if (!(Value is string))
    {
        // Non-text values have no length to account for.
        return otherPosition - selfPosition;
    }

    return other.StartIdx > StartIdx
        ? otherPosition - selfPosition - Value.ToString().Length
        : otherPosition + other.Value.ToString().Length - selfPosition;
}
/// <summary>
/// Regular-expression search with optional leading and trailing word filters
/// (leading word, regex match, trailing word).
/// </summary>
/// <remarks>
/// Every match returned by <c>RegularTool.GetRegular</c> is kept only if
/// (a) <c>LeadingWordList</c> is null, or one of its entries sits immediately before the match, and
/// (b) <c>TrailingWordList</c> is null, or one of its entries sits immediately after the match.
/// The first entry that fits wins. A non-null but empty list rejects every match.
/// </remarks>
/// <param name="loc">Value copied into <c>Loc</c> of every result.</param>
/// <param name="OrgString">Text being searched.</param>
/// <param name="regularfeature">Pattern plus the optional leading/trailing word lists.</param>
/// <param name="SplitChar">Separator inserted between leading word, match and trailing word.</param>
/// <returns>
/// One entry per accepted match; <c>Value</c> is leading word + separator + matched text +
/// separator + trailing word, and <c>StartIdx</c> is the index where the leading word begins.
/// </returns>
public static List<LocAndValue<String>> RegularExFinder(int loc, string OrgString, struRegularExpressFeature regularfeature, string SplitChar = "")
{
    var results = new List<LocAndValue<String>>();

    foreach (var hit in RegularTool.GetRegular(OrgString, regularfeature.RegularExpress))
    {
        // Leading-word filter: look directly in front of the match.
        string prefix = "";
        if (regularfeature.LeadingWordList != null)
        {
            bool prefixFound = false;
            foreach (var candidate in regularfeature.LeadingWordList)
            {
                var begin = hit.Index - candidate.Length;
                if (begin < 0)
                {
                    // Not enough room in front of the match for this word.
                    continue;
                }
                if (OrgString.Substring(begin, candidate.Length) == candidate)
                {
                    prefix = candidate;
                    prefixFound = true;
                    break;
                }
            }
            if (!prefixFound)
            {
                continue;
            }
        }

        // Trailing-word filter: look directly behind the match.
        string suffix = "";
        if (regularfeature.TrailingWordList != null)
        {
            bool suffixFound = false;
            foreach (var candidate in regularfeature.TrailingWordList)
            {
                if (hit.Index + hit.Length + candidate.Length > OrgString.Length)
                {
                    // Not enough room behind the match for this word.
                    continue;
                }
                if (OrgString.Substring(hit.Index + hit.Length, candidate.Length) == candidate)
                {
                    suffix = candidate;
                    suffixFound = true;
                    break;
                }
            }
            if (!suffixFound)
            {
                continue;
            }
        }

        results.Add(new LocAndValue<String>()
        {
            Value = prefix + SplitChar + hit.RawData + SplitChar + suffix,
            StartIdx = hit.Index - prefix.Length,
            Loc = loc
        });
    }

    return results;
}