/// <summary>
/// Appends a <see cref="TextAndAxisValue"/> for <paramref name="element"/> to
/// <paramref name="values"/> when the element is a text element with non-blank text
/// and its bounding-box corner lies between the two axis limits.
/// </summary>
/// <param name="pageNum">Page number forwarded to <c>GetCooridinateValueOfLeftBottomCorner</c>.</param>
/// <param name="element">The page element to inspect.</param>
/// <param name="values">Destination list; at most one entry is appended per call.</param>
/// <param name="bottomAxisValue">Limit passed to <c>IsBelow</c>; the corner must NOT be below it.</param>
/// <param name="topAxisValue">Limit passed to <c>IsBelow</c>; the corner must be below it.</param>
/// <param name="isMatchNum">When true, only text containing at least one digit (<c>\d</c>) is kept.</param>
void ProcessText(int pageNum, Element element, List<TextAndAxisValue> values, double bottomAxisValue, double topAxisValue, bool isMatchNum)
{
    // Check the element kind first so the text and bounding box are only queried
    // for text elements.
    // NOTE(review): GetTextString is presumably only meaningful for e_text elements -
    // confirm against the PDF library documentation.
    if (element.GetType() != Element.Type.e_text)
    {
        return;
    }

    // Read the text once and reuse it for both the blank check and the stored value.
    string text = element.GetTextString();
    if (string.IsNullOrEmpty(text.RemoveBlankSpace()))
    {
        return;
    }

    Rect bbox = new Rect();
    element.GetBBox(bbox);
    Point leftBottomPoint = GetCooridinateValueOfLeftBottomCorner(pageNum, bbox);

    // Keep only elements whose corner is below the top limit but not below the bottom limit.
    if (!IsBelow(leftBottomPoint, topAxisValue) || IsBelow(leftBottomPoint, bottomAxisValue))
    {
        return;
    }

    if (isMatchNum && !Regex.IsMatch(text, @"\d"))
    {
        return;
    }

    TextAndAxisValue textAndAxisValue = new TextAndAxisValue
    {
        text = text,
        yValue = leftBottomPoint.y,
        xValue = leftBottomPoint.x
    };
    values.Add(textAndAxisValue);
}
/// <summary>
/// Merges, in place, entries whose <c>yValue</c> differs from an earlier entry's
/// <c>yValue</c> by less than 3. The later entries are removed from the list and their
/// text is folded into the earlier entry, concatenated in ascending <c>xValue</c> order.
/// </summary>
/// <remarks>
/// Fragments with equal <c>xValue</c> keep their encounter order, with the surviving
/// entry first. The surviving entry's <c>xValue</c> and <c>yValue</c> are left unchanged.
/// NOTE(review): assumes <see cref="TextAndAxisValue"/> is a reference type, so that
/// updating <c>current.text</c> is visible through the list - confirm.
/// </remarks>
/// <param name="TextAndAxisValues">The list to merge; modified in place.</param>
void Merge(List<TextAndAxisValue> TextAndAxisValues)
{
    // Maximum yValue difference for two entries to be merged.
    const double SameLineTolerance = 3;

    for (int i = 0; i < TextAndAxisValues.Count; i++)
    {
        TextAndAxisValue current = TextAndAxisValues[i];

        // Parallel lists of the fragments merged into `current`, kept sorted by x.
        // Allocated lazily so entries with nothing to merge are left untouched.
        List<double> xPositions = null;
        List<string> texts = null;

        for (int j = i + 1; j < TextAndAxisValues.Count;)
        {
            TextAndAxisValue next = TextAndAxisValues[j];
            if (Math.Abs(current.yValue - next.yValue) < SameLineTolerance)
            {
                if (texts == null)
                {
                    xPositions = new List<double> { current.xValue };
                    texts = new List<string> { current.text };
                }

                // Stable insertion: walk left past fragments that start strictly to the
                // right of `next`, so equal x positions keep their encounter order.
                int insertAt = xPositions.Count;
                while (insertAt > 0 && xPositions[insertAt - 1] > next.xValue)
                {
                    insertAt--;
                }

                xPositions.Insert(insertAt, next.xValue);
                texts.Insert(insertAt, next.text);

                // Do not advance j: the following entry has shifted into slot j.
                TextAndAxisValues.RemoveAt(j);
            }
            else
            {
                j++;
            }
        }

        if (texts != null)
        {
            current.text = string.Concat(texts);
        }
    }
}
/// <summary>
/// Removes, in place, every entry whose text (after <c>RemoveBlankSpace</c>) consists
/// solely of a page-number-like token.
/// </summary>
/// <remarks>
/// The recognised forms are: digits only, the Chinese "page N" form optionally followed
/// by "of M pages", and digits wrapped in hyphens such as <c>-12-</c>.
/// </remarks>
/// <param name="TextAndAxisValues">The list to filter; modified in place.</param>
void Filtrate(List<TextAndAxisValue> TextAndAxisValues)
{
    const string pageNumberPattern = @"^(\d+|第\d+页(共\d+页)?|-\d+-)$";

    // RemoveAll compacts the list in a single pass instead of shifting the tail on
    // every individual RemoveAt.
    TextAndAxisValues.RemoveAll(item => Regex.IsMatch(item.text.RemoveBlankSpace(), pageNumberPattern));
}