예제 #1
0
        /// <summary>
        /// Inspects a single page element and, when it is a non-blank text element
        /// lying inside the requested vertical band, appends its text and the
        /// coordinates of its bounding box's left-bottom corner to <paramref name="values"/>.
        /// </summary>
        /// <param name="pageNum">Page number forwarded to <c>GetCooridinateValueOfLeftBottomCorner</c>.</param>
        /// <param name="element">The element to inspect; anything that is not <c>Element.Type.e_text</c> is ignored.</param>
        /// <param name="values">Output list; one entry is appended when the element qualifies.</param>
        /// <param name="bottomAxisValue">Band limit: the corner must NOT be reported as below this value by <c>IsBelow</c>.</param>
        /// <param name="topAxisValue">Band limit: the corner must be reported as below this value by <c>IsBelow</c>.</param>
        /// <param name="isMatchNum">When true, the text must additionally contain at least one digit.</param>
        void ProcessText(int pageNum, Element element, List <TextAndAxisValue> values, double bottomAxisValue, double topAxisValue, bool isMatchNum)
        {
            // Check the element type first so that neither the text string nor the
            // bounding box is queried on non-text elements.
            if (element.GetType() != Element.Type.e_text)
            {
                return;
            }

            // Fetch the text once and reuse it for both the blank check and the result.
            string text = element.GetTextString();
            if (string.IsNullOrEmpty(text.RemoveBlankSpace()))
            {
                return;
            }

            Rect bbox = new Rect();
            element.GetBBox(bbox);

            Point leftBottomPoint = GetCooridinateValueOfLeftBottomCorner(pageNum, bbox);

            // Keep only elements inside the band: below the top limit, not below the bottom limit.
            bool insideBand = IsBelow(leftBottomPoint, topAxisValue) &&
                              !IsBelow(leftBottomPoint, bottomAxisValue);
            if (!insideBand)
            {
                return;
            }

            // Optional filter: the text has to contain a digit somewhere.
            if (isMatchNum && !Regex.IsMatch(text, @"\d"))
            {
                return;
            }

            TextAndAxisValue textAndAxisValue = new TextAndAxisValue();
            textAndAxisValue.text   = text;
            textAndAxisValue.yValue = leftBottomPoint.y;
            textAndAxisValue.xValue = leftBottomPoint.x;
            values.Add(textAndAxisValue);
        }
예제 #2
0
 /// <summary>
 /// Collapses entries that share (approximately) the same yValue into a single
 /// entry, concatenating their texts in ascending xValue order.
 /// The list is modified in place: for each group the first entry is kept and
 /// receives the combined text; the other entries of the group are removed.
 /// The kept entry's xValue and yValue are left unchanged.
 /// </summary>
 /// <param name="TextAndAxisValues">Entries to merge; mutated in place.</param>
 void Merge(List <TextAndAxisValue> TextAndAxisValues)
 {
     // Two entries are treated as one line when their yValues differ by less than this.
     const double sameLineTolerance = 3;

     for (int i = 0; i < TextAndAxisValues.Count; i++)
     {
         // NOTE(review): updating current.text below assumes TextAndAxisValue is a
         // reference type, so the change is visible through the list — confirm.
         TextAndAxisValue current = TextAndAxisValues[i];

         // Fragments of the current line, kept sorted by x. Parallel lists avoid
         // depending on anything beyond the text/xValue members.
         List<double> fragmentXs   = new List<double>();
         List<string> fragmentTexts = new List<string>();
         fragmentXs.Add(current.xValue);
         fragmentTexts.Add(current.text);

         for (int j = i + 1; j < TextAndAxisValues.Count;)
         {
             TextAndAxisValue next = TextAndAxisValues[j];
             if (Math.Abs(current.yValue - next.yValue) < sameLineTolerance)
             {
                 // Stable insertion: place the fragment before the first one that is
                 // strictly further right. Comparing against every collected fragment
                 // (not just the anchor entry) keeps three or more out-of-order
                 // fragments in correct left-to-right order; ties keep list order.
                 int insertAt = 0;
                 while (insertAt < fragmentXs.Count && !(fragmentXs[insertAt] > next.xValue))
                 {
                     insertAt++;
                 }
                 fragmentXs.Insert(insertAt, next.xValue);
                 fragmentTexts.Insert(insertAt, next.text);

                 // Do not advance j: the following entry has shifted into slot j.
                 TextAndAxisValues.RemoveAt(j);
             }
             else
             {
                 j++;
             }
         }

         // Leave the text untouched when nothing was merged into this entry.
         if (fragmentTexts.Count > 1)
         {
             string merged = "";
             foreach (string fragment in fragmentTexts)
             {
                 merged += fragment;
             }
             current.text = merged;
         }
     }
 }
예제 #3
0
        /// <summary>
        /// Removes, in place, every entry whose text (after <c>RemoveBlankSpace</c>)
        /// consists solely of one of these forms: a run of digits, "第N页" optionally
        /// followed by "共N页", or "-N-". These look like page-number markers.
        /// </summary>
        /// <param name="TextAndAxisValues">Entries to filter; mutated in place.</param>
        void Filtrate(List <TextAndAxisValue> TextAndAxisValues)
        {
            string pageMarkerPattern = @"^(\d+|第\d+页(共\d+页)?|-\d+-)$";

            int index = 0;
            while (index < TextAndAxisValues.Count)
            {
                string normalized = TextAndAxisValues[index].text.RemoveBlankSpace();

                if (!Regex.IsMatch(normalized, pageMarkerPattern))
                {
                    // Entry stays; move on to the next slot.
                    index++;
                    continue;
                }

                // Entry is dropped; the next one shifts into this slot, so index is not advanced.
                TextAndAxisValues.RemoveAt(index);
            }
        }