Example #1
0
        /// <summary>
        /// Layout heuristic that compares this word group with <paramref name="compare"/> using
        /// character metrics (height, spacing, font size, X/Y size and vertical distance).
        /// NOTE(review): judging by the flags it sets, a <c>true</c> result presumably means
        /// "belongs to the same block as <paramref name="compare"/>" — confirm against callers.
        /// </summary>
        /// <remarks>
        /// Despite its name this is not a pure equality test: depending on the branch taken it
        /// assigns <c>IsSmallWord</c>, <c>IsSubTitleStart</c> or <c>IsEndBlock</c> on this instance.
        /// </remarks>
        /// <param name="compare">The other word group; may be null.</param>
        /// <returns>
        /// <c>false</c> when <paramref name="compare"/> is null or either side has no characters;
        /// otherwise the outcome of the first matching rule below.
        /// </returns>
        public bool Equals(WordMeta compare)
        {
            if (compare == null || compare.WordChars.Count == 0 || this.WordChars.Count == 0)
            {
                return false;
            }

            // Representative characters: start from the first char of each side.
            var cur   = WordChars.First();
            var other = compare.WordChars.First();

            var curLast   = WordChars.Last();
            var otherLast = compare.WordChars.Last();

            // Prefer the last char when its YSize is larger than the first's, but by a factor below 5.
            if (curLast.YSize > cur.YSize && curLast.YSize / (double)cur.YSize < 5)
            {
                cur = curLast;
            }
            if (otherLast.YSize > other.YSize && otherLast.YSize / (double)other.YSize < 5)
            {
                other = otherLast;
            }

            // If the representative is blank or an opening "《", fall back to the second char.
            // The parentheses matter: without them '&&' binds tighter than '||', so a blank
            // representative in a single-char word would index WordChars[1] out of range.
            if ((cur.Word.Trim() == "" || cur.Word == "《") && WordChars.Count > 1)
            {
                cur = WordChars[1];
            }
            if ((other.Word.Trim() == "" || other.Word == "《") && compare.WordChars.Count > 1)
            {
                other = compare.WordChars[1];
            }

            var aveHeight = (cur.Height + other.Height) / 2;

            // Helper defined elsewhere; a positive result is recorded on this instance.
            var isSmallWord = IsUpOrDown(cur, other, compare, otherLast);

            if (isSmallWord)
            {
                this.IsSmallWord = isSmallWord;
                return true;
            }

            var curAveHeight = WordChars.Average(w => w.Height);

            // When compare is a small word with a predecessor, reject if that predecessor's
            // average char height exceeds ours by more than 5.
            if (compare.IsSmallWord && compare.LastWord != null)
            {
                if (compare.LastWord.WordChars.Average(w => w.Height) - curAveHeight > 5)
                {
                    return false;
                }
            }

            var first  = WordChars.Average(w => w.Y);
            var second = compare.WordChars.Average(w => w.Y);

            // Dash / subtitle: count leading "—" characters, skipping blank ones and
            // stopping at the first character that is neither blank nor a dash.
            int subFlag = 0;

            for (int i = 0; i < WordChars.Count; i++)
            {
                var w = WordChars[i].Word.Trim();

                if (w == "")
                {
                    continue;
                }
                if (w == "—")
                {
                    subFlag++;
                }
                else
                {
                    break;
                }
            }

            if (subFlag > 0)
            {
                IsSubTitleStart = true;
                return true;
            }

            // compare ends with ":" and the vertical gap is at most one average height.
            if (otherLast.Word == ":" && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) <= aveHeight)
            {
                IsSubTitleStart = true;
                return true;
            }

            // End-of-block rules: the gap is compared against compare.SpaceHeight, with a
            // different threshold for each SpaceHeight range.
            if (compare.SpaceHeight > 5 && (int)((Math.Abs(first - second) - curAveHeight) - (int)compare.SpaceHeight) >= 4)
            {
                IsEndBlock = true;
                return false;
            }
            if (compare.SpaceHeight > 3 && compare.SpaceHeight < 5 && (int)((Math.Abs(cur.Y - other.Y) - cur.Height) - (int)compare.SpaceHeight) > 8)
            {
                IsEndBlock = true;
                return false;
            }
            if (compare.SpaceHeight > 1 && compare.SpaceHeight < 1.2 && (int)((Math.Abs(cur.Y - other.Y) - cur.Height) - (int)compare.SpaceHeight) > 14)
            {
                IsEndBlock = true;
                return false;
            }

            // compare consists of a single "—": flagged as end of block but still accepted.
            if (compare.WordChars.Count == 1 && other.Word == "—")
            {
                IsEndBlock = true;
                return true;
            }

            // Rejections that apply only when compare.IsFirst is set: compare lies above us, is
            // much narrower (width ratio below 0.4) and differs in height and spacing.
            if (compare.IsFirst && other.Y < cur.Y && other.X - cur.X > 100 && this.Width > 0 && compare.Width > 0 && Math.Round(compare.Width / this.Width, 2) < 0.4 && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) > 0 && Math.Abs(cur.Height - other.Height) > 1 && Math.Abs(cur.Space - other.Space) > 1)
            {
                return false;
            }
            if (compare.IsFirst && other.Y < cur.Y && cur.Y - other.Y > 15 && this.Width > 200 && Math.Round(compare.Width / this.Width, 2) < 0.4 && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) > 3 && Math.Abs(cur.Height - other.Height) > 1 && Math.Abs(cur.Space - other.Space) > 1)
            {
                return false;
            }

            // Two helper checks defined elsewhere; either one passing accepts immediately.
            var stricMode = IsPassStrictModeGroup(cur, other, compare, aveHeight);

            if (stricMode)
            {
                return true;
            }

            var isCloseMode = IsCloseMode(cur, other, compare, aveHeight);

            if (isCloseMode)
            {
                return true;
            }

            // Rejection rules: several metrics differ at once.
            if (Math.Abs(cur.Height - other.Height) > 1.5 &&
                Math.Abs((int)cur.Space - (int)other.Space) >= 1 &&
                Math.Abs(cur.YSize - other.YSize) > 3 &&
                Math.Abs(cur.XSize - other.XSize) > 3)
            {
                return false;
            }

            if (Math.Abs(cur.Height - other.Height) > 2 &&
                Math.Abs((int)cur.Space - (int)other.Space) >= 2 &&
                Math.Abs(cur.YSize - other.YSize) > 3 &&
                Math.Abs(cur.XSize - other.XSize) >= 3)
            {
                return false;
            }

            if (Math.Abs(cur.Height - other.Height) > 1 &&
                Math.Abs(cur.FontSize - other.FontSize) > 1 &&
                Math.Abs((int)cur.Space - (int)other.Space) >= 1 &&
                (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) > 5 &&
                Math.Abs(cur.YSize - other.YSize) > 30 &&
                Math.Abs(cur.XSize - other.XSize) > 30)
            {
                return false;
            }

            if (Math.Abs(cur.Space - other.Space) > 0.8 && Math.Abs(cur.YSize - other.YSize) > 3 && Math.Abs(cur.XSize - other.XSize) > 3 &&
                Math.Abs(cur.Y - other.Y) > 20 && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) > aveHeight
                )
            {
                return false;
            }

            // Acceptance rules: IsSameFont passes and the metrics match, with progressively
            // looser tolerances from one rule to the next.
            if (IsSameFont(compare.WordChars, WordChars) &&
                cur.FontSize == other.FontSize &&
                cur.YSize == other.YSize &&
                Math.Abs(cur.XSize - other.XSize) < 5 &&
                (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) <= aveHeight + 2)
            {
                return true;
            }

            if (IsSameFont(compare.WordChars, WordChars) &&
                cur.FontSize == other.FontSize &&
                cur.YSize == other.YSize &&
                cur.XSize == other.XSize &&
                cur.Height == other.Height &&
                cur.Space == other.Space &&
                cur.IsBold == other.IsBold &&
                cur.IsItalic == other.IsItalic &&
                (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) <= aveHeight + 12)
            {
                return true;
            }

            if (IsSameFont(compare.WordChars, WordChars) &&
                Math.Abs(cur.FontSize - other.FontSize) < 2 &&
                Math.Abs(cur.YSize - other.YSize) <= (int)cur.YSize / 4 &&
                (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) < 2)
            {
                return true;
            }

            // Final fallback: IsSameFont passes and every metric is within a loose tolerance.
            return IsSameFont(compare.WordChars, WordChars) &&
                   Math.Abs(cur.FontSize - other.FontSize) <= 3 &&
                   Math.Abs(cur.YSize - other.YSize) <= (int)cur.YSize / 4 + 3 &&
                   Math.Abs(cur.XSize - other.XSize) <= (int)cur.XSize / 4 + 3 &&
                   (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) <= aveHeight &&
                   Math.Abs(cur.Space - other.Space) < 5;
        }
Example #2
0
        /// <summary>
        /// Heuristic validity check of this word group relative to <paramref name="compare"/>.
        /// Rejects the group when its representative character is far smaller than the other's,
        /// when a short group sits at a particular offset from <paramref name="compare"/>, or
        /// when it contains too many symbol/punctuation characters.
        /// </summary>
        /// <param name="compare">The word group to compare against.</param>
        /// <returns><c>false</c> if any rejection rule matches; otherwise <c>true</c>.</returns>
        public bool IsValid(WordMeta compare)
        {
            // Representative char of this group: the first one, or the second if the first is blank.
            var mine = WordChars.First();
            if (mine.Word.Trim().Length == 0 && WordChars.Count > 1)
            {
                mine = WordChars[1];
            }

            // Same selection for the other group.
            var theirs = compare.WordChars.First();
            if (theirs.Word.Trim().Length == 0 && compare.WordChars.Count > 1)
            {
                theirs = compare.WordChars[1];
            }

            // Less than a quarter of the other's height.
            if (mine.Height < theirs.Height / 4)
            {
                return false;
            }

            // Small relative height combined with large Y and spacing differences.
            if (mine.Height < theirs.Height / 3.5
                && theirs.Y - mine.Y > 80
                && theirs.Space - mine.Space > 18)
            {
                return false;
            }

            // Similar height and spacing, fewer than 7 chars, at least 14 above the other's Y
            // and more than 8 to the right of the other group's last character.
            if (Math.Abs(mine.Height - theirs.Height) < 1.5
                && Math.Abs(mine.Space - theirs.Space) < 4
                && theirs.Y - mine.Y >= 14
                && WordChars.Count < 7
                && mine.X - compare.WordChars.Last().X > 8)
            {
                return false;
            }

            // More than three of "!", "/", "$".
            int symbolCount = WordChars.Count(r => r.Word == "!" || r.Word == "/" || r.Word == "$");
            if (symbolCount > 3)
            {
                return false;
            }

            // Punctuation-like characters: three are too many for a short text containing a
            // space; four are always too many.
            int punctuationCount = WordChars.Count(r => r.Word == "," || r.Word == "*" || r.Word == "#" || r.Word == "." || r.Word == "+");
            if (punctuationCount >= 3)
            {
                var text = this.Text.ToString();
                if (text.Contains(" ") && text.Length < 60)
                {
                    return false;
                }
                if (punctuationCount >= 4)
                {
                    return false;
                }
            }

            return true;
        }