/// <summary>
/// Compares this <c>WordMeta</c> with <paramref name="compare"/> using a cascade of
/// layout heuristics (glyph height, spacing, font size, Y distance) over one
/// representative character from each side.
/// NOTE(review): presumably decides whether the two entries belong to the same
/// text block/title — confirm against callers.
/// </summary>
/// <remarks>
/// This is not a pure equality check: depending on which rule fires it assigns
/// <c>IsSmallWord</c>, <c>IsSubTitleStart</c> or <c>IsEndBlock</c> on this instance.
/// The rules are order-dependent; the first one that matches decides the result.
/// </remarks>
/// <param name="compare">The other entry to compare against; may be null.</param>
/// <returns>
/// <c>false</c> when <paramref name="compare"/> is null or either side has no
/// characters; otherwise the verdict of the first matching rule below.
/// </returns>
public bool Equals(WordMeta compare)
{
    if (compare == null || compare.WordChars.Count == 0 || this.WordChars.Count == 0)
    {
        return false;
    }

    // Pick one representative character per side, starting from the first one.
    var cur = WordChars.First();
    var other = compare.WordChars.First();
    var curLast = WordChars.Last();
    var otherLast = compare.WordChars.Last();

    // Prefer the last character when it is taller than the first, but by less than 5x.
    if (curLast.YSize > cur.YSize && curLast.YSize / (double)cur.YSize < 5)
    {
        cur = curLast;
    }

    if (otherLast.YSize > other.YSize && otherLast.YSize / (double)other.YSize < 5)
    {
        other = otherLast;
    }

    // A blank or "《" representative is replaced by the second character.
    // The parentheses matter: without them "&&" binds tighter than "||", the
    // Count guard does not cover the blank case, and a single blank character
    // would index WordChars[1] and throw ArgumentOutOfRangeException.
    if ((cur.Word.Trim() == "" || cur.Word == "《") && WordChars.Count > 1)
    {
        cur = WordChars[1];
    }

    if ((other.Word.Trim() == "" || other.Word == "《") && compare.WordChars.Count > 1)
    {
        other = compare.WordChars[1];
    }

    var aveHeight = (cur.Height + other.Height) / 2;

    // Rule: IsUpOrDown match -> remember it on this instance and accept.
    var isSmallWord = IsUpOrDown(cur, other, compare, otherLast);
    if (isSmallWord)
    {
        this.IsSmallWord = isSmallWord;
        return true;
    }

    // Rule: compare is flagged IsSmallWord and its LastWord is on average
    // more than 5 taller than this entry -> reject.
    var curAveHeight = WordChars.Average(w => w.Height);
    if (compare.IsSmallWord && compare.LastWord != null)
    {
        if (compare.LastWord.WordChars.Average(w => w.Height) - curAveHeight > 5)
        {
            return false;
        }
    }

    var first = WordChars.Average(w => w.Y);
    var second = compare.WordChars.Average(w => w.Y);

    // Dash, subtitle: count leading "—" characters, skipping blank ones.
    int subFlag = 0;
    for (int i = 0; i < WordChars.Count; i++)
    {
        var w = WordChars[i].Word.Trim();
        if (w == "")
        {
            continue;
        }

        if (w == "—")
        {
            subFlag++;
        }
        else
        {
            break;
        }
    }

    if (subFlag > 0)
    {
        IsSubTitleStart = true;
        return true;
    }

    // Rule: compare ends with ":" and the vertical gap is at most one average height.
    if (otherLast.Word == ":"
        && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) <= aveHeight)
    {
        IsSubTitleStart = true;
        return true;
    }

    // Rules: vertical gap exceeds compare.SpaceHeight by a band-specific margin -> end of block.
    if (compare.SpaceHeight > 5
        && (int)((Math.Abs(first - second) - curAveHeight) - (int)compare.SpaceHeight) >= 4)
    {
        IsEndBlock = true;
        return false;
    }

    if (compare.SpaceHeight > 3 && compare.SpaceHeight < 5
        && (int)((Math.Abs(cur.Y - other.Y) - cur.Height) - (int)compare.SpaceHeight) > 8)
    {
        IsEndBlock = true;
        return false;
    }

    if (compare.SpaceHeight > 1 && compare.SpaceHeight < 1.2
        && (int)((Math.Abs(cur.Y - other.Y) - cur.Height) - (int)compare.SpaceHeight) > 14)
    {
        IsEndBlock = true;
        return false;
    }

    // Rule: compare is a lone "—" -> accept, but flag the end of the block.
    if (compare.WordChars.Count == 1 && other.Word == "—")
    {
        IsEndBlock = true;
        return true;
    }

    // Rules: compare is marked IsFirst, sits above this entry, is much narrower
    // (width ratio < 0.4) and differs in height and spacing -> reject.
    if (compare.IsFirst
        && other.Y < cur.Y
        && other.X - cur.X > 100
        && this.Width > 0
        && compare.Width > 0
        && Math.Round(compare.Width / this.Width, 2) < 0.4
        && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) > 0
        && Math.Abs(cur.Height - other.Height) > 1
        && Math.Abs(cur.Space - other.Space) > 1)
    {
        return false;
    }

    if (compare.IsFirst
        && other.Y < cur.Y
        && cur.Y - other.Y > 15
        && this.Width > 200
        && Math.Round(compare.Width / this.Width, 2) < 0.4
        && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) > 3
        && Math.Abs(cur.Height - other.Height) > 1
        && Math.Abs(cur.Space - other.Space) > 1)
    {
        return false;
    }

    // Rules delegated to helpers defined elsewhere in the class.
    var stricMode = IsPassStrictModeGroup(cur, other, compare, aveHeight);
    if (stricMode)
    {
        return true;
    }

    var isCloseMode = IsCloseMode(cur, other, compare, aveHeight);
    if (isCloseMode)
    {
        return true;
    }

    // Rules: combined differences in height, spacing and X/Y size -> reject.
    if (Math.Abs(cur.Height - other.Height) > 1.5
        && Math.Abs((int)cur.Space - (int)other.Space) >= 1
        && Math.Abs(cur.YSize - other.YSize) > 3
        && Math.Abs(cur.XSize - other.XSize) > 3)
    {
        return false;
    }

    if (Math.Abs(cur.Height - other.Height) > 2
        && Math.Abs((int)cur.Space - (int)other.Space) >= 2
        && Math.Abs(cur.YSize - other.YSize) > 3
        && Math.Abs(cur.XSize - other.XSize) >= 3)
    {
        return false;
    }

    if (Math.Abs(cur.Height - other.Height) > 1
        && Math.Abs(cur.FontSize - other.FontSize) > 1
        && Math.Abs((int)cur.Space - (int)other.Space) >= 1
        && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) > 5
        && Math.Abs(cur.YSize - other.YSize) > 30
        && Math.Abs(cur.XSize - other.XSize) > 30)
    {
        return false;
    }

    if (Math.Abs(cur.Space - other.Space) > 0.8
        && Math.Abs(cur.YSize - other.YSize) > 3
        && Math.Abs(cur.XSize - other.XSize) > 3
        && Math.Abs(cur.Y - other.Y) > 20
        && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) > aveHeight)
    {
        return false;
    }

    // Rules: IsSameFont plus progressively looser size/gap tolerances -> accept.
    if (IsSameFont(compare.WordChars, WordChars)
        && cur.FontSize == other.FontSize
        && cur.YSize == other.YSize
        && Math.Abs(cur.XSize - other.XSize) < 5
        && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) <= aveHeight + 2)
    {
        return true;
    }

    if (IsSameFont(compare.WordChars, WordChars)
        && cur.FontSize == other.FontSize
        && cur.YSize == other.YSize
        && cur.XSize == other.XSize
        && cur.Height == other.Height
        && cur.Space == other.Space
        && cur.IsBold == other.IsBold
        && cur.IsItalic == other.IsItalic
        && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) <= aveHeight + 12)
    {
        return true;
    }

    if (IsSameFont(compare.WordChars, WordChars)
        && Math.Abs(cur.FontSize - other.FontSize) < 2
        && Math.Abs(cur.YSize - other.YSize) <= (int)cur.YSize / 4
        && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) < 2)
    {
        return true;
    }

    // Final fallback: IsSameFont plus the loosest tolerances.
    return IsSameFont(compare.WordChars, WordChars)
        && Math.Abs(cur.FontSize - other.FontSize) <= 3
        && Math.Abs(cur.YSize - other.YSize) <= (int)cur.YSize / 4 + 3
        && Math.Abs(cur.XSize - other.XSize) <= (int)cur.XSize / 4 + 3
        && (int)(Math.Abs(cur.Y - other.Y) - cur.Height - aveHeight) <= aveHeight
        && Math.Abs(cur.Space - other.Space) < 5;
}
/// <summary>
/// Applies a set of rejection heuristics to this entry relative to
/// <paramref name="compare"/>: relative glyph height, spacing, position, and the
/// number of symbol/punctuation characters it contains.
/// </summary>
/// <param name="compare">The entry this one is measured against.</param>
/// <returns><c>false</c> if any rejection rule matches; otherwise <c>true</c>.</returns>
public bool IsValid(WordMeta compare)
{
    // Representative character of this entry: the first one, or the second
    // when the first is blank and a second exists.
    var head = WordChars.First();
    if (head.Word.Trim() == "" && WordChars.Count > 1)
    {
        head = WordChars[1];
    }

    // Same selection for the entry being compared against.
    var otherHead = compare.WordChars.First();
    if (otherHead.Word.Trim() == "" && compare.WordChars.Count > 1)
    {
        otherHead = compare.WordChars[1];
    }

    // Geometry-based rejections, evaluated in order with short-circuiting.
    if (head.Height < otherHead.Height / 4
        || (head.Height < otherHead.Height / 3.5
            && otherHead.Y - head.Y > 80
            && otherHead.Space - head.Space > 18)
        || (Math.Abs(head.Height - otherHead.Height) < 1.5
            && Math.Abs(head.Space - otherHead.Space) < 4
            && otherHead.Y - head.Y >= 14
            && WordChars.Count < 7
            && head.X - compare.WordChars.Last().X > 8))
    {
        return false;
    }

    // More than three of "!", "/", "$" rejects the entry.
    int symbolCount = WordChars.Count(c => c.Word == "!" || c.Word == "/" || c.Word == "$");
    if (symbolCount > 3)
    {
        return false;
    }

    // Three of ", * # . +" reject a text that contains a space and is shorter
    // than 60 characters; four or more always reject.
    int punctuationCount = WordChars.Count(
        c => c.Word == "," || c.Word == "*" || c.Word == "#" || c.Word == "." || c.Word == "+");
    if (punctuationCount >= 3)
    {
        var text = this.Text.ToString();
        if (text.Contains(" ") && text.Length < 60)
        {
            return false;
        }

        return punctuationCount < 4;
    }

    return true;
}