Пример #1
0
        /// <summary>
        /// Walks the character chunks in order and folds them into <see cref="WordMeta"/> entries,
        /// registering each new entry in <c>Words</c>.
        /// </summary>
        /// <remarks>
        /// A new entry is opened for the very first chunk and whenever <c>IsSameLine</c> reports that
        /// the chunk is not on the same line as the chunk before it. Otherwise the chunk is appended
        /// to the entry currently being built, optionally preceded by a single space.
        /// </remarks>
        /// <param name="wordInfos">Chunks to group, visited in list order.</param>
        public void MakeWord(List <WordInfo> wordInfos)
        {
            // Index that will be given to the next entry that gets opened.
            int nextIndex = 1;

            // Chunk handled by the previous loop iteration (null before the first one).
            WordInfo previous = null;

            // Entry currently being filled; it is only added to Words once it receives its first chunk.
            WordMeta building = new WordMeta()
            {
                Index     = nextIndex,
                Text      = new StringBuilder(),
                WordChars = new List <WordInfo>()
            };

            foreach (var chunk in wordInfos)
            {
                bool isFirstChunk = previous == null;

                if (!isFirstChunk && IsSameLine(chunk, previous))
                {
                    // Same line: decide whether a separating space is needed before appending.
                    float gap = chunk.DistanceFromEndOf(previous);

                    // Either the chunk starts more than one space-width before the end of the previous
                    // chunk, or there is a visible gap (more than half a space-width) and neither side
                    // of the joint already carries a space character.
                    bool needsSpace =
                        gap < -chunk.Space
                        || (gap > chunk.Space / 2.0f
                            && chunk.Word.Length > 0
                            && chunk.Word[0] != ' '
                            && previous.Word.Length > 0
                            && previous.Word[previous.Word.Length - 1] != ' ');

                    if (needsSpace)
                    {
                        building.Text.Append(" ");
                    }

                    building.Text.Append(chunk.Word);
                    building.WordChars.Add(chunk);
                }
                else
                {
                    // First chunk reuses the pre-built entry; a line change opens a fresh one.
                    if (!isFirstChunk)
                    {
                        building = new WordMeta()
                        {
                            Index     = nextIndex,
                            Text      = new StringBuilder(),
                            WordChars = new List <WordInfo>()
                        };
                    }

                    building.Text.Append(chunk.Word);
                    building.WordChars.Add(chunk);
                    nextIndex++;
                    Words.Add(building);
                }

                previous = chunk;
            }
        }
Пример #2
0
        /// <summary>
        /// Groups the non-blank entries of <paramref name="line"/>'s <c>LineDic</c> into blocks
        /// stored in <c>BlockDic</c>.
        /// </summary>
        /// <remarks>
        /// Entries are visited in ascending key order. The first entry opens the block with key 1.
        /// Every later entry is compared (through its first <see cref="WordMeta"/>) with the last
        /// tracked entry and is then either appended to the block with the current key, left out,
        /// or used to open a new block under the next key. While doing so the method also fills in
        /// layout fields (<c>SpaceHeight</c>, <c>IsFirst</c>, <c>OffSetHeight</c>, <c>OffSetWidth</c>,
        /// <c>Width</c>, <c>MaxX</c>, <c>MaxY</c>, <c>LastWord</c>) on the compared entries.
        /// NOTE(review): <c>WordMeta.Equals</c> and <c>IsValid</c> are defined elsewhere; they are
        /// presumably layout-similarity checks rather than identity checks - confirm.
        /// </remarks>
        /// <param name="line">Line container whose <c>LineDic</c> is grouped.</param>
        public void MakeBlock(Line line)
        {
            var      dic  = line.LineDic;
            WordMeta last = null;

            // Key of the block currently being extended in BlockDic.
            int i = 1;

            // Filter out blank lines: keep entries with more than one word, or with a single word
            // whose text is not blank, and re-key them 0..n-1 in ascending original-key order so
            // that "the following entry" can be found with key + 1.
            var tempDic = new Dictionary <int, List <WordMeta> >();

            var filter = dic.Where(d => d.Value.Count == 1 && d.Value[0].Text.ToString().Trim() != "" || d.Value.Count > 1).OrderBy(d => d.Key);

            int j = 0;

            // The query is never null and foreach copes with an empty sequence, so it is
            // enumerated exactly once here.
            foreach (var item in filter)
            {
                tempDic.Add(j, item.Value);

                j++;
            }

            foreach (var item in tempDic)
            {
                // True when one of the following entries matches `last`; the current entry is then
                // treated as an interruption and does not become the new `last`.
                bool isNextEqual    = false;
                // True when the current entry matched the word before `last` and was merged.
                bool isLastPreMatch = false;

                if (item.Value.Count > 0)
                {
                    var current = item.Value[0];

                    if (last != null)
                    {
                        // Entries rejected by IsValid are ignored entirely (not merged, not tracked).
                        if (!current.IsValid(last))
                        {
                            continue;
                        }

                        // Words of the most recently enumerated block. BlockDic itself is not
                        // modified until the end of this iteration, so the lookup is done once.
                        var lastBlock = BlockDic.Last().Value;

                        // First two Index groups of that block, materialised once.
                        var gindex = lastBlock.GroupBy(v => v.Index).OrderBy(v => v.Key).Take(2).ToList();

                        if (gindex.Count > 1)
                        {
                            var first = gindex[0].First().WordChars.Average(w => w.Y);

                            var second = gindex[1].First().WordChars.Average(w => w.Y);

                            var secondAveHeight = gindex[1].First().WordChars.Average(w => w.Height);

                            // Distance between the two groups' average Y minus the second group's
                            // average character height.
                            float spaceHeight = Math.Abs(second - first) - secondAveHeight;

                            last.SpaceHeight = spaceHeight;
                        }

                        if (lastBlock.Count == 1 && lastBlock.First() == last)
                        {
                            last.IsFirst = true;
                        }

                        if (lastBlock.Count > 1 && lastBlock.Last() == last)
                        {
                            // Extent between the first and the last word of the block.
                            last.OffSetHeight = Math.Abs(lastBlock.Last().WordChars.First().Y - lastBlock.First().WordChars.First().Y);
                            last.OffSetWidth  = Math.Abs(lastBlock.Last().WordChars.Max(v => v.X) - lastBlock.First().WordChars.Min(v => v.X));
                        }


                        // Compute widths (span between the smallest and largest character X).
                        var lastMinX    = last.WordChars.Min(v => v.X);
                        var lastMaxX    = last.WordChars.Max(v => v.X);
                        var currentMaxX = current.WordChars.Max(v => v.X);

                        last.Width    = lastMaxX - lastMinX;
                        current.Width = currentMaxX - current.WordChars.Min(v => v.X);


                        last.MaxY    = last.WordChars.Max(v => v.Y);
                        last.MaxX    = lastMaxX;
                        current.MaxX = currentMaxX;


                        if (current.Equals(last))
                        {
                            if (BlockDic.Keys.Contains(i))
                            {
                                BlockDic[i].AddRange(item.Value);
                            }
                        }

                        else
                        {
                            // If `last` is a single-character trailing word of its block (the original
                            // comment calls it a superscript-like fragment), compare the current
                            // entry with the word just before `last` instead.

                            if (last.WordChars.Count == 1 && lastBlock.Count > 1 && lastBlock.Last() == last)
                            {
                                var lastPre = lastBlock[lastBlock.Count - 2];

                                if (current.Equals(lastPre))
                                {
                                    if (BlockDic.Keys.Contains(i))
                                    {
                                        BlockDic[i].AddRange(item.Value);
                                    }

                                    isLastPreMatch = true;
                                }
                            }
                            else
                            {
                                // Look ahead at up to three following entries: if one of them is close
                                // to `last` and Equals it, the current entry is an interruption.
                                int count = 0;

                                while (count < 3 && !current.IsEndBlock)
                                {
                                    count++;

                                    List <WordMeta> next;

                                    if (tempDic.TryGetValue(item.Key + count, out next))
                                    {
                                        var nextFirstChar = next[0].WordChars.First();

                                        if (Math.Abs(nextFirstChar.Y - last.WordChars.First().Y) < 30 &&
                                            Math.Abs(current.WordChars.First().Y - nextFirstChar.Y) < 30 &&
                                            last.WordChars.First().Word.Trim() != "" &&
                                            next[0].WordChars.Count > 1 &&
                                            (Math.Abs(nextFirstChar.X - last.WordChars.First().X) < 60 ||
                                             Math.Abs(nextFirstChar.X - last.WordChars.Last().X) < 60))
                                        {
                                            if (next[0].Equals(last))
                                            {
                                                isNextEqual = true;
                                                break;
                                            }
                                        }
                                    }
                                }
                            }
                            if (isNextEqual)
                            {
                                // NOTE(review): these are signed differences, unlike the Math.Abs
                                // tests in the look-ahead above - confirm this is intended.
                                if (current.WordChars.First().X - last.WordChars.First().X < 30 || current.WordChars.First().X - last.WordChars.Last().X < 30)
                                {
                                    if (BlockDic.Keys.Contains(i))
                                    {
                                        BlockDic[i].AddRange(item.Value);
                                    }
                                }
                                // Otherwise the entry is left out of every block; because isNextEqual
                                // is set, it is not recorded as `last` below either.
                            }
                            else if (isLastPreMatch == false)
                            {
                                // No match anywhere: the current entry opens a new block.
                                i++;
                                BlockDic.Add(i, item.Value);
                            }
                        }
                    }
                    else
                    {
                        // Very first usable entry opens the first block.
                        BlockDic.Add(i, item.Value);
                    }
                    if (isNextEqual == false)
                    {
                        current.LastWord = last;
                        last             = current;
                    }
                }
            }
        }