/// <summary>
/// Decides what to do with the block at index <paramref name="i"/>: classify it as a symbol,
/// join it with the previous block, join it with the next block, or leave it alone.
/// </summary>
/// <param name="i">
/// Index into <c>blocks</c>. Passed by reference because it is handed on by reference to
/// <c>JoinPreviousMergeBlockToListBlock</c>.
/// </param>
private static void ProcessPotentialMergeBlock(ref int i)
{
    // Nothing to merge when the block is accepted as a symbol.
    if (TryClassifyAsSymbol(blocks[i]))
    {
        return;
    }

    TextBlock previousCandidate = null;
    TextBlock nextCandidate = null;

    // The next-block probe may also revoke the previous-merge verdict (it receives it by ref).
    bool canJoinPrevious = TryMergeToPreviousBlock(i, ref previousCandidate);
    bool canJoinNext = TryMergeToNextBlock(i, ref nextCandidate, ref canJoinPrevious);

    // Both directions look valid: let DetermineMergeDirection adjust the two flags.
    if (canJoinPrevious && canJoinNext)
    {
        DetermineMergeDirection(i, previousCandidate, ref canJoinPrevious, nextCandidate, ref canJoinNext);
    }

    if (canJoinPrevious)
    {
        JoinPreviousMergeBlockToListBlock(ref i, previousCandidate);
        return;
    }

    if (canJoinNext)
    {
        JoinNextMergeBlockToListBlock(i, nextCandidate);
        return;
    }

    // Vertical-layout fallback: when the width-position percentage exceeds the mark threshold,
    // join with a same-line neighbour whose gap is within the line's average intra-character
    // gap, provided a merge candidate was produced for that side.
    if (!isHorizontalText
        && VerticalTextProjection.GetMaxWidthPostionPercentToNeightbor(blocks, i) > VerticalTextProjection.MARK_WIDTH_POSITION_PERCENT)
    {
        var currentLine = blocks[i].LineIndex;
        var intraCharGap = VerticalTextProjection.AvgGapIntraChars[currentLine];

        if (currentLine == blocks[i - 1].LineIndex
            && blocks[i].Top - blocks[i - 1].Bottom <= intraCharGap
            && previousCandidate != null)
        {
            JoinPreviousMergeBlockToListBlock(ref i, previousCandidate);
            return;
        }

        if (currentLine == blocks[i + 1].LineIndex
            && blocks[i + 1].Top - blocks[i].Bottom <= intraCharGap
            && nextCandidate != null)
        {
            JoinNextMergeBlockToListBlock(i, nextCandidate);
            return;
        }
    }

    // No merge happened for this block.
    isPreviousMerge = false;
}
/// <summary>
/// Splits <paramref name="horizontal"/> into lines via <c>SplitMultiLines.SplitProjectDensity</c>
/// and collects the text blocks found in each line.
/// </summary>
/// <param name="image">Image handed to the projection and block-finding helpers.</param>
/// <param name="horizontal">
/// Projection data passed to <c>SplitMultiLines.SplitProjectDensity</c>
/// (presumably the horizontal density profile of <paramref name="image"/>; confirm against the caller).
/// </param>
/// <returns>
/// All text blocks of all lines, in reversed line order; each call to
/// <c>VerticalTextProjection.FindAllTextBlock</c> receives the line's index in that reversed order.
/// </returns>
private static List<TextBlock> ProcessVerticalLines(GrayImage image, uint[] horizontal)
{
    List<TextBlock> textBlocks = new List<TextBlock>();
    List<SplitMultiLines.Line> textLines = SplitMultiLines.SplitProjectDensity(horizontal);

    // Japanese columns run from right to left, so process the lines in reverse order.
    // NOTE: this comment must stay on its own line; as a trailing "//" on a collapsed line
    // it would comment out the loop and the return statement below.
    textLines.Reverse();

    for (int i = 0; i < textLines.Count; i++)
    {
        var density = Projection.ProjectSubImageOnVerticalLine(image, textLines[i]);
        textBlocks.AddRange(VerticalTextProjection.FindAllTextBlock(image, density, textLines[i], i));
    }

    return textBlocks;
}
/// <summary>
/// Returns the gap threshold for the block at <paramref name="blockIndex"/>: the average
/// block gap of the block's line, enlarged for certain characters and for blocks whose
/// position percentage exceeds the layout's mark threshold.
/// </summary>
/// <param name="blockIndex">Index into <c>blocks</c>.</param>
/// <param name="text">Character that selects the scaling rule in horizontal layout; unused in vertical layout.</param>
/// <returns>The average gap, or a scaled multiple of it (1.5x, 2x, or the ceiling of 2x).</returns>
private static float CaluculateValidGap(int blockIndex, char text)
{
    if (!isHorizontalText)
    {
        float widthPercent = VerticalTextProjection.GetMaxWidthPostionPercentToNeightbor(blocks, blockIndex);
        float columnGap = VerticalTextProjection.AvgGapBlocks[blocks[blockIndex].LineIndex];

        return widthPercent > VerticalTextProjection.MARK_WIDTH_POSITION_PERCENT
            ? columnGap * 1.5f
            : columnGap;
    }

    float rowGap = HorizontalTextProjection.AvgGapBlocks[blocks[blockIndex].LineIndex];
    float heightPercent = HorizontalTextProjection.GetMaxHeightPostionPercentToNeightbor(blocks, blockIndex);
    bool exceedsMarkThreshold = heightPercent > HorizontalTextProjection.MARK_HEIGH_POSITION_PERCENT;

    switch (text)
    {
        case '、':
        case 'l':
        case 'ノ':
            // These characters get the doubled gap only when the threshold is exceeded.
            return exceedsMarkThreshold ? 2f * rowGap : rowGap;

        case 'し':
            // Always doubled and rounded up, regardless of the position percentage.
            return (float)Math.Ceiling(rowGap * 2f);

        default:
            return exceedsMarkThreshold ? 1.5f * rowGap : rowGap;
    }
}
/// <summary>
/// Checks whether the block at index <paramref name="i"/> can be merged with the block that
/// follows it, and produces the merge candidate.
/// </summary>
/// <param name="i">Index into <c>blocks</c>; <c>blocks[i + 1]</c> is read without a bounds check.</param>
/// <param name="mergeNextBlock">
/// Receives the merge candidate. Left untouched when the next block is on a different line;
/// otherwise reset to null and then possibly assigned. It may be non-null even when the
/// method returns false.
/// </param>
/// <param name="isPreviousMergeValid">
/// Forced to false in one horizontal-layout case (current 'し', candidate 'い').
/// </param>
/// <returns>True when merging with the next block is considered valid.</returns>
/// <remarks>
/// Two branches remove an element from <c>blocks</c> and overwrite another, so the list may
/// be one element shorter after this call.
/// </remarks>
private static bool TryMergeToNextBlock(int i, ref TextBlock mergeNextBlock, ref bool isPreviousMergeValid)
{
    // Only blocks on the same line can be merged. Note: mergeNextBlock is NOT reset here.
    if (blocks[i + 1].LineIndex != blocks[i].LineIndex)
    {
        return(false);
    }
    mergeNextBlock = null;
    bool isNextMergeValid = false;
    bool isCanMerge = IsCanMergeBlock(i + 1, blocks[i].Text[0]);
    if (isCanMerge)
    {
        // Presumably builds and classifies the merge of blocks i and i + 1 (helper defined elsewhere).
        mergeNextBlock = ClassifyMergeBlock(i);
        if (isHorizontalText)
        {
            ClassifyTextBlock(blocks[i + 1]);
            if (blocks[i].Text[0] == 'し')
            {
                // 'し' followed by '、' or a vertical-line character, with the candidate read as 'い':
                // take the next merge and cancel any previous merge.
                if (mergeNextBlock.Text[0] == 'い' && (blocks[i + 1].Text[0] == '、' || IsVerticalLineChar(blocks[i + 1].Text[0])))
                {
                    isPreviousMergeValid = false;
                    isNextMergeValid = true;
                }
            }
            else if (IsVerticalLineChar(blocks[i].Text[0]) && IsVerticalLineChar(blocks[i + 1].Text[0]) && blocks[i + 1].IsNeedOCRProcess && i + 2 < blocks.Count)
            {
                // Two vertical-line characters in a row: try a three-block merge with blocks[i + 2].
                var temp = mergeNextBlock;
                mergeNextBlock = TextBlock.CreateMergeHorizontalTextBlock(mergeNextBlock, blocks[i + 2]);
                ClassifyTextBlock(mergeNextBlock);
                // Valid when the result is found in LeftMistakenLetters
                // (BinarySearch assumes that array is sorted - TODO confirm).
                isNextMergeValid = Array.BinarySearch <char>(HorizontalTextProjection.LeftMistakenLetters, mergeNextBlock.Text[0]) > -1;
                if (isNextMergeValid)
                {
                    // Drop block i, then overwrite the former block i + 1 (now at index i)
                    // with the saved two-block candidate.
                    blocks.RemoveAt(i);
                    blocks[i] = temp;
                }
                else
                {
                    // Three-block merge rejected: restore the two-block candidate.
                    mergeNextBlock = temp;
                }
            }
            else
            {
                isNextMergeValid = IsCanCombineToNext(mergeNextBlock, blocks[i].Text[0]);
            }
        }
        else
        {
            // Vertical layout.
            isNextMergeValid = IsCanCombineToNext(mergeNextBlock, blocks[i].Text[0]);
            if (isNextMergeValid && (mergeNextBlock.Text[0] == 'こ' || mergeNextBlock.Text[0] == 'ご' || mergeNextBlock.Text[0] == 'ニ' || mergeNextBlock.Text[0] == '二'))
            {
                // Reject when the current block is less than half as wide as the next one.
                // mergeNextBlock stays assigned on this path.
                if (blocks[i].Width < 0.5f * blocks[i + 1].Width)
                {
                    return(false);
                }
                VerticalTextProjection.FindBlockType(mergeNextBlock);
                var index = i + 2;
                // A following Mark block wider than 80% of the candidate may be a third stroke.
                if (mergeNextBlock.Type < TextBlockType.Single && index < blocks.Count && blocks[index].Type == TextBlockType.Mark && blocks[index].Width > 0.8f * mergeNextBlock.Width)
                {
                    var temp = mergeNextBlock;
                    mergeNextBlock = TextBlock.CreateMergeVerticalTextBlock(mergeNextBlock, blocks[i + 2]);
                    ClassifyTextBlock(mergeNextBlock);
                    if (mergeNextBlock.Text[0] == '三' || mergeNextBlock.Text[0] == 'ミ')
                    {
                        // Same list rewrite as the horizontal case: remove block i and put
                        // the two-block candidate at index i.
                        blocks.RemoveAt(i);
                        blocks[i] = temp;
                    }
                    else
                    {
                        // Three-block merge rejected: restore the two-block candidate.
                        mergeNextBlock = temp;
                    }
                }
            }
        }
    }
    else if (!isHorizontalText)
    {
        // Not mergeable by the general rule: defer to the vertical special cases, which may
        // set both the candidate and the result flag.
        ProcessSpecialCasesInVerticalLayout(i, ref mergeNextBlock, ref isNextMergeValid);
    }
    return(isNextMergeValid);
}