コード例 #1
0
ファイル: PDFHocr.cs プロジェクト: jehan2898/root
        /// <summary>
        /// Renders a single line as an hOCR <c>ocrx_line</c> span.
        /// </summary>
        /// <remarks>
        /// When <c>getHOCRByWords</c> is set, every word is rendered through <c>GetOCRWords</c>
        /// on its own row. Otherwise the words are joined with single spaces and, when the first
        /// word's font name is non-empty and its size is not &lt;= 0, the font name and size are
        /// added to the span's <c>title</c> attribute.
        /// Numbers are formatted with the invariant culture so the markup does not depend on the
        /// current thread culture (for example a decimal comma inside <c>bbox</c>).
        /// </remarks>
        /// <param name="line">The line to render; its <c>Words</c> collection is read.</param>
        /// <returns>
        /// The span markup terminated by a newline, or <see cref="string.Empty"/> when the line
        /// contains no words.
        /// </returns>
        private string GetLineWithWords(HocrLineModel line)
        {
            if (line.Words.Count <= 0)
            {
                return string.Empty;
            }

            // hOCR is consumed by tools, not shown to users: never format with the UI culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            System.Text.StringBuilder markup = new System.Text.StringBuilder();

            // Font details are only emitted in the "plain text" mode, and only when both are usable.
            bool includeFont = !this.getHOCRByWords
                               && !string.IsNullOrEmpty(line.FontNameFirstWord)
                               && !(line.FontSizeFirstWord <= 0);

            // NOTE(review): the font name is written into the title attribute without escaping;
            // confirm it can never contain a double quote.
            if (includeFont)
            {
                markup.AppendFormat(
                    invariant,
                    "\t\t\t<span class='ocrx_line' id='line_{0}' title=\"bbox {1} {2} {3} {4}; x_font {5}; x_fsize {6}\">\n",
                    line.LineID, line.XCord, line.YCord, line.XCord1, line.YCord1, line.FontNameFirstWord, line.FontSizeFirstWord);
            }
            else
            {
                markup.AppendFormat(
                    invariant,
                    "\t\t\t<span class='ocrx_line' id='line_{0}' title=\"bbox {1} {2} {3} {4}\">\n",
                    line.LineID, line.XCord, line.YCord, line.XCord1, line.YCord1);
            }

            if (this.getHOCRByWords)
            {
                foreach (WordData word in line.Words)
                {
                    markup.Append(this.GetOCRWords(word)).Append("\n");
                }
            }
            else
            {
                // NOTE(review): the word text is emitted as-is (no HTML escaping) — verify whether
                // the words are already escaped upstream before adding escaping here.
                string joinedWords = string.Join(" ", line.Words.Select(w => w.Word).ToArray());
                markup.Append("\t\t\t\t").Append(joinedWords).Append("\n");
            }

            markup.Append("\t\t\t</span>\n");
            return markup.ToString();
        }
コード例 #2
0
ファイル: PDFHocr.cs プロジェクト: jehan2898/root
        /// <summary>
        /// Merges the lines that share the same <c>YCord</c> into one line each, orders the merged
        /// lines by <c>YCord</c> and numbers them with consecutive ids taken from <c>lineID1</c>.
        /// </summary>
        /// <remarks>
        /// A merged line takes its font name and size from the first line of its group, contains
        /// the words of every line in the group, and gets a bounding box recomputed from those
        /// words. This method does not assign <c>Text</c> on the merged lines.
        /// </remarks>
        /// <param name="lines">The lines to merge and order.</param>
        /// <returns>A new list holding the merged lines in ascending <c>YCord</c> order.</returns>
        private List <HocrLineModel> SortLineList(List <HocrLineModel> lines)
        {
            var merged = new List<HocrLineModel>();

            foreach (var sameRow in lines.GroupBy(l => l.YCord))
            {
                HocrLineModel leader = sameRow.First();

                var combined = new HocrLineModel();
                combined.FontNameFirstWord = leader.FontNameFirstWord;
                combined.FontSizeFirstWord = leader.FontSizeFirstWord;
                combined.Words.AddRange(sameRow.SelectMany(l => l.Words).ToList());

                // The bounding box is the union of the boxes of all collected words.
                combined.XCord  = combined.Words.Select(w => w.XCord).Min();
                combined.XCord1 = combined.Words.Select(w => w.XCord1).Max();
                combined.YCord  = combined.Words.Select(w => w.YCord).Min();
                combined.YCord1 = combined.Words.Select(w => w.YCord1).Max();

                merged.Add(combined);
            }

            List<HocrLineModel> ordered = merged.OrderBy(l => l.YCord).ToList();

            // Ids are handed out in output order and the counter keeps running across calls.
            foreach (HocrLineModel entry in ordered)
            {
                entry.LineID = this.lineID1++;
            }

            return ordered;
        }
コード例 #3
0
ファイル: PDFHocr.cs プロジェクト: jehan2898/root
        /// <summary>
        /// Closes the line currently being collected: when <c>wordList</c> holds any words, a
        /// line model built from them is appended to <c>lineList</c>. In every case
        /// <c>initialXcoord</c> is reset to 0 and <c>wordList</c> is cleared afterwards.
        /// </summary>
        private void endLine()
        {
            if (this.wordList.Count > 0)
            {
                WordData firstWord = this.wordList[0];
                WordData lastWord  = this.wordList[this.wordList.Count - 1];
                string   joined    = string.Join(" ", this.wordList.Select(w => w.Word).ToArray());

                var finished = new HocrLineModel
                {
                    // Drop the whitespace that the join put in front of punctuation characters.
                    Text = Regex.Replace(joined, "\\s+(\\p{P})", "$1"),
                    FontNameFirstWord = firstWord.FontName,
                    FontSizeFirstWord = firstWord.FontSize,
                    // Horizontal extent runs from the first word's left edge to the last word's right edge.
                    XCord  = firstWord.XCord,
                    XCord1 = lastWord.XCord1,
                    YCord  = this.wordList.Min(w => w.YCord),
                    YCord1 = this.wordList.Max(w => w.YCord1)
                };

                finished.Words.AddRange(this.wordList);
                this.lineList.Add(finished);
            }

            this.initialXcoord = 0;
            this.wordList.Clear();
        }