/// <summary>
/// Renders one text line as an hOCR <c>ocrx_line</c> span.
/// </summary>
/// <remarks>
/// When <c>getHOCRByWords</c> is set, every word is rendered through <c>GetOCRWords</c> on its own
/// row; otherwise the words' text is joined with single spaces and, if the line carries a font
/// name and a positive font size, those are added to the span title as <c>x_font</c>/<c>x_fsize</c>.
/// Numbers are formatted with the invariant culture so the title attribute does not change with
/// the thread's locale.
/// NOTE(review): <c>GetOCRWords</c> is not visible here - confirm it formats its numbers the same way.
/// </remarks>
/// <param name="line">Line to render; its <c>Words</c> list supplies the content.</param>
/// <returns>
/// The span markup terminated by a newline, or an empty string when the line has no words.
/// </returns>
private string GetLineWithWords(HocrLineModel line)
{
    if (line.Words.Count <= 0)
    {
        return string.Empty;
    }

    // Fully qualified so the method does not depend on extra using directives.
    System.IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;
    System.Text.StringBuilder markup = new System.Text.StringBuilder();

    // Font details are only emitted in joined-text mode, and only when both values are usable.
    if (this.getHOCRByWords || string.IsNullOrEmpty(line.FontNameFirstWord) || line.FontSizeFirstWord <= 0)
    {
        markup.AppendFormat(
            invariant,
            "\t\t\t<span class='ocrx_line' id='line_{0}' title=\"bbox {1} {2} {3} {4}\">\n",
            line.LineID, line.XCord, line.YCord, line.XCord1, line.YCord1);
    }
    else
    {
        markup.AppendFormat(
            invariant,
            "\t\t\t<span class='ocrx_line' id='line_{0}' title=\"bbox {1} {2} {3} {4}; x_font {5}; x_fsize {6}\">\n",
            line.LineID, line.XCord, line.YCord, line.XCord1, line.YCord1,
            line.FontNameFirstWord, line.FontSizeFirstWord);
    }

    if (this.getHOCRByWords)
    {
        // One row of word markup per word.
        foreach (WordData word in line.Words)
        {
            markup.Append(this.GetOCRWords(word)).Append("\n");
        }
    }
    else
    {
        // Plain text content: the words separated by single spaces.
        markup.Append("\t\t\t\t")
              .Append(string.Join(" ", line.Words.Select(w => w.Word).ToArray()))
              .Append("\n");
    }

    markup.Append("\t\t\t</span>\n");
    return markup.ToString();
}
/// <summary>
/// Merges lines that share the same <c>YCord</c> into a single line each, orders the merged
/// lines by ascending <c>YCord</c> and numbers them from the running <c>lineID1</c> counter.
/// </summary>
/// <remarks>
/// Each merged line takes its font name and size from the first line of its group, collects the
/// words of every line in the group, and gets a bounding box computed from those words.
/// </remarks>
/// <param name="lines">Lines to merge and order.</param>
/// <returns>A new list of merged lines in ascending <c>YCord</c> order with <c>LineID</c> assigned.</returns>
private List<HocrLineModel> SortLineList(List<HocrLineModel> lines)
{
    List<HocrLineModel> mergedRows = new List<HocrLineModel>();

    foreach (IGrouping<double, HocrLineModel> sameRow in lines.GroupBy(l => l.YCord))
    {
        // The first line of the group supplies the font information.
        HocrLineModel leader = sameRow.First();

        HocrLineModel combined = new HocrLineModel();
        combined.FontNameFirstWord = leader.FontNameFirstWord;
        combined.FontSizeFirstWord = leader.FontSizeFirstWord;
        combined.Words.AddRange(sameRow.SelectMany(part => part.Words));

        // Bounding box spanning every collected word.
        combined.XCord = combined.Words.Min(w => w.XCord);
        combined.XCord1 = combined.Words.Max(w => w.XCord1);
        combined.YCord = combined.Words.Min(w => w.YCord);
        combined.YCord1 = combined.Words.Max(w => w.YCord1);

        mergedRows.Add(combined);
    }

    List<HocrLineModel> ordered = mergedRows.OrderBy(row => row.YCord).ToList();

    // IDs continue from wherever the counter currently stands.
    foreach (HocrLineModel row in ordered)
    {
        row.LineID = this.lineID1++;
    }

    return ordered;
}
/// <summary>
/// Finishes the line currently being collected in <c>wordList</c>.
/// </summary>
/// <remarks>
/// If any words were collected, a line model is built from them and appended to <c>lineList</c>:
/// the text is the words joined by spaces with whitespace before punctuation removed, the font
/// comes from the first word, the horizontal extent runs from the first word's <c>XCord</c> to
/// the last word's <c>XCord1</c>, and the vertical extent covers all words. In every case
/// <c>initialXcoord</c> is reset to 0 and <c>wordList</c> is cleared afterwards.
/// </remarks>
private void endLine()
{
    if (this.wordList.Count > 0)
    {
        WordData firstWord = this.wordList[0];
        WordData lastWord = this.wordList[this.wordList.Count - 1];

        string joined = string.Join(" ", this.wordList.Select(w => w.Word).ToArray());

        HocrLineModel finished = new HocrLineModel();

        // Pull punctuation back onto the preceding word ("word ," becomes "word,").
        finished.Text = Regex.Replace(joined, "\\s+(\\p{P})", "$1");
        finished.FontNameFirstWord = firstWord.FontName;
        finished.FontSizeFirstWord = firstWord.FontSize;
        finished.XCord = firstWord.XCord;
        finished.XCord1 = lastWord.XCord1;
        finished.YCord = this.wordList.Min(w => w.YCord);
        finished.YCord1 = this.wordList.Max(w => w.YCord1);
        finished.Words.AddRange(this.wordList);

        this.lineList.Add(finished);
    }

    // Reset the per-line state whether or not a line was produced.
    this.initialXcoord = 0;
    this.wordList.Clear();
}