/// <summary>
/// Builds a <c>TextLabel</c> for every OCR result document (<c>.docx</c>) found in
/// <paramref name="dir"/>, fills in the recognised text of every label in
/// <c>textlabel_list</c>, and finally calls <c>ResultMerger()</c>.
/// </summary>
/// <remarks>
/// The label attributes are encoded in the file name as '_'-separated integer tokens.
/// Tokens 0, 2, 3, 4, 6, 7, 8, 9 and 10 are read; tokens 1 and 5 are ignored.
/// A matching file whose name has fewer than 11 tokens, or a non-numeric token in one
/// of the positions read, makes the conversion throw.
/// </remarks>
/// <param name="dir">Directory that contains the OCR result documents.</param>
public void ReadOCRResults(string dir)
{
    DirectoryInfo dirinfo = new DirectoryInfo(dir);

    foreach (FileInfo NextFile in dirinfo.GetFiles())
    {
        // Only .docx files are considered; names containing '~' are skipped
        // (presumably Word's temporary "~$..." files -- TODO confirm).
        if (NextFile.Extension != ".docx" || NextFile.Name.Contains('~'))
        {
            continue;
        }

        string fn = Path.GetFileNameWithoutExtension(NextFile.Name);
        string[] token = fn.Split('_');

        // Parsed as 32-bit values: the targets are int, and Convert.ToInt16 would
        // throw OverflowException for any value above 32767.
        int num = Convert.ToInt32(token[0]);
        int ch = Convert.ToInt32(token[2]);
        int x = Convert.ToInt32(token[3]);
        int y = Convert.ToInt32(token[4]);
        // NOTE(review): the slope token is parsed as an integer, as before, even
        // though it is stored in a double -- confirm file names never carry a
        // fractional slope.
        double slope = Convert.ToInt32(token[6]);
        int bbxx = Convert.ToInt32(token[7]);
        int bbxy = Convert.ToInt32(token[8]);
        int bbxw = Convert.ToInt32(token[9]);
        int bbxh = Convert.ToInt32(token[10]);

        textlabel_list.Add(new TextLabel(num, ch, x, y, slope, fn, bbxx, bbxy, bbxw, bbxh));
    }

    // Walks the whole list, i.e. also labels that were already in textlabel_list
    // before this call; their documents are looked up in dir as well.
    for (int i = 0; i < textlabel_list.Count; i++)
    {
        TextLabel textlabel = textlabel_list[i];

        // Path.Combine inserts the directory separator when dir does not end with one.
        DocxToText dtt = new DocxToText(Path.Combine(dir, textlabel.fn + ".docx"));

        // ExtractText() is called before susp_char_count is read, as in the original order.
        textlabel.text = dtt.ExtractText();
        textlabel.susp_char_count = dtt.susp_char_count;
    }

    ResultMerger();
}
/// <summary>
/// Returns the text that <c>DocxToText.ExtractText()</c> yields for the given document.
/// </summary>
/// <param name="filePath">Path handed to the <c>DocxToText</c> constructor.</param>
/// <returns>The result of <c>ExtractText()</c> for that document.</returns>
private string ReadWord(string filePath)
{
    return new DocxToText(filePath).ExtractText();
}