/// <summary>
/// Runs the Tesseract wrapper on the input, optionally passes the results through
/// <c>CheckType</c>, cleans them with <c>CleanTesseractResult</c>, and emits one GeoJSON
/// feature per remaining result through the static <c>QGISJson</c> writer.
/// </summary>
/// <param name="inputPath">Forwarded to <c>WrapperTesseract.Apply</c>.</param>
/// <param name="outputPath">Forwarded to <c>WrapperTesseract.Apply</c> and assigned to <c>QGISJson.path</c>.</param>
/// <param name="TesseractResultsJSONFileName">Forwarded to <c>WrapperTesseract.Apply</c> and assigned to <c>QGISJson.filename</c>.</param>
/// <param name="lng">Language code handed to the Tesseract wrapper and to the cleaning steps.</param>
/// <param name="dictionaryFilePath">Forwarded to <c>CleanTesseractResult.Apply</c>.</param>
/// <param name="dictionaryExactMatchStringLength">Forwarded to <c>CleanTesseractResult.Apply</c>.</param>
/// <param name="bbxW">Assigned to <c>QGISJson.Wx</c>.</param>
/// <param name="bbxN">Assigned to <c>QGISJson.Ny</c>.</param>
/// <param name="xscale">Assigned to <c>QGISJson.xscale</c>.</param>
/// <param name="yscale">Assigned to <c>QGISJson.yscale</c>.</param>
/// <param name="srid">Assigned to <c>QGISJson.srid</c>.</param>
/// <param name="top">Forwarded to <c>CleanTesseractResult.Apply</c>.</param>
/// <param name="left">Forwarded to <c>CleanTesseractResult.Apply</c>.</param>
/// <param name="bottom">Forwarded to <c>CleanTesseractResult.Apply</c>.</param>
/// <param name="right">Forwarded to <c>CleanTesseractResult.Apply</c>.</param>
/// <param name="elasticsearch">When true, results additionally go through <c>CheckType.Apply</c>; also forwarded to the cleaning step.</param>
public void Apply(string inputPath, string outputPath, string TesseractResultsJSONFileName, string lng, string dictionaryFilePath, int dictionaryExactMatchStringLength, double bbxW, double bbxN, double xscale, double yscale, string srid, double top, double left, double bottom, double right, bool elasticsearch)
{
    try
    {
        // The wrapper is constructed with an empty Tesseract path.
        string tessPath = "";
        var tesseract = new WrapperTesseract(tessPath, lng);
        List<TessResult> tessOcrResultList = tesseract.Apply(inputPath, outputPath, TesseractResultsJSONFileName);

        // Handed unchanged to CheckType.Apply and to QGISJson.AddFeature.
        int georef = -1;

        if (elasticsearch)
        {
            var typeChecker = new CheckType();
            tessOcrResultList = typeChecker.Apply(tessOcrResultList, georef, lng);
        }

        var cleaner = new CleanTesseractResult();
        tessOcrResultList = cleaner.Apply(tessOcrResultList, dictionaryFilePath, dictionaryExactMatchStringLength, lng, top, left, bottom, right, elasticsearch);

        Log.WriteLine("Writing results to GeoJSON...");

        // Static writer configuration: everything except the filename is set before Start().
        QGISJson.path = outputPath;
        QGISJson.Wx = bbxW;
        QGISJson.Ny = bbxN;
        QGISJson.yscale = yscale;
        QGISJson.xscale = xscale;
        QGISJson.srid = srid;
        QGISJson.Start();
        QGISJson.filename = TesseractResultsJSONFileName;

        foreach (TessResult entry in tessOcrResultList)
        {
            // Dictionary word: only doubled newlines are stripped, and only when a value is present.
            if (entry.dict_word3 != null && entry.dict_word3.Length > 0)
            {
                entry.dict_word3 = Regex.Replace(entry.dict_word3, "\n\n", "");
            }

            // Raw Tesseract word: strip doubled newlines, then double quotes, then any remaining newlines.
            if (entry.tess_word3.Length > 0)
            {
                entry.tess_word3 = Regex.Replace(entry.tess_word3, "\n\n", "");
            }
            if (entry.tess_word3.Length > 0)
            {
                entry.tess_word3 = Regex.Replace(entry.tess_word3, "\"", "");
            }
            if (entry.tess_word3.Length > 0)
            {
                entry.tess_word3 = Regex.Replace(entry.tess_word3, "\n", "");
            }

            // Attribute order matches the order in which the properties are written.
            var items = new List<KeyValuePair<string, string>>
            {
                new KeyValuePair<string, string>("NameAfterDictionary", entry.dict_word3),
                new KeyValuePair<string, string>("NameBeforeDictionary", entry.tess_word3),
                new KeyValuePair<string, string>("ImageId", entry.id),
                new KeyValuePair<string, string>("DictionaryWordSimilarity", entry.dict_similarity.ToString()),
                new KeyValuePair<string, string>("TesseractCost", entry.tess_cost3.ToString()),
                new KeyValuePair<string, string>("SameMatches", entry.sameMatches)
            };

            QGISJson.AddFeature(entry.x, entry.y, entry.h, entry.w, georef, items);
        }

        QGISJson.WriteGeojsonFiles();
        Log.WriteLine("GeoJSON generated");
    }
    catch (Exception e)
    {
        // Log the failure details, then rethrow with the original stack trace intact.
        Log.WriteLine(e.Message);
        Log.WriteLine(e.Source);
        Log.WriteLine(e.StackTrace);
        throw;
    }
}
/// <summary>
/// Merges multi-line results, cleans each result for the given language, removes every
/// result whose <c>id</c> is "-1", and sets the <c>front</c>/<c>back</c> flags of the
/// remaining results from their positions relative to one another.
/// </summary>
/// <param name="tessOcrResultList">OCR results; first passed through <c>RemoveMergeMultiLineResults</c>.</param>
/// <param name="georef">Factor each result's <c>y</c> is multiplied by before positions are compared.</param>
/// <param name="lng">Language code: "eng" selects <c>CleanEnglish</c>, "chi_sim" selects <c>CleanChinese</c>; any other value skips cleaning.</param>
/// <returns>The processed list, with <c>front</c>/<c>back</c> set on every element.</returns>
public List<TessResult> Apply(List<TessResult> tessOcrResultList, int georef, string lng)
{
    const double weight = 1.5;       // forward x reach (in widths) and y reach (in heights)
    const double left_weight = 2;    // backward x reach (in widths)
    const double sweight = 0.5;      // tolerance on the other axis (fraction of height/width)
    const int dictionaryExactMatchStringLength = 2;

    CleanTesseractResult ctr = new CleanTesseractResult();
    List<TessResult> results = ctr.RemoveMergeMultiLineResults(tessOcrResultList, 3);

    // Language-specific cleaning; entries already marked "-1" are left untouched.
    for (int i = 0; i < results.Count; i++)
    {
        if (results[i].id == "-1")
        {
            continue;
        }

        if (lng == "eng")
        {
            results[i] = ctr.CleanEnglish(results[i]);
        }
        else if (lng == "chi_sim")
        {
            results[i] = ctr.CleanChinese(results[i]);
        }
    }

    // Drop every entry marked "-1". A forward loop calling RemoveAt(i) skips the element
    // that shifts into slot i, so runs of consecutive "-1" entries were only partly removed.
    results.RemoveAll(r => r.id == "-1");

    // Pairwise comparison. No "-1" entries remain at this point, so only the
    // minimum-length filter is needed on each side of the pair.
    for (int i = 0; i < results.Count; i++)
    {
        TessResult a = results[i];
        if (a.tess_word3.Length < dictionaryExactMatchStringLength)
        {
            continue;
        }

        for (int j = i + 1; j < results.Count; j++)
        {
            TessResult b = results[j];
            if (b.tess_word3.Length < dictionaryExactMatchStringLength)
            {
                continue;
            }

            int x1 = a.x;
            int x2 = b.x;
            int y1 = a.y * georef;
            int y2 = b.y * georef;
            int h = a.h;
            int w = a.w;

            bool closeInY = Math.Abs(y2 - y1) <= sweight * h;
            bool closeInX = Math.Abs(x2 - x1) <= sweight * w;

            // b lies at a non-negative x offset from a (within weight * w) with similar y,
            // or a's scaled y exceeds b's by at most weight * h with similar x.
            if ((x2 - x1 <= weight * w && x2 - x1 >= 0 && closeInY)
                || (y1 - y2 <= weight * h && y1 - y2 >= 0 && closeInX))
            {
                a.front = true;
                b.back = true;
            }

            // Mirror case: a lies at a non-negative x offset from b (within left_weight * w)
            // with similar y, or b's scaled y exceeds a's by at most weight * h with similar x.
            if ((x1 - x2 <= left_weight * w && x1 - x2 >= 0 && closeInY)
                || (y2 - y1 <= weight * h && y2 - y1 >= 0 && closeInX))
            {
                a.back = true;
                b.front = true;
            }
        }
    }

    // Normalise the flags: both set -> both cleared; neither set -> front only.
    foreach (TessResult r in results)
    {
        if (r.front && r.back)
        {
            r.front = false;
            r.back = false;
        }
        else if (!r.front && !r.back)
        {
            r.front = true;
            r.back = false;
        }
    }

    return results;
}
/// <summary>
/// Merges multi-line results, cleans each result for the given language, removes every
/// result whose <c>id</c> is "-1", and sets the <c>front</c>/<c>back</c> flags of the
/// remaining results from their positions relative to one another.
/// </summary>
/// <param name="tessOcrResultList">OCR results; first passed through <c>RemoveMergeMultiLineResults</c>.</param>
/// <param name="georef">Factor each result's <c>y</c> is multiplied by before positions are compared.</param>
/// <param name="lng">Language code: "eng" selects <c>CleanEnglish</c>, "chi_sim" selects <c>CleanChinese</c>; any other value skips cleaning.</param>
/// <returns>The processed list, with <c>front</c>/<c>back</c> set on every element.</returns>
public List<TessResult> Apply(List<TessResult> tessOcrResultList, int georef, string lng)
{
    const double weight = 1.5;       // forward x reach (in widths) and y reach (in heights)
    const double left_weight = 2;    // backward x reach (in widths)
    const double sweight = 0.5;      // tolerance on the other axis (fraction of height/width)
    const int dictionaryExactMatchStringLength = 2;

    CleanTesseractResult ctr = new CleanTesseractResult();
    List<TessResult> results = ctr.RemoveMergeMultiLineResults(tessOcrResultList, 3);

    // Language-specific cleaning; entries already marked "-1" are left untouched.
    for (int i = 0; i < results.Count; i++)
    {
        if (results[i].id == "-1")
        {
            continue;
        }

        if (lng == "eng")
        {
            results[i] = ctr.CleanEnglish(results[i]);
        }
        else if (lng == "chi_sim")
        {
            results[i] = ctr.CleanChinese(results[i]);
        }
    }

    // Remove all "-1" entries in one pass. The previous index loop advanced past the
    // element that moved into the freed slot after RemoveAt(i), leaving adjacent
    // "-1" entries in the list.
    results.RemoveAll(r => r.id == "-1");

    // Compare every unordered pair once. With the "-1" entries gone, the only filter
    // left is the minimum word length, applied to each member of the pair.
    for (int i = 0; i < results.Count; i++)
    {
        TessResult first = results[i];
        if (first.tess_word3.Length < dictionaryExactMatchStringLength)
        {
            continue;
        }

        for (int j = i + 1; j < results.Count; j++)
        {
            TessResult second = results[j];
            if (second.tess_word3.Length < dictionaryExactMatchStringLength)
            {
                continue;
            }

            int x1 = first.x;
            int x2 = second.x;
            int y1 = first.y * georef;
            int y2 = second.y * georef;
            int h = first.h;
            int w = first.w;

            bool closeInY = Math.Abs(y2 - y1) <= sweight * h;
            bool closeInX = Math.Abs(x2 - x1) <= sweight * w;

            // second is at a non-negative x offset from first (within weight * w) with
            // similar y, or first's scaled y exceeds second's by at most weight * h with similar x.
            if ((x2 - x1 <= weight * w && x2 - x1 >= 0 && closeInY)
                || (y1 - y2 <= weight * h && y1 - y2 >= 0 && closeInX))
            {
                first.front = true;
                second.back = true;
            }

            // Mirror case, using left_weight for the x reach.
            if ((x1 - x2 <= left_weight * w && x1 - x2 >= 0 && closeInY)
                || (y2 - y1 <= weight * h && y2 - y1 >= 0 && closeInX))
            {
                first.back = true;
                second.front = true;
            }
        }
    }

    // Normalise the flags: both set -> both cleared; neither set -> front only.
    foreach (TessResult r in results)
    {
        if (r.front && r.back)
        {
            r.front = false;
            r.back = false;
        }
        else if (!r.front && !r.back)
        {
            r.front = true;
            r.back = false;
        }
    }

    return results;
}