//Before segmenting, strip rubbish characters from the end of the OCR string.
//Only trailing characters are removed: the scan runs from right to left and stops at the
//first capital letter (A-Z) or digit (0-9) it meets. Any other character, including
//non-ASCII ones, counts as rubbish. A null OCR string is returned as "".
//NOTE(review): indexes 0..7 are never inspected, so at least 8 leading characters are
//always kept, while the original remark said "Min. Dot length = 7" - confirm which is intended.
public PreFilterResult PreFilter(SegmentInput Input)
{
    const int LastProtectedIndex = 7; //characters at or below this index are never removed

    PreFilterResult result = new PreFilterResult();

    string text = Input.OcrString;
    if (text == null)
    {
        text = "";
    }

    //"keep" is the number of leading characters that survive; shrink it while the
    //character just before the cut is neither A-Z nor 0-9
    int keep = text.Length;
    while (keep - 1 > LastProtectedIndex)
    {
        char tail = text[keep - 1];
        bool isDigit = tail >= '0' && tail <= '9';
        bool isCapital = tail >= 'A' && tail <= 'Z';
        if (isDigit || isCapital)
        {
            break; //first valid character from the right: everything before it stays untouched
        }

        keep--;
    }

    result.OcrFiltered = text.Substring(0, keep);
    return result;
}
//Splits the OCR read into segments by looking for gaps between character x-positions.
//A new segment starts at character i when x[i] - x[i-1] is larger than devW times the
//widest character width. The result carries the number of segments, the character index
//at which each segment starts, the size of each detected gap and the string the lists
//were generated from.
//Side effect: input.OcrString is overwritten with the pre-filtered string.
public SegmentResult Segment(SegmentInput input, double devW)
{
    SegmentResult result = new SegmentResult();

    //Filter the rubbish at the end of the string, then build the lists from the filtered string
    input.OcrString = PreFilter(input).OcrFiltered;
    ViDiReadResultList lists = GenerateList(input);

    int charCount = lists.Read.Length;
    List<double> xPositions = lists.XPosList; //x-position of each character
    List<double> widths = lists.WidthList;    //width of each character

    List<int> segmentStarts = new List<int>(); //character index at which each segment starts
    List<double> gaps = new List<double>();    //size of the space in front of every segment but the first

    //Only segment when all lists describe the same number of characters (should be the case);
    //otherwise the result keeps zero segments and empty lists
    bool listsAgree = charCount > 0
        && xPositions.Count == widths.Count
        && xPositions.Count == charCount;

    if (listsAgree)
    {
        //The widest character is the reference for deciding what counts as a space
        double threshold = devW * widths.Max();

        //The first segment always starts at the first character
        segmentStarts.Add(0);

        //Index 0 is handled above; compare every later character with its predecessor
        for (int index = 1; index < charCount; index++)
        {
            double gap = xPositions[index] - xPositions[index - 1];
            if (gap > threshold)
            {
                segmentStarts.Add(index);
                gaps.Add(gap);
            }
        }
    }

    result.Nsegments = segmentStarts.Count;
    result.SegmentIndexList = segmentStarts;
    result.SpaceList = gaps;
    result.FilteredString = lists.Read;
    return result;
}