// METHOD ALAIN: shorter, possibly worth investigating... does NOT work yet because of the indexes.
/*
 *     // Splitted string at index position
 *
 *     string temp = str;
 *     for (int i = 0; i < indexList.Count; i++)
 *     {
 *         temp = temp.Insert(i + indexList[i], " ");
 *         Debug.WriteLine(temp);
 *     }
 *     string[] splitted = temp.Trim(new char[] { ' ' }).Split(' ');
 *
 *     // Found date . First 4 lenght string on the end and add '*' before and at the end
 *     int p = splitted.Length - 1;
 *     bool DateFound = false;
 *     while (p >= 0 && !DateFound)
 *     {
 *         Debug.WriteLine(splitted[p]);
 *         if (splitted[p].Length == 4)
 *         {
 *             result.IndexDateStart = indexList[p - 1];
 *             result.IndexDateEnd = indexList[p] - 1;
 *             splitted[p] = $"*{splitted[p]}*";
 *         }
 *         p--;
 *     }
 *
 *     // Concate all to one string
 *     string temp2 = String.Concat(splitted);
 *
 *     // DOT*DATE*CLUTTER => DOT,DATE,CLUTTER
 *     string[] Splitted2 = temp2.Split('*');
 *     result.Dot = Splitted2[0];
 *     result.Date = Splitted2[1];
 *     string Clutter = Splitted2[2];
 *
 *     return result;
 * }
 */

/// <summary>
/// Extracts the DOT part of read B by cutting off at most one trailing clutter segment.
/// </summary>
/// <remarks>
/// The last segment is treated as clutter only when at least two spaces were detected and the
/// last space is larger than <paramref name="devSpace"/> times the first space. The DOT is
/// presumed to always start at index 0 (no clutter in front).
/// </remarks>
/// <param name="B">Read to segment; forwarded unchanged to <c>Segment</c>.</param>
/// <param name="devW">Space-detection factor forwarded to <c>Segment</c>.</param>
/// <param name="devSpace">Allowed ratio between the last and the first space before the last segment counts as clutter.</param>
/// <returns>
/// The DOT text with its start/end indexes in the filtered string. For an empty filtered string
/// the DOT is "" and both indexes are 0.
/// </returns>
public ResultB SegmentB(SegmentInput B, double devW, double devSpace)
{
    ResultB output = new ResultB();
    SegmentResult segmentation = Segment(B, devW);

    var gaps = segmentation.SpaceList;
    var segmentStarts = segmentation.SegmentIndexList;
    int gapCount = gaps.Count;
    int segmentCount = segmentStarts.Count;
    string text = segmentation.FilteredString;
    int textLength = text.Length;

    int lastDotIndex = 0;
    string dotText = "";

    if (textLength > 0)
    {
        // With fewer than two spaces there is nothing to compare the last space against,
        // so the last segment is kept as part of the DOT.
        bool trailingClutter = gapCount > 1
            && gaps[gapCount - 1] > devSpace * gaps[0];

        // When the last segment is clutter, the DOT ends one character before that segment starts.
        lastDotIndex = trailingClutter
            ? segmentStarts[segmentCount - 1] - 1
            : textLength - 1;

        dotText = text.Substring(0, lastDotIndex + 1);
    }

    output.Dot = dotText;
    output.IndexDotStart = 0;
    output.IndexDotEnd = lastDotIndex;
    return output;
}
/// <summary>
/// Splits read A into a DOT part and a trailing date (timestamp) part.
/// </summary>
/// <remarks>
/// The filtered string is segmented first. Segments are then scanned from right to left
/// (the first segment is never considered) and the first one that is at least four characters
/// long is taken as the date. When there is only one segment, the last four characters are
/// assumed to be the date. Everything in front of the date is returned as the DOT.
/// If no date is found, or the date would start at index 0 (leaving no DOT), the result falls
/// back to date "0000", the whole string as DOT and <c>IndexDateEnd</c> equal to the string length.
/// </remarks>
/// <param name="A">Read to segment; forwarded unchanged to <c>Segment</c>.</param>
/// <param name="devW">Space-detection factor forwarded to <c>Segment</c>.</param>
/// <returns>The DOT and date texts plus the inclusive start/end indexes of the date.</returns>
public ResultA SegmentA(SegmentInput A, double devW)
{
    // A timestamp is expected to be (at least) four characters long.
    const int minDateLength = 4;

    ResultA result = new ResultA();
    SegmentResult segmentRes = Segment(A, devW);

    string str = segmentRes.FilteredString;
    int n = str.Length;
    var indexList = segmentRes.SegmentIndexList;

    int indexStartDate = 0;
    int indexEndDate = 0;

    if (n > 0 && indexList.Count > 1)
    {
        // Walk the segments right to left; segmentEnd is the exclusive end of the segment under test.
        int segmentEnd = n;
        for (int i = indexList.Count - 1; i > 0; i--)
        {
            int segmentStart = indexList[i];
            if (segmentEnd - segmentStart >= minDateLength)
            {
                indexStartDate = segmentStart;
                indexEndDate = segmentEnd - 1;
                break;
            }

            // The segment to the left ends where this one starts.
            segmentEnd = segmentStart;
        }
    }
    else if (n >= minDateLength && indexList.Count == 1)
    {
        // Only one segment: assume the last four characters are the timestamp.
        indexEndDate = n - 1;
        indexStartDate = n - minDateLength;
    }

    string date;
    string dot;

    // Both indexes are inclusive, so the date length is end - start + 1. The previous check
    // (end >= start + 4) demanded five characters and rejected every exact four-character
    // timestamp, including everything produced by the single-segment branch above.
    bool dateFound = indexStartDate > 0
        && (indexEndDate - indexStartDate + 1) >= minDateLength;

    if (dateFound)
    {
        date = str.Substring(indexStartDate, indexEndDate + 1 - indexStartDate);
        dot = str.Substring(0, indexStartDate);
    }
    else
    {
        // No usable date: placeholder date, whole string is the DOT.
        date = "0000";
        indexEndDate = n;
        dot = str;
    }

    result.Date = date;
    result.Dot = dot;
    result.IndexDateEnd = indexEndDate;
    result.IndexDateStart = indexStartDate;
    return result;
}
/// <summary>
/// Splits a read into segments by looking for large jumps between character x-positions.
/// </summary>
/// <remarks>
/// The input is pre-filtered first and its <c>OcrString</c> is overwritten with the filtered
/// text before the per-character lists are generated. A new segment starts at every character
/// whose x-position lies more than <paramref name="devW"/> times the widest character width
/// beyond the previous character's x-position. If the string is empty, or the position/width
/// lists do not match the string length, no segments and no spaces are reported.
/// </remarks>
/// <param name="input">Read to segment. NOTE: its <c>OcrString</c> property is modified.</param>
/// <param name="devW">Factor applied to the widest character width to obtain the space threshold.</param>
/// <returns>
/// Segment count, start index of every segment, the x-distance measured at every segment
/// boundary, and the string the lists were generated from.
/// </returns>
public SegmentResult Segment(SegmentInput input, double devW)
{
    SegmentResult result = new SegmentResult();

    // Filter first, then build the character lists from the filtered text.
    PreFilterResult filtered = PreFilter(input);
    input.OcrString = filtered.OcrFiltered;
    ViDiReadResultList lists = GenerateList(input);

    string read = lists.Read;
    int charCount = read.Length;
    List<double> positions = lists.XPosList;   // x-position per character
    List<double> widths = lists.WidthList;     // width per character

    List<double> segmentStartXs = new List<double>();   // x-position where each segment starts (not part of the result)
    List<int> segmentStartIndexes = new List<int>();    // character index where each segment starts
    List<double> spaces = new List<double>();           // x-distance at each segment boundary

    bool listsUsable = charCount > 0
        && positions.Count == widths.Count
        && positions.Count == charCount;

    if (listsUsable)
    {
        // The widest character is the reference for deciding what counts as a space.
        double threshold = devW * widths.Max();

        // The first segment always starts at the first character.
        segmentStartXs.Add(positions[0]);
        segmentStartIndexes.Add(0);

        for (int k = 1; k < charCount; k++)
        {
            double step = positions[k] - positions[k - 1];
            if (step > threshold)
            {
                segmentStartXs.Add(positions[k]);
                segmentStartIndexes.Add(k);
                spaces.Add(step);
            }
        }
    }

    result.Nsegments = segmentStartIndexes.Count;
    result.SegmentIndexList = segmentStartIndexes;
    result.SpaceList = spaces;
    result.FilteredString = read;
    return result;
}