Esempio n. 1
0
        /// <summary>
        /// Removes rubbish from the back of the OCR string before it is segmented:
        /// trailing characters that are not digits (0-9) or capital letters (A-Z) are dropped.
        /// </summary>
        /// <param name="Input">Segment input whose <c>OcrString</c> is read; this method does not assign to it.</param>
        /// <returns>
        /// A <c>PreFilterResult</c> whose <c>OcrFiltered</c> holds the trimmed text,
        /// or an empty string when <c>OcrString</c> is null.
        /// </returns>
        public PreFilterResult PreFilter(SegmentInput Input)
        {
            string          text     = Input.OcrString;
            PreFilterResult filtered = new PreFilterResult();

            if (text == null)
            {
                filtered.OcrFiltered = "";
                return(filtered);
            }

            // Scan from the last character towards the front and stop at the first valid one,
            // so only rubbish at the very end of the string is removed.
            // Indexes 0..7 are never examined, which means the first eight characters always survive.
            // NOTE(review): the original remark here was "Min. Dot length = 7", yet this bound keeps 8 characters - confirm which is intended.
            int keep = text.Length;
            while (keep > 8)
            {
                // Comparing the chars directly gives the same verdict as comparing their ASCII bytes:
                // a non-ASCII character would be encoded as '?', which is rejected as well.
                char last      = text[keep - 1];
                bool isDigit   = last >= '0' && last <= '9';
                bool isCapital = last >= 'A' && last <= 'Z';
                if (isDigit || isCapital)
                {
                    break;
                }
                keep--;
            }

            filtered.OcrFiltered = text.Substring(0, keep);
            return(filtered);
        }
Esempio n. 2
0
        /// <summary>
        /// Splits the OCR read into segments by looking for horizontal gaps between
        /// the x-positions of consecutive characters.
        /// </summary>
        /// <remarks>
        /// The OCR string is first cleaned with <c>PreFilter</c> and the cleaned text is written
        /// back to <paramref name="input"/> before <c>GenerateList</c> builds the per-character lists.
        /// Character <c>i</c> starts a new segment when
        /// <c>x[i] - x[i - 1] &gt; devW * (largest character width)</c>.
        /// When the read is empty, or the position/width lists are missing or do not hold exactly
        /// one entry per character, the result reports zero segments.
        /// </remarks>
        /// <param name="input">Segment input; its <c>OcrString</c> is overwritten with the filtered text.</param>
        /// <param name="devW">Gap threshold, expressed as a multiple of the widest character width.</param>
        /// <returns>
        /// The number of segments, the character index at which each segment starts, the gap
        /// measured in front of every segment after the first, and the string taken from the generated lists.
        /// </returns>
        public SegmentResult Segment(SegmentInput input, double devW)
        {
            // Filter rubbish off the end of the string, then generate the lists from the filtered string.
            PreFilterResult filtered = PreFilter(input);
            input.OcrString = filtered.OcrFiltered;
            ViDiReadResultList lists = GenerateList(input);

            List <double> xPositions = lists.XPosList;  //x-position of each character
            List <double> widths     = lists.WidthList; //width of each character
            int           charCount  = lists.Read != null ? lists.Read.Length : 0;

            List <int>    segmentStarts = new List <int>();    //character indexes of start of each segment
            List <double> gaps          = new List <double>(); //size of the space in front of each segment after the first

            // The lists are only usable when they exist and carry one entry per character (should be the case).
            bool listsUsable = charCount > 0 &&
                               xPositions != null &&
                               widths != null &&
                               xPositions.Count == charCount &&
                               widths.Count == charCount;

            if (listsUsable)
            {
                // The widest character is the reference for deciding what counts as a space.
                double threshold = devW * widths.Max();

                // The first segment always starts at the first character.
                segmentStarts.Add(0);
                double previousX = xPositions[0];

                for (int i = 1; i < charCount; i++)
                {
                    double currentX = xPositions[i];
                    double gap      = currentX - previousX;

                    if (gap > threshold) //distance to the previous character is large enough --> new segment
                    {
                        segmentStarts.Add(i);
                        gaps.Add(gap);
                    }

                    previousX = currentX;
                }
            }

            SegmentResult result = new SegmentResult();
            result.Nsegments        = segmentStarts.Count;
            result.SegmentIndexList = segmentStarts;
            result.SpaceList        = gaps;
            result.FilteredString   = lists.Read;

            return(result);
        }