Ejemplo n.º 1
0
        /// <summary>
        /// Grades how severe the mismatch between two lines is, based on the
        /// differing fragments reported by <c>GetWrongWordsDifferentArray</c>.
        /// </summary>
        /// <remarks>
        /// Rules, evaluated in order:
        /// either line null or empty gives MINOR;
        /// no differing fragment gives MINOR;
        /// one fragment gives MODERATE when it is longer than 2 characters, otherwise MINOR;
        /// two fragments of at most one character each give MINOR when either is one of
        /// <c>. , : ; ~ -</c> or a space, otherwise MODERATE;
        /// two to four fragments give MODERATE when they total fewer than 5 characters, otherwise MAJOR;
        /// five or more fragments give MAJOR.
        /// </remarks>
        /// <param name="actual">The line as it was actually read.</param>
        /// <param name="expected">The line as it was expected to read.</param>
        /// <returns>MINOR, MODERATE or MAJOR according to the rules above.</returns>
        private Enum.MistakeType IdentifyTypeOfMistake(string actual, string expected)
        {
            // An empty side means the line is really an orphan that was reported as
            // different. IsNullOrEmpty also keeps a null argument from throwing here.
            if (string.IsNullOrEmpty(actual) || string.IsNullOrEmpty(expected))
            {
                return(Enum.MistakeType.MINOR);
            }

            List <Diff> difference = GetWrongWordsDifferentArray(actual, expected);

            // No fragment to grade. Without this guard the single-fragment rule below
            // would index into an empty list and throw.
            if (difference == null || difference.Count == 0)
            {
                return(Enum.MistakeType.MINOR);
            }

            // Exactly one differing fragment: graded by its length alone.
            if (difference.Count == 1)
            {
                return(difference[0].text.Length > 2 ? Enum.MistakeType.MODERATE : Enum.MistakeType.MINOR);
            }

            // A single-character swap: minor when punctuation or a space is involved.
            if (difference.Count == 2 && difference[0].text.Length < 2 && difference[1].text.Length < 2)
            {
                bool involvesMinorSymbol = IsMinorSymbol(difference[0].text) || IsMinorSymbol(difference[1].text);
                return(involvesMinorSymbol ? Enum.MistakeType.MINOR : Enum.MistakeType.MODERATE);
            }

            // A handful of fragments: graded by the total number of wrong characters.
            if (difference.Count < 5)
            {
                int numberOfWrongCharacters = 0;
                foreach (Diff fragment in difference)
                {
                    numberOfWrongCharacters += fragment.text.Length;
                }
                return(numberOfWrongCharacters < 5 ? Enum.MistakeType.MODERATE : Enum.MistakeType.MAJOR);
            }

            // Five or more differing fragments.
            return(Enum.MistakeType.MAJOR);

            // True for the single-character fragments that count as a minor slip.
            bool IsMinorSymbol(string text)
            {
                switch (text)
                {
                    case ".":
                    case ",":
                    case ":":
                    case ";":
                    case "~":
                    case "-":
                    case " ":
                        return true;
                    default:
                        return false;
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Turns the rows of a line-by-line comparison into a list of
        /// <c>OcrResultsPerLine</c> entries and reports whether every kept row passed.
        /// </summary>
        /// <remarks>
        /// A row's kind is decided by searching its outer XML for a marker word, tested in
        /// this order: "rightorphan", "leftorphan", "similar", "different", "same".
        /// Rows matching none of them, "same" rows holding only whitespace, and "similar"
        /// rows whose two sides are equal after trimming are dropped and do not consume
        /// an index.
        /// NOTE(review): the search covers the row's text as well as its markup, so a line
        /// whose content contains a marker word can be claimed by an earlier branch;
        /// confirm this is acceptable.
        /// </remarks>
        /// <param name="allRows">The comparison rows to convert.</param>
        /// <returns>
        /// The entries in row order, indexed consecutively from 0, together with
        /// <c>true</c> only when none of them was recorded as a mistake.
        /// </returns>
        private (List <OcrResultsPerLine>, bool) PopulateOcrResultsArray(XmlNodeList allRows)
        {
            List <OcrResultsPerLine> results = new List <OcrResultsPerLine>();
            bool everyLinePassed = true;
            int nextIndex = 0;

            foreach (XmlNode row in allRows)
            {
                string markup = row.OuterXml;
                bool passed = false;
                // Index and Comment are the same for every kind of row that is kept.
                OcrResultsPerLine entry = new OcrResultsPerLine { Index = nextIndex, Comment = "" };

                if (markup.Contains("rightorphan"))
                {
                    // Only the actual side exists.
                    entry.ActualText   = row.InnerText;
                    entry.ExpectedText = "";
                    entry.Category     = Enum.MistakeType.MINOR;
                }
                else if (markup.Contains("leftorphan"))
                {
                    // Only the expected side exists.
                    entry.ActualText   = "";
                    entry.ExpectedText = row.InnerText;
                    entry.Category     = Enum.MistakeType.MINOR;
                }
                else if (markup.Contains("similar"))
                {
                    // Last child carries the actual text, first child the expected text.
                    string actualText   = row.LastChild.InnerText;
                    string expectedText = row.FirstChild.InnerText;

                    // The two sides differ only in surrounding whitespace: ignore the row.
                    if (actualText.Trim() == expectedText.Trim())
                    {
                        continue;
                    }

                    entry.ActualText   = actualText;
                    entry.ExpectedText = expectedText;
                    entry.Category     = Enum.MistakeType.MINOR;
                }
                else if (markup.Contains("different"))
                {
                    string actualText   = row.LastChild.InnerText;
                    string expectedText = row.FirstChild.InnerText;
                    entry.ActualText   = actualText;
                    entry.ExpectedText = expectedText;

                    // Only the three mistake grades are copied; any other value leaves
                    // the entry's Category as it was constructed.
                    Enum.MistakeType severity = IdentifyTypeOfMistake(actualText, expectedText);
                    if (severity == Enum.MistakeType.MAJOR ||
                        severity == Enum.MistakeType.MODERATE ||
                        severity == Enum.MistakeType.MINOR)
                    {
                        entry.Category = severity;
                    }
                }
                else if (markup.Contains("same") && row.InnerText.Trim().Length > 0)
                {
                    // Identical, non-blank line: both sides get the same text.
                    entry.ActualText   = row.InnerText;
                    entry.ExpectedText = row.InnerText;
                    entry.Category     = Enum.MistakeType.NO_MISTAKE;
                    passed             = true;
                }
                else
                {
                    // Blank or unrecognised row.
                    continue;
                }

                entry.PassedResult = passed;
                if (!passed)
                {
                    everyLinePassed = false;
                }

                results.Add(entry);
                nextIndex++;
            }

            return(results, everyLinePassed);
        }