/// <summary>
/// Classifies how severe the mismatch between two lines is, based on the
/// differing fragments returned by <c>GetWrongWordsDifferentArray</c>.
/// </summary>
/// <remarks>
/// Rules, applied in this order:
/// <list type="number">
/// <item><description>Either line is null or empty: MINOR (an orphan line that was reported as different).</description></item>
/// <item><description>No differing fragment was returned: MINOR.</description></item>
/// <item><description>Exactly one fragment: MODERATE when it is longer than two characters, otherwise MINOR.</description></item>
/// <item><description>Exactly two fragments, each shorter than two characters: MINOR when either one is
/// one of <c>. , : ; ~ -</c> or a space, otherwise MODERATE.</description></item>
/// <item><description>Fewer than five fragments: MODERATE when their combined length is below five characters, otherwise MAJOR.</description></item>
/// <item><description>Anything else: MAJOR.</description></item>
/// </list>
/// </remarks>
/// <param name="actual">The line that was actually produced; null or empty counts as a missing line.</param>
/// <param name="expected">The line that was expected; null or empty counts as a missing line.</param>
/// <returns>MINOR, MODERATE or MAJOR according to the rules above.</returns>
private Enum.MistakeType IdentifyTypeOfMistake(string actual, string expected)
{
    // A missing side means the pair is really an orphan that was reported as "different".
    if (string.IsNullOrEmpty(actual) || string.IsNullOrEmpty(expected))
    {
        return Enum.MistakeType.MINOR;
    }

    List<Diff> difference = GetWrongWordsDifferentArray(actual, expected);

    // Guard before indexing: with no fragments there is nothing to measure,
    // and difference[0] would throw.
    if (difference == null || difference.Count == 0)
    {
        return Enum.MistakeType.MINOR;
    }

    if (difference.Count == 1)
    {
        // NOTE(review): an older comment on this check mentioned a 5-character
        // threshold; the implemented threshold is 2 — confirm which is intended.
        return difference[0].text.Length > 2
            ? Enum.MistakeType.MODERATE
            : Enum.MistakeType.MINOR;
    }

    // A single-character swap: harmless when punctuation or a space is involved.
    if (difference.Count == 2 && difference[0].text.Length < 2 && difference[1].text.Length < 2)
    {
        bool involvesMinorSymbol = IsMinorSymbol(difference[0].text) || IsMinorSymbol(difference[1].text);
        return involvesMinorSymbol
            ? Enum.MistakeType.MINOR
            : Enum.MistakeType.MODERATE;
    }

    if (difference.Count < 5)
    {
        // A handful of fragments: judge by how many characters are wrong in total.
        int numberOfWrongCharacters = 0;
        foreach (Diff fragment in difference)
        {
            numberOfWrongCharacters += fragment.text.Length;
        }

        return numberOfWrongCharacters < 5
            ? Enum.MistakeType.MODERATE
            : Enum.MistakeType.MAJOR;
    }

    return Enum.MistakeType.MAJOR;
}

/// <summary>
/// Tells whether a fragment is exactly one of the characters <c>. , : ; ~ -</c> or a single space.
/// </summary>
/// <param name="fragment">The differing fragment to test; must not be null.</param>
/// <returns><c>true</c> when the fragment is one of the listed single characters.</returns>
private static bool IsMinorSymbol(string fragment)
{
    const string minorSymbols = ".,:;~- ";

    // The length check keeps an empty fragment from matching.
    return fragment.Length == 1 && minorSymbols.IndexOf(fragment[0]) >= 0;
}
/// <summary>
/// Builds the per-line result list from the rows of a line-by-line comparison
/// of two text files, and reports whether every recorded line matched.
/// </summary>
/// <remarks>
/// Rows are classified by looking for the words "rightorphan", "leftorphan",
/// "similar", "different" and "same" (in that order) inside the row's outer XML.
/// Rows that match none of them, "same" rows with only whitespace, and "similar"
/// rows whose two sides are equal after trimming are skipped and do not consume an index.
/// NOTE(review): the search covers the whole outer XML, so row text that happens
/// to contain one of these words also matches — confirm this is acceptable.
/// </remarks>
/// <param name="allRows">The row nodes of the comparison result.</param>
/// <returns>
/// The recorded lines in row order, and a flag that is <c>true</c> only when no
/// orphan, similar or different row was recorded.
/// </returns>
private (List <OcrResultsPerLine>, bool) PopulateOcrResultsArray(XmlNodeList allRows)
{
    // Fills the fields that every recorded line sets the same way.
    void Fill(OcrResultsPerLine target, int at, string actualText, string expectedText, bool passed)
    {
        target.Index = at;
        target.ActualText = actualText;
        target.ExpectedText = expectedText;
        target.Comment = "";
        target.PassedResult = passed;
    }

    List<OcrResultsPerLine> collected = new List<OcrResultsPerLine>();
    bool everyLineMatches = true;
    int position = 0;

    foreach (XmlNode row in allRows)
    {
        OcrResultsPerLine entry = new OcrResultsPerLine();
        string markup = row.OuterXml;

        if (markup.Contains("rightorphan"))
        {
            // Text exists only on the actual side.
            Fill(entry, position, row.InnerText, "", false);
            entry.Category = Enum.MistakeType.MINOR;
            everyLineMatches = false;
        }
        else if (markup.Contains("leftorphan"))
        {
            // Text exists only on the expected side.
            Fill(entry, position, "", row.InnerText, false);
            entry.Category = Enum.MistakeType.MINOR;
            everyLineMatches = false;
        }
        else if (markup.Contains("similar"))
        {
            string actual = row.LastChild.InnerText;
            string expected = row.FirstChild.InnerText;

            // Equal once trimmed means the only difference is surrounding whitespace: skip the row.
            if (actual.Trim() == expected.Trim())
            {
                continue;
            }

            Fill(entry, position, actual, expected, false);
            entry.Category = Enum.MistakeType.MINOR;
            everyLineMatches = false;
        }
        else if (markup.Contains("different"))
        {
            string actual = row.LastChild.InnerText;
            string expected = row.FirstChild.InnerText;

            Fill(entry, position, actual, expected, false);
            everyLineMatches = false;

            Enum.MistakeType severity = IdentifyTypeOfMistake(actual, expected);
            switch (severity)
            {
                // Only these three severities are copied over; any other value
                // leaves Category as the constructor set it.
                case Enum.MistakeType.MAJOR:
                case Enum.MistakeType.MODERATE:
                case Enum.MistakeType.MINOR:
                    entry.Category = severity;
                    break;
            }
        }
        else if (markup.Contains("same") && row.InnerText.Trim().Length > 0)
        {
            string text = row.InnerText;

            Fill(entry, position, text, text, true);
            entry.Category = Enum.MistakeType.NO_MISTAKE;
        }
        else
        {
            // Empty line (or an unrecognised row): nothing to record.
            continue;
        }

        collected.Add(entry);
        position++;
    }

    return (collected, everyLineMatches);
}