/// <summary>
/// Scans OCR text for record IDs (the letter 'A' or 'B' followed by digits) and,
/// for the text that follows each ID, tries to split it into a name, a birthday
/// and a birthplace. One <c>TTInfo</c> is built per ID and rendered into the
/// returned report.
/// </summary>
/// <param name="s">Text to scan. A null value, or fewer than 20 characters, is not scanned.</param>
/// <returns>
/// An empty <see cref="StringBuilder"/> when <paramref name="s"/> is null or shorter
/// than 20 characters; a builder containing "No match found." when no ID is present;
/// otherwise one <c>TTInfo.ToString()</c> line per ID (each terminated by "\r\n")
/// followed by "&lt;count&gt; found.".
/// </returns>
public static StringBuilder ScanText(string s)
{
    // Inputs shorter than this are returned as an empty report without scanning.
    const int MinInputLength = 20;

    if (s == null || s.Length < MinInputLength)
    {
        return new StringBuilder();
    }

    // NOTE(review): presumably repairs a lowercase 'l' misread for the digit '1'
    // inside ID-like tokens -- confirm against FixOCRConfusing1L.
    s = FixOCRConfusing1L(s, "A[0-9l]+");
    s = FixOCRConfusing1L(s, "B[0-9l]+");

    // An ID is the letter A or B followed by one or more digits.
    Regex idPattern = new Regex("[AB][0-9]+");
    MatchCollection matches = idPattern.Matches(s);
    if (matches.Count == 0)
    {
        return new StringBuilder("No match found.");
    }

    // Record where each ID starts and what it is.
    TTInfo[] vInfo = new TTInfo[matches.Count];
    int i = 0;
    foreach (Match match in matches)
    {
        vInfo[i] = new TTInfo();
        vInfo[i].ID_idx = match.Index;
        vInfo[i].ID = match.Value;
        ++i;
    }

    // The "residue" of a record is the text between the end of its ID and the
    // start of the next ID; the last record takes everything up to the end of s.
    string[] residues = new string[vInfo.Length];
    int last = vInfo.Length - 1;
    for (i = 0; i < last; ++i)
    {
        int start = vInfo[i].ID_idx + vInfo[i].ID.Length;

        // The "- 1" drops the single character just before the next ID
        // (presumably a separator -- confirm). When two IDs are directly
        // adjacent this length is -1, which used to make Substring throw;
        // such a record now simply gets an empty residue.
        int length = vInfo[i + 1].ID_idx - 1 - start;
        residues[i] = length > 0 ? s.Substring(start, length) : string.Empty;
    }
    residues[last] = s.Substring(vInfo[last].ID_idx + vInfo[last].ID.Length);

    // Date shapes are tried from most to least specific; the search for a
    // record stops at the first pattern that leaves Birthday_idx non-negative.
    string[] datePatterns =
    {
        "[0-9]+-[0-9]+-[0-9]+",
        "[0-9]+/[0-9]+/[0-9]+",
        "[0-9]+-[0-9]+",
        "[0-9]+/[0-9]+",
        "[0-9]+"
    };
    for (i = 0; i < residues.Length; ++i)
    {
        foreach (string pattern in datePatterns)
        {
            SearchDate(pattern, residues[i], ref vInfo[i]);
            if (vInfo[i].Birthday_idx >= 0)
            {
                break;
            }
        }
    }

    // Split each residue around the birthday: the text before it becomes the
    // name, and the text after it (skipping one character) the birthplace.
    // NOTE(review): assumes SearchDate stores Birthday_idx as an offset into the
    // residue it was given -- confirm.
    for (i = 0; i < residues.Length; ++i)
    {
        string residue = residues[i];
        if (vInfo[i].Birthday_idx >= 0)
        {
            vInfo[i].Name = residue.Substring(0, vInfo[i].Birthday_idx);

            int birthPlaceX = vInfo[i].Birthday_idx + vInfo[i].Birthday.Length + 1;
            if (birthPlaceX < residue.Length)
            {
                vInfo[i].Birthplace = residue.Substring(birthPlaceX);
            }
        }
        else
        {
            // No date found: the whole residue is taken as the name.
            vInfo[i].Name = residue;
        }
    }

    // Render one line per record, then the total number of IDs found.
    StringBuilder sb = new StringBuilder();
    foreach (TTInfo info in vInfo)
    {
        info.CleanUp();
        sb.Append(info.ToString() + "\r\n");
    }
    sb.Append(matches.Count + " found.");
    return sb;
}