// The patterns below are loop-invariant, so they are built once and reused across
// calls; Regex instances are immutable and safe to share.

// One semester: the heading text that follows the bullet image (group 1), then two
// consecutive <blockquote> sections (group 2 is read as honors, group 3 as attendance).
private static readonly Regex s_semesterRegex = new Regex(
    "<img src=./image/or_ball.gif>([^\n]+)\n" +
    "\\s<blockquote>((?:(?!<\\/blockquote>).|\n)+)<\\/blockquote>\n<blockquote>((?:(?!<\\/blockquote>).|\n)+)<\\/blockquote>");

// A two-cell row "label, count" terminated by a newline. The same pattern is used for
// both the honors and the attendance statistics tables.
private static readonly Regex s_statisticsRowRegex = new Regex(
    "^<tr><td align=center BGCOLOR=CCFFFF>([^<]+)<td align=center BGCOLOR=99FF99>([^<\n]+)\n",
    RegexOptions.Multiline | RegexOptions.IgnoreCase);

// A four-cell honor row: time, type, count, detail (detail runs to the end of the line).
private static readonly Regex s_honorDetailRowRegex = new Regex(
    "^<tr><td align=center BGCOLOR=CCFFFF>([^<]+)" +
    "<td align=center BGCOLOR=99FF99>([^<]+)" +
    "<td align=center BGCOLOR=CCFFFF>([^<]+)" +
    "<td align=Left BGCOLOR=99FF99>([^\n]+)",
    RegexOptions.Multiline | RegexOptions.IgnoreCase);

// A six-cell attendance row: week, time, session, roll-call sheet number, type, detail
// (detail runs to the end of the line).
private static readonly Regex s_attendenceDetailRowRegex = new Regex(
    "^<tr><td align=center BGCOLOR=CCFFFF>([^<]+)" +
    "<td align=center BGCOLOR=99FF99>([^<]+)" +
    "<td align=center BGCOLOR=CCFFFF>([^<]+)" +
    "<td align=center BGCOLOR=99FF99>([^<]+)" +
    "<td align=center BGCOLOR=CCFFFF>([^<]+)" +
    "<td align=Left BGCOLOR=99FF99>([^\n]+)",
    RegexOptions.Multiline | RegexOptions.IgnoreCase);

// Strips a single trailing "<BR>" (case-sensitive) from an attendance detail cell.
private static readonly Regex s_trailingBrRegex = new Regex("<BR>$");

/// <summary>
/// Maps the one known garbled attendance label to its intended spelling and returns
/// every other label unchanged.
/// </summary>
/// <param name="rawType">Label text exactly as captured from the page.</param>
/// <returns>"遲到/早退" for the garbled variant; otherwise <paramref name="rawType"/>.</returns>
private static string NormalizeAttendenceType(string rawType)
{
    // NOTE(review): the left-hand literal contains U+FFFD (replacement character) exactly
    // as it appears in this file; presumably the page's separator does not survive
    // decoding. Confirm against a live response before altering it.
    return rawType == "遲到�早退" ? "遲到/早退" : rawType;
}

/// <summary>
/// Extracts per-semester honor and attendance records from the given HTML text.
/// </summary>
/// <param name="html">Markup to scan. Line breaks are matched as "\n" only.</param>
/// <returns>
/// A result whose <c>Semesters</c> holds one entry per matched semester section, in
/// document order. It is empty when nothing matches.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="html"/> is null.</exception>
/// <exception cref="System.FormatException">A count or week cell is not an integer.</exception>
/// <exception cref="System.OverflowException">A count or week cell does not fit in an <c>int</c>.</exception>
/// <remarks>
/// Statistics are inserted with <c>Add</c>; if those collections are dictionaries
/// (not visible from here), a repeated label within one semester will throw.
/// </remarks>
public static AttendenceAndHonors Parse(string html)
{
    if (html == null)
    {
        throw new System.ArgumentNullException("html");
    }

    // The scraped numbers are machine-readable, so parse them culture-independently.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    AttendenceAndHonors result = new AttendenceAndHonors();

    foreach (Match semesterMatch in s_semesterRegex.Matches(html))
    {
        string honorsHtml = semesterMatch.Groups[2].Value;
        string attendenceHtml = semesterMatch.Groups[3].Value;

        Semester semester = new Semester();
        semester.Name = semesterMatch.Groups[1].Value;

        // Honors: summary counts first, then the individual records.
        foreach (Match row in s_statisticsRowRegex.Matches(honorsHtml))
        {
            semester.HonorsStatistics.Add(
                row.Groups[1].Value,
                int.Parse(row.Groups[2].Value, invariant));
        }

        foreach (Match row in s_honorDetailRowRegex.Matches(honorsHtml))
        {
            Honor honor = new Honor();
            honor.Time = row.Groups[1].Value;
            honor.Type = row.Groups[2].Value;
            honor.Count = int.Parse(row.Groups[3].Value, invariant);
            honor.Detail = row.Groups[4].Value;
            semester.HonorDetails.Add(honor);
        }

        // Attendance: summary counts first, then the individual records.
        foreach (Match row in s_statisticsRowRegex.Matches(attendenceHtml))
        {
            semester.AttendenceStatistics.Add(
                NormalizeAttendenceType(row.Groups[1].Value),
                int.Parse(row.Groups[2].Value, invariant));
        }

        foreach (Match row in s_attendenceDetailRowRegex.Matches(attendenceHtml))
        {
            Attendence attendence = new Attendence();
            attendence.Week = int.Parse(row.Groups[1].Value, invariant);
            attendence.Time = row.Groups[2].Value;
            attendence.Session = row.Groups[3].Value;
            attendence.RollcallSheetNumber = row.Groups[4].Value;
            attendence.Type = NormalizeAttendenceType(row.Groups[5].Value);
            attendence.Detail = s_trailingBrRegex.Replace(row.Groups[6].Value, "");
            semester.AttendenceDetails.Add(attendence);
        }

        result.Semesters.Add(semester);
    }

    return result;
}