/// <summary>
/// Converts a span wrapping a link (an <c>a</c> element with an <c>href</c> attribute and a
/// <c>strong</c> child) into the link's display text plus the record the link refers to.
/// </summary>
/// <param name="span">Element expected to contain the link.</param>
/// <returns>
/// (strong text, record) where the record carries <c>BibID</c>, <c>NCID</c> or <c>URL</c>
/// depending on the href; (strong text, null) for a detail-page link that carries neither id;
/// (<c>span.Value</c>, null) when the span does not have the expected shape or the href is
/// not one of the recognised forms.
/// </returns>
static Tuple<string, ItemRecord> ProcessLinkName(XElement span)
{
    // The href values handled here start with a single quote character.
    const string detailPrefix = "'/opac/opac_details.cgi?";
    const string externalPrefix = "'http://vs2ga4mq9g";

    var link = span.Element("a");
    var hrefAttribute = link == null ? null : link.Attribute("href");
    var strong = link == null ? null : link.Element("strong");
    if (hrefAttribute == null || strong == null)
    {
        // Not a link of the expected shape: fall back to the span's plain text.
        return new Tuple<string, ItemRecord>(span.Value, null);
    }

    var href = hrefAttribute.Value;
    var text = strong.Value;

    if (href.StartsWith(detailPrefix, StringComparison.Ordinal))
    {
        // A bibid takes precedence over an ncid when both are present.
        var bibid = ExtractLinkParameter(href, "&bibid=");
        if (bibid != null)
        {
            return new Tuple<string, ItemRecord>(text, new ItemRecord { BibID = bibid });
        }

        var ncid = ExtractLinkParameter(href, "&ncid=");
        if (ncid != null)
        {
            return new Tuple<string, ItemRecord>(text, new ItemRecord { NCID = ncid });
        }

        return new Tuple<string, ItemRecord>(text, null);
    }

    if (href.StartsWith(externalPrefix, StringComparison.Ordinal))
    {
        var closingQuote = href.IndexOf('\'', externalPrefix.Length);
        if (closingQuote == -1)
        {
            // Unterminated quoted URL: treat as unrecognised.
            return new Tuple<string, ItemRecord>(span.Value, null);
        }

        // Take the text between the opening quote (index 0) and the closing quote.
        var url = href.Substring(1, closingQuote - 1);
        if (href.Contains("encodeURIComponent"))
        {
            // The href appends an encoded term to the quoted URL; the link text is used for it.
            url += HttpUtility.UrlEncode(text);
        }

        return new Tuple<string, ItemRecord>(text, new ItemRecord { URL = url });
    }

    // Unrecognised href form.
    return new Tuple<string, ItemRecord>(span.Value, null);
}

/// <summary>
/// Returns the text following <paramref name="key"/> in <paramref name="href"/>, or null when
/// the key does not occur.
/// </summary>
/// <remarks>
/// The value ends at the next '&amp;'. When there is none (the parameter is the last one), it
/// ends at the next single quote, or at the end of the string if there is no quote either.
/// </remarks>
static string ExtractLinkParameter(string href, string key)
{
    var start = href.IndexOf(key, StringComparison.Ordinal);
    if (start == -1)
    {
        return null;
    }

    start += key.Length;
    var end = href.IndexOf('&', start);
    if (end == -1)
    {
        end = href.IndexOf('\'', start);
    }
    if (end == -1)
    {
        end = href.Length;
    }

    return href.Substring(start, end - start);
}
/// <summary>
/// Fetches the detail page for a book (via <c>DownloadUTF8</c>) and scrapes the title, the
/// detail fields and the library location rows out of the returned markup.
/// </summary>
/// <param name="bookID">Record identifying the book; its <c>BibID</c> must be set.</param>
/// <returns>
/// Item1: (field name, field value) pairs, starting with the entry named "Title".
/// Item2: one dictionary per location row, keyed by the <c>class</c> attribute of each
/// <c>td</c>; the value is the text of the cell's <c>a</c> child if it has one, otherwise the
/// cell text, or null when the cell contains a <c>br</c> element.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="bookID"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="bookID"/> has no <c>BibID</c>, or a location row contains two cells with the
/// same <c>class</c> value (raised by <c>Dictionary.Add</c>).
/// </exception>
/// <exception cref="FormatException">An expected marker is missing from the page.</exception>
public static Tuple<List<Tuple<string, string>>, List<Dictionary<string, string>>> ExtractDataByDetailPage(ItemRecord bookID)
{
    if (bookID == null)
    {
        throw new ArgumentNullException("bookID");
    }
    if (bookID.BibID == null)
    {
        throw new ArgumentException("A BibID is required to look up the detail page.", "bookID");
    }

    var lookupURL = string.Format(lookupBibid, bookID.BibID);
    var detailPage = DownloadUTF8(lookupURL);

    // Start to get the detail.
    var detail = new List<Tuple<string, string>>();

    // Obtain the book title.
    var titleStart = IndexOfRequiredMarker(detailPage, bookDetailTitleBegin, 0) + bookDetailTitleBegin.Length;
    var titleEnd = IndexOfRequiredMarker(detailPage, bookDetailTitleEnd, titleStart);
    detail.Add(new Tuple<string, string>("Title", detailPage.Substring(titleStart, titleEnd - titleStart)));

    // Obtain each detail field of the book.
    // The scan cursor only ever moves forward; a missing inner marker raises FormatException
    // instead of resetting the cursor towards the start of the page.
    var position = titleStart;
    int entryStart;
    while ((entryStart = detailPage.IndexOf(bookDetailEntryBegin, position, StringComparison.Ordinal)) != -1)
    {
        // The field name sits after the second type marker, which follows the first one.
        var typeStart = IndexOfRequiredMarker(detailPage, bookDetailEntryType1, entryStart + bookDetailEntryBegin.Length);
        typeStart = IndexOfRequiredMarker(detailPage, bookDetailEntryType2, typeStart) + bookDetailEntryType2.Length;
        var typeEnd = IndexOfRequiredMarker(detailPage, bookDetailEntryValue, typeStart);

        var valueStart = typeEnd + bookDetailEntryValue.Length;
        var valueEnd = IndexOfRequiredMarker(detailPage, bookDetailEntryEnd, valueStart);

        var type = detailPage.Substring(typeStart, typeEnd - typeStart);
        var value = detailPage.Substring(valueStart, valueEnd - valueStart);
        detail.Add(new Tuple<string, string>(type, value));

        position = valueEnd;
    }

    // If the book is stored at UT Library, get the location list.
    // The search continues from where the detail-field scan stopped.
    var collectionList = new List<Dictionary<string, string>>();
    var rowStart = position;
    while ((rowStart = detailPage.IndexOf(libraryEntryBegin, rowStart, StringComparison.Ordinal)) != -1)
    {
        rowStart += libraryEntryBegin.Length;
        var rowEnd = IndexOfRequiredMarker(detailPage, libraryEntryEnd, rowStart);

        // Wrap the fragment in a dummy root element so it can be parsed as XML.
        var doc = XDocument.Parse("<A>" + detailPage.Substring(rowStart, rowEnd - rowStart) + "</A>");
        var book = new Dictionary<string, string>();
        foreach (var elem in doc.Descendants("td"))
        {
            var classAttribute = elem.Attribute("class");
            if (classAttribute == null)
            {
                // A cell without a class has no key to store it under.
                continue;
            }

            string fieldValue = null;
            var link = elem.Element("a");
            if (link != null)
            {
                fieldValue = link.Value;
            }
            else if (elem.Element("br") == null)
            {
                fieldValue = elem.Value;
            }

            book.Add(classAttribute.Value, fieldValue);
        }

        collectionList.Add(book);
    }

    return new Tuple<List<Tuple<string, string>>, List<Dictionary<string, string>>>(detail, collectionList);
}

/// <summary>
/// Ordinal search for <paramref name="marker"/> in <paramref name="page"/> starting at
/// <paramref name="startIndex"/>; throws <see cref="FormatException"/> when it is absent.
/// </summary>
static int IndexOfRequiredMarker(string page, string marker, int startIndex)
{
    var index = page.IndexOf(marker, startIndex, StringComparison.Ordinal);
    if (index == -1)
    {
        throw new FormatException("The detail page does not have the expected layout: marker \"" + marker + "\" was not found.");
    }

    return index;
}