Example #1
0
        /// <summary>
        /// Reads the anchor inside <paramref name="span"/> and returns the link's display text together
        /// with the identifier or URL encoded in its <c>href</c> attribute.
        /// </summary>
        /// <param name="span">
        /// Element expected to contain an <c>a</c> child that has an <c>href</c> attribute and a
        /// <c>strong</c> child holding the display text.
        /// </param>
        /// <returns>
        /// A tuple of the display text and an ItemRecord with BibID, NCID or URL set. The record is
        /// null when an OPAC detail link carries neither id. When the link is not recognised or cannot
        /// be parsed, the tuple holds the span's whole text and a null record.
        /// </returns>
        static Tuple<string, ItemRecord> ProcessLinkName(XElement span)
        {
            try
            {
                var link = span.Element("a");
                var href = link.Attribute("href").Value;
                var text = link.Element("strong").Value;

                // The href values handled here begin with a single quote, so the quote is part of
                // each prefix. Ordinal comparison is used because these are machine tokens.
                if (href.StartsWith("'/opac/opac_details.cgi?", StringComparison.Ordinal))
                {
                    // BibID takes precedence over NCID when both are present.
                    var bibID = ExtractQueryValue(href, "&bibid=");
                    if (bibID != null)
                    {
                        return new Tuple<string, ItemRecord>(text, new ItemRecord { BibID = bibID });
                    }

                    var ncid = ExtractQueryValue(href, "&ncid=");
                    if (ncid != null)
                    {
                        return new Tuple<string, ItemRecord>(text, new ItemRecord { NCID = ncid });
                    }

                    return new Tuple<string, ItemRecord>(text, null);
                }

                const string externalPrefix = "'http://vs2ga4mq9g";
                if (href.StartsWith(externalPrefix, StringComparison.Ordinal))
                {
                    // Take everything between the opening quote and the next quote after the prefix.
                    var url = href.Substring(1, href.IndexOf('\'', externalPrefix.Length) - 1);
                    // When the href mentions encodeURIComponent, the URL-encoded link text is appended.
                    if (href.Contains("encodeURIComponent")) url += HttpUtility.UrlEncode(text);
                    return new Tuple<string, ItemRecord>(text, new ItemRecord { URL = url });
                }

                // Unrecognised link format: fall back to the span's plain text.
                return new Tuple<string, ItemRecord>(span.Value, null);
            }
            catch
            {
                // Deliberate best-effort: any missing element/attribute or malformed href degrades
                // to the span's plain text with no record instead of failing the caller.
                return new Tuple<string, ItemRecord>(span.Value, null);
            }
        }

        /// <summary>
        /// Returns the text that follows <paramref name="key"/> in <paramref name="href"/>, up to the
        /// next '&amp;'. If there is no later '&amp;' (the value is the last parameter), the value ends
        /// at the next single quote, or at the end of the string when there is none.
        /// </summary>
        /// <returns>The extracted value, or null when <paramref name="key"/> does not occur.</returns>
        private static string ExtractQueryValue(string href, string key)
        {
            var start = href.IndexOf(key, StringComparison.Ordinal);
            if (start == -1) return null;
            start += key.Length;

            var end = href.IndexOf('&', start);
            if (end == -1) end = href.IndexOf('\'', start);
            if (end == -1) end = href.Length;
            return href.Substring(start, end - start);
        }
Example #2
0
        /// <summary>
        /// Downloads the detail page for the given record and extracts its descriptive fields and the
        /// rows of its holdings table by scanning for fixed text markers.
        /// </summary>
        /// <param name="bookID">Record to look up; its BibID is formatted into the lookup URL.</param>
        /// <returns>
        /// Item1: (field name, value) pairs, beginning with a "Title" entry.
        /// Item2: one dictionary per holdings entry found after the detail fields, keyed by each
        /// <c>td</c> element's <c>class</c> attribute. The value is the text of the cell's <c>a</c>
        /// child if present, otherwise the cell text, or null when the cell contains a <c>br</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="bookID"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="bookID"/> has no BibID.</exception>
        /// <exception cref="FormatException">A required marker is missing from the downloaded page.</exception>
        public static Tuple<List<Tuple<string, string>>, List<Dictionary<string, string>>> ExtractDataByDetailPage(ItemRecord bookID)
        {
            if (bookID == null) throw new ArgumentNullException("bookID");
            if (bookID.BibID == null)
            {
                // Two-argument form: the first argument is the message, the second the parameter name.
                throw new ArgumentException("A BibID is required to look up the detail page.", "bookID");
            }

            var detailPage = DownloadUTF8(string.Format(lookupBibid, bookID.BibID));

            // Start to get the detail.
            var detail = new List<Tuple<string, string>>();

            // Obtain the book title.
            int i = IndexOfRequired(detailPage, bookDetailTitleBegin, 0) + bookDetailTitleBegin.Length;
            int j = IndexOfRequired(detailPage, bookDetailTitleEnd, i);
            detail.Add(new Tuple<string, string>("Title", detailPage.Substring(i, j - i)));

            // Obtain each detail field of the book. The cursor i always ends an iteration on the
            // entry's end marker, so the scan moves strictly forward.
            while ((j = detailPage.IndexOf(bookDetailEntryBegin, i, StringComparison.Ordinal)) != -1)
            {
                i = IndexOfRequired(detailPage, bookDetailEntryType1, j + bookDetailEntryBegin.Length);
                i = IndexOfRequired(detailPage, bookDetailEntryType2, i) + bookDetailEntryType2.Length;
                j = IndexOfRequired(detailPage, bookDetailEntryValue, i);
                var type = detailPage.Substring(i, j - i);

                j += bookDetailEntryValue.Length;
                i = IndexOfRequired(detailPage, bookDetailEntryEnd, j);
                var value = detailPage.Substring(j, i - j);

                detail.Add(new Tuple<string, string>(type, value));
            }

            // Collect the holdings/location rows, if the page has any.
            var collectionList = new List<Dictionary<string, string>>();
            while ((i = detailPage.IndexOf(libraryEntryBegin, i, StringComparison.Ordinal)) != -1)
            {
                i += libraryEntryBegin.Length;
                j = IndexOfRequired(detailPage, libraryEntryEnd, i);

                // Wrap the fragment in a synthetic root so it parses as a single XML document.
                var doc = XDocument.Parse("<A>" + detailPage.Substring(i, j - i) + "</A>");

                var book = new Dictionary<string, string>();
                foreach (var elem in doc.Descendants("td"))
                {
                    var fieldName = elem.Attribute("class").Value;
                    string fieldValue = null;
                    var link = elem.Element("a");
                    if (link != null) fieldValue = link.Value;
                    else if (elem.Element("br") == null) fieldValue = elem.Value;
                    book.Add(fieldName, fieldValue);
                }
                collectionList.Add(book);
            }

            return new Tuple<List<Tuple<string, string>>, List<Dictionary<string, string>>>(detail, collectionList);
        }

        /// <summary>
        /// Ordinal search for <paramref name="marker"/> in <paramref name="text"/> starting at
        /// <paramref name="startIndex"/>; throws instead of returning -1.
        /// </summary>
        /// <exception cref="FormatException">The marker does not occur at or after <paramref name="startIndex"/>.</exception>
        private static int IndexOfRequired(string text, string marker, int startIndex)
        {
            var index = text.IndexOf(marker, startIndex, StringComparison.Ordinal);
            if (index == -1)
            {
                throw new FormatException("Expected marker \"" + marker + "\" was not found in the detail page.");
            }
            return index;
        }