// Parses a single book. Reads the supplied reader to its end and fills a new Book from
// the elements it recognizes; elements are matched by their local name only and any
// unrecognized element is ignored. Throws ApplicationException when a recognized element
// does not have the expected shape (missing ID, unexpected parse type, missing value).
//
// NOTE(review): the ReadElementContentAs* calls appear to leave the reader behind the end
// tag of the element they consumed, so the next Read() of the loop could step over an
// element which follows immediately, without any whitespace in between. Confirm that the
// input is always formatted with whitespace between the elements.
Book ParseBook(XmlReader reader)
{
    var book = new Book();
    while (reader.Read())
    {
        if (reader.NodeType != XmlNodeType.Element)
            continue;
        switch (reader.LocalName)
        {
            case "etext":
                // The number follows a five-character prefix in the ID attribute
                // (presumably "etext"). A missing or too short attribute is reported
                // in the same way as the other malformed input in this method instead
                // of failing with a NullReferenceException or
                // an ArgumentOutOfRangeException.
                var id = reader.GetAttribute("ID", RDF);
                if (id == null || id.Length <= 5)
                    throw new ApplicationException("Missing or malformed book ID.");
                book.Number = int.Parse(id.Substring(5), CultureInfo.InvariantCulture);
                break;
            case "title":
                if (reader.GetAttribute("parseType", RDF) != "Literal")
                    throw new ApplicationException("Unrecognized parse type.");
                book.Title = reader.ReadElementContentAsString();
                break;
            case "description":
                book.Notes = ParseText(reader);
                break;
            case "creator":
                // Authors also extend the era of the book by their life spans.
                book.Authors = ParsePersonsAndEra(book, reader);
                break;
            case "contributor":
                // Eras of contributors do not affect the era of the book intentionally.
                // Contributors might be not only illustrators, who would live at the
                // same time as authors, but also people who retyped the book and
                // uploaded it to the Project Gutenberg web site. They would mess the
                // era of the book which is supposed to embrace the time when the actual
                // paper book was written.
                book.Contributors = ParsePersons(reader);
                break;
            case "language":
                if (!reader.ReadToDescendant("value", RDF))
                    throw new ApplicationException("Missing language value.");
                book.Language = reader.ReadElementContentAsString();
                break;
            case "subject":
                // The tags are parsed from a reader limited to the subject element.
                using (var subreader = reader.ReadSubtree())
                    book.Tags = ParseTags(subreader).ToArray();
                break;
            case "created":
                if (!reader.ReadToDescendant("value", RDF))
                    throw new ApplicationException("Missing creation date value.");
                book.Included = reader.ReadElementContentAsDate();
                break;
            case "downloads":
                if (!reader.ReadToDescendant("value", RDF))
                    throw new ApplicationException("Missing download count value.");
                book.Downloads = reader.ReadElementContentAsInt();
                break;
        }
    }
    return book;
}
// Parses one or more persons (authors or contributors) whose entries carry the era
// (year span) of their lives. Returns the names of the persons; as a side effect the
// era of the book is widened by ParsePersonEra for every person read.
string[] ParsePersonsAndEra(Book book, XmlReader reader)
{
    // The parseType attribute serves as a cheap indicator of a multivalue property:
    // anything but "Literal" is handed over as a subtree holding multiple persons,
    // which spares inspecting the whole element structure here.
    if (reader.GetAttribute("parseType", RDF) != "Literal")
    {
        using (var subtree = reader.ReadSubtree())
        {
            return ParseMultiplePersonsAndEra(book, subtree);
        }
    }

    // A literal element holds exactly one person.
    string name = reader.ReadElementContentAsString();
    ParsePersonEra(book, name);
    return new[] { name };
}
// Looks for the era of a person's life in the text following the last comma of the line
// with the person's name. If a year span can be parsed from it, the era of the book is
// replaced by its union with that span; otherwise the book is left untouched.
void ParsePersonEra(Book book, string value)
{
    int separator = value.LastIndexOf(',');
    // No comma at all, or a comma at the very beginning, means there is no era to read.
    if (separator <= 0)
        return;

    string candidate = value.Substring(separator + 1).Trim();
    YearSpan lifeSpan;
    if (YearSpan.TryParse(candidate, CultureInfo.InvariantCulture, out lifeSpan))
        book.Era = book.Era.Union(lifeSpan);
}
// Parses a subtree with multiple persons, one person per "li" element. It is meant to be
// called from the ParsePersonsAndEra method only. Returns the names of the persons in
// the order of reading, or null if the subtree contained no "li" element; the era of
// the book is updated for every person. Throws ApplicationException if an item is not
// a literal.
string[] ParseMultiplePersonsAndEra(Book book, XmlReader reader)
{
    var names = new List<string>();
    while (reader.ReadToFollowing("li", RDF))
    {
        // Only multivalues consisting of literals have been observed so far. Better
        // value parsing is to be implemented here once other kinds occur.
        if (reader.GetAttribute("parseType", RDF) != "Literal")
            throw new ApplicationException("Unrecognized parse type.");

        string name = reader.ReadElementContentAsString();
        ParsePersonEra(book, name);
        names.Add(name);
    }

    // An empty result is reported by null and not by an empty array.
    if (names.Count == 0)
        return null;
    return names.ToArray();
}
// Gets a book by its name or number if it exists, not throwing an exception if it does
// not. The string is interpreted as a number whenever it can be parsed as an integer
// (NumberStyles.Integer, invariant culture); only then the book is looked up in
// BooksByNumber and the name lookup is not attempted at all. Returns true and the book
// if found; otherwise returns false and sets the book to null for a number, or to
// whatever BooksByName.TryGetValue leaves there for a name.
public bool TryGetBook(string name, out Book book)
{
    int index;
    if (!int.TryParse(name, NumberStyles.Integer, CultureInfo.InvariantCulture, out index))
        return BooksByName.TryGetValue(name, out book);

    // A number outside of the table cannot belong to any book.
    if (index < 0 || index >= BooksByNumber.Length)
    {
        book = null;
        return false;
    }

    // Slots of the table may be empty; an empty slot means there is no such book.
    book = BooksByNumber[index];
    return book != null;
}