/// <summary>
/// Walks the sibling chain starting at <paramref name="elem"/> and collects a
/// <c>Section</c> for every H4/H5 heading that is followed by some text content.
/// The walk stops as soon as <c>FiFiPageEntry.IsEnd</c> reports true for the
/// current element.
/// </summary>
/// <param name="elem">First element to inspect.</param>
/// <param name="sections">Sections gathered so far; <c>null</c> means "start empty".</param>
/// <returns>The accumulated sections.</returns>
internal static Section[] GetSections(IElement elem, Section[] sections = null)
{
    string[] headingTags = new string[2] { "H4", "H5" };
    Section[] found = sections ?? new Section[0];

    for (IElement current = elem; !FiFiPageEntry.IsEnd(current); current = current.NextElementSibling)
    {
        if (!headingTags.Contains(current.TagName))
        {
            continue;
        }

        // Some sections have no text content; those headings are skipped.
        var body = FiFiPage.GetElementsUntil(headingTags, current.NextElementSibling);
        bool hasContent = false;
        if (body.Any())
        {
            // Every element's trimmed text is joined, so all of them are evaluated.
            string joinedText = string.Concat(body.Select(e => e.TextContent.Trim()));
            hasContent = joinedText.Length > 0;
        }

        if (hasContent)
        {
            found = AddSection(found, new Section(current));
        }
    }

    return found;
}
/// <summary>
/// Looks up <paramref name="term"/> through <c>FiFiPage</c> and then <c>EnFiPage</c>
/// and returns a single <c>TermObject</c> whose entries are the FiFi entries
/// followed by the EnFi entries.
/// </summary>
/// <param name="term">The term to look up.</param>
/// <returns>
/// A <c>TermObject</c> carrying the term reported by the FiFi result. A page that
/// is empty or not Finnish contributes no entries.
/// </returns>
public static TermObject GetTerm(string term)
{
    // FiFi lookup first; fall back to an entry-less object when the page is unusable.
    FiFiPage fifiPage = FiFiPage.GetPage(term);
    TermObject fifiObject;
    if (fifiPage.IsEmpty() || !fifiPage.IsFinnish())
    {
        fifiObject = new TermObject() { term = term, entries = Enumerable.Empty<EntryObject>() };
    }
    else
    {
        fifiObject = (TermObject)fifiPage.GetJsonObject();
    }

    // Same treatment for the EnFi lookup.
    EnFiPage enfiPage = EnFiPage.GetPage(term);
    TermObject enfiObject;
    if (enfiPage.IsEmpty() || !enfiPage.IsFinnish())
    {
        enfiObject = new TermObject() { term = term, entries = Enumerable.Empty<EntryObject>() };
    }
    else
    {
        enfiObject = (TermObject)enfiPage.GetJsonObject();
    }

    TermObject merged = new TermObject();
    merged.term = fifiObject.term;
    merged.entries = fifiObject.entries.Concat(enfiObject.entries);
    return merged;
}
/// <summary>
/// Scans <paramref name="elements"/> in order and appends a <c>FiFiPageEntry</c>
/// to <paramref name="entries"/> for every H3 element encountered. Scanning stops
/// when <c>FiFiPage.IsEndOfPage</c> reports true, which is also queried with
/// <c>null</c> once the array is exhausted.
/// </summary>
/// <param name="elements">Elements to scan; must not be null.</param>
/// <param name="entries">Entries collected so far.</param>
/// <returns>The entries array extended through <c>FiFiPageEntry.AddEntry</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="elements"/> is null.</exception>
/// <remarks>
/// Implemented as a loop rather than per-element recursion: the recursive form
/// copied the whole remaining array for every element (quadratic work) and used
/// one stack frame per element. The array is now copied only when an entry is
/// actually created.
/// </remarks>
internal static FiFiPageEntry[] GetEntries(IElement[] elements, FiFiPageEntry[] entries)
{
    if (elements == null)
    {
        // Keeps the exception type callers previously got from FirstOrDefault().
        throw new ArgumentNullException("elements");
    }

    FiFiPageEntry[] collected = entries;
    for (int index = 0; ; index++)
    {
        // Past the last element we hand null to IsEndOfPage, exactly what
        // FirstOrDefault() produced for an empty array.
        IElement elem = index < elements.Length ? elements[index] : null;
        if (FiFiPage.IsEndOfPage(elem))
        {
            return collected;
        }

        // Page entries are headed by h3 tags; anything else is skipped.
        if (elem.TagName != "H3")
        {
            continue;
        }

        // The entry is built from the heading and everything after it.
        IElement[] fromHeading = index == 0 ? elements : elements.Skip(index).ToArray();
        collected = FiFiPageEntry.AddEntry(collected, new FiFiPageEntry(fromHeading));
    }
}
/// <summary>
/// Collects <c>Item</c>s from <paramref name="elem"/> and its following siblings.
/// Elements whose trimmed text is empty are skipped. Collection stops at the end
/// of the page or at the first non-empty H3/H4/H5 heading.
/// </summary>
/// <param name="elem">First element to inspect.</param>
/// <param name="items">Items gathered so far; <c>null</c> means "start empty".</param>
/// <returns>The accumulated items.</returns>
internal static Item[] GetItems(IElement elem, Item[] items = null)
{
    Item[] gathered = items ?? new Item[0];
    IElement current = elem;

    while (!FiFiPage.IsEndOfPage(current))
    {
        bool isBlank = String.IsNullOrEmpty(current.TextContent.Trim());
        if (!isBlank)
        {
            string tag = current.TagName;

            // A heading with text closes the run of items.
            if (tag == "H3" || tag == "H4" || tag == "H5")
            {
                break;
            }

            if (tag == "UL" || tag == "OL" || tag == "DIV" || tag == "DD")
            {
                // Container elements contribute one item per matching descendant.
                Item[] nested = current.QuerySelectorAll("li,dl").Select(li => new Item(li)).ToArray();
                gathered = gathered.Concat(nested).ToArray();
            }
            else
            {
                // Any other element becomes a single item itself.
                Item[] single = new Item[1] { new Item(current) };
                gathered = gathered.Concat(single).ToArray();
            }
        }

        current = current.NextElementSibling;
    }

    return gathered;
}
/// <summary>
/// Reports whether <paramref name="elem"/> terminates the current run of elements:
/// either <c>FiFiPage.IsEndOfPage</c> says so, or the element is an H3.
/// </summary>
/// <param name="elem">Element to test.</param>
/// <returns><c>true</c> when the element is a terminator; otherwise <c>false</c>.</returns>
internal static bool IsEnd(IElement elem)
{
    // TODO: this should be moved to WiktionaryPage.
    if (FiFiPage.IsEndOfPage(elem))
    {
        return true;
    }

    return elem.TagName == "H3";
}
// end of section / entry
/// <summary>
/// Returns <c>true</c> when <c>FiFiPage.IsEndOfPage</c> holds for
/// <paramref name="elem"/> or when its tag name is "H3".
/// </summary>
/// <param name="elem">Element to test.</param>
/// <returns>Whether the element marks the end.</returns>
internal static bool IsEnd(IElement elem)
{
    // The tag is only inspected once the end-of-page check has come back false.
    if (!FiFiPage.IsEndOfPage(elem))
    {
        bool isEntryHeading = elem.TagName == "H3";
        return isEntryHeading;
    }

    return true;
}