/// <summary>
/// Builds an <see cref="OrdEntry"/> from a single "super lemma" HTML node by
/// reading the base form, word class, inflections and every valid definition
/// found under its <c>lexemdiv</c>/<c>lexem</c> children.
/// </summary>
/// <param name="superLemmaNode">The HTML node that contains one dictionary entry.</param>
/// <returns>
/// The parsed entry. Its definition list is empty when the node has no
/// <c>lexemdiv</c>/<c>lexem</c> children or none of them yields a valid definition.
/// </returns>
private OrdEntry ProcessSuperLemmaNode(HtmlNode superLemmaNode)
{
    // Joins the inner text of every descendant carrying the given class,
    // or returns an empty string when there is no such descendant.
    string JoinInnerTextByClass(string className)
    {
        var nodes = superLemmaNode.SelectNodes($".//*[@class='{className}']");
        return nodes != null
            ? string.Join(", ", nodes.Select(x => x.InnerText))
            : string.Empty;
    }

    // "orto" is preferred; "grundform_ptv" is the fallback source for the base form.
    var grundForm = JoinInnerTextByClass("orto");
    if (grundForm == string.Empty)
    {
        grundForm = JoinInnerTextByClass("grundform_ptv");
    }

    var ordklass = JoinInnerTextByClass("ordklass");
    var böjningar = JoinInnerTextByClass("bojning");

    var definitions = new List<OrdDefinition>();

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so every lookup is guarded before it is enumerated.
    var lexemDivs = superLemmaNode.SelectNodes("./*[@class='lexemdiv']");
    if (lexemDivs != null)
    {
        foreach (var lexemDiv in lexemDivs)
        {
            var lexems = lexemDiv.SelectNodes("./*[@class='lexem']");
            if (lexems == null)
            {
                continue;
            }

            foreach (var lexem in lexems)
            {
                var ordDefinitionBuilder = new OrdDefinitionBuilder();
                ordDefinitionBuilder.Definition = lexem.SelectSingleNode(".//*[@class='def']")?.InnerText;
                ordDefinitionBuilder.DefinitionT = lexem.SelectSingleNode(".//*[@class='deft']")?.InnerText;

                var syntexNodes = lexem.SelectNodes(".//*[@class='syntex']");
                if (syntexNodes != null)
                {
                    foreach (var syntexNode in syntexNodes)
                    {
                        ordDefinitionBuilder.Exempel.Add(syntexNode.InnerText);
                    }
                }

                // Constructions are the "vt" nodes nested inside the "valens" node.
                var vtNodes = lexem
                    .SelectSingleNode(".//*[@class='valens']")
                    ?.SelectNodes(".//*[@class='vt']");
                if (vtNodes != null)
                {
                    foreach (var vtNode in vtNodes)
                    {
                        ordDefinitionBuilder.Konstruktion.Add(vtNode.InnerText);
                    }
                }

                // Only definitions the model itself considers valid are kept.
                var definition = ordDefinitionBuilder.AsNew();
                if (definition.IsValid())
                {
                    definitions.Add(definition);
                }
            }
        }
    }

    return new OrdEntry(grundForm, ordklass, definitions, böjningar, SearchUrl);
}
/// <summary>
/// Loads the Folkets Ordbok lexicon into <c>Words</c>. The lexicon file is
/// looked up in the application's base directory and downloaded there first
/// when it does not exist yet.
/// </summary>
public FolketsOrdbok()
{
    // One absolute path is used for the existence check, the download target
    // and the read, so the result does not depend on the current working directory.
    var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, FolketsOrdbokFileName);

    Console.WriteLine("Downloading lexicon from Folkets Ordbok...");
    if (File.Exists(path))
    {
        Console.WriteLine("Lexicon from Folkets Ordbok already exists. Skipping download...");
    }
    else
    {
        using (var client = new WebClient())
        {
            // TODO: Check if URL is valid...
            client.DownloadFile(Resources.ConstantData.Sources.FolketsOrdbokLexicon, path);
        }

        Console.WriteLine("Finished downloading lexicon from Folkets Ordbok.");
    }

    FolketsOrdbokSource folketsOrdbokSource = null;
    if (File.Exists(path))
    {
        using (var file = File.OpenText(path))
        {
            var serializer = new XmlSerializer(typeof(FolketsOrdbokSource));
            folketsOrdbokSource = (FolketsOrdbokSource)serializer.Deserialize(file);
        }
    }
    else
    {
        Console.WriteLine("Failed to load Folkets Ordbok.");
    }

    // Nothing to import when the file could not be loaded or contains no words.
    if (folketsOrdbokSource?.Words == null)
    {
        return;
    }

    foreach (var word in folketsOrdbokSource.Words)
    {
        // Each source word contributes exactly one definition, which may be empty.
        var definitionBuilder = new OrdDefinitionBuilder();
        if (word.Definition != null)
        {
            definitionBuilder.Definition = word.Definition.Value;
        }

        if (word.Example != null)
        {
            definitionBuilder.Exempel.Add(word.Example.Value);
        }

        var ordBuilder = new OrdEntryBuilder();
        ordBuilder.Definitioner.Add(definitionBuilder.AsNew());
        ordBuilder.Grundform = word.Value;
        ordBuilder.Ordklass = word.Class;

        Words.Add(ordBuilder.AsNew());
    }
}