/// <summary>
/// Reads one dictionary section of a word page and stores what it finds on <paramref name="word" />.
/// </summary>
/// <remarks>
/// When the section contains <c>div.section-h3</c> children, the child titled "Hình thái từ" is handed to
/// <c>ReadWordForms</c>, the child titled "Cấu trúc từ" is handed to <c>ReadPhases</c>, and every remaining
/// child is read with <c>ReadWordClass</c>. Otherwise each <c>div.section-h5</c> child becomes a definition
/// with <c>UnknownWordClass</c>. If neither kind of child exists, a warning is logged.
/// </remarks>
/// <param name="dictionaryNode">Root node of the dictionary section.</param>
/// <param name="word">Word that receives the parsed word forms, phases and definitions.</param>
/// <param name="url">Address of the page being read; used only in log messages.</param>
private void ReadDictionary(HtmlNode dictionaryNode, Word word, string url)
{
    // SelectSingleNode returns null when the section has no h2/h3 span heading, so the
    // null-conditional has to guard the node itself, not only its InnerText.
    var titleNode = dictionaryNode.SelectSingleNode("./h2/span|./h3/span");
    if (titleNode == null)
    {
        logger.Log(GetType(), Level.Warn, $"Could not get the dictionary name at {url}", null);
    }

    var dictionaryName = titleNode?.InnerText?.TrimAllSpecialCharacters();

    // A fresh SubDictionary is created on every call; looking up an existing one by name is disabled here.
    SubDictionary subDictionary = new SubDictionary()
    {
        Name = dictionaryName,
        // Only the "Thông dụng" dictionary is flagged; all others are left as null rather than false.
        IsPrimary = dictionaryName == "Thông dụng" ? true : (bool?)null
    };

    var wordClassNodes = dictionaryNode.SelectNodes("./div[@class='section-h3']");
    if (wordClassNodes != null)
    {
        // True when the section's first h3 span contains the given title (case-insensitive).
        // A section without that span yields a null index, which compares as false.
        Func<HtmlNode, string, bool> hasTitle = (section, title) =>
            section.SelectSingleNode("./h3[1]/span")?.InnerHtml.IndexOf(title, StringComparison.OrdinalIgnoreCase) >= 0;

        var wordFormsNode = wordClassNodes.SingleOrDefault(c => hasTitle(c, "Hình thái từ"));
        if (wordFormsNode != null)
        {
            // Taken out of the collection first so it is not read again as a word class below.
            wordClassNodes.Remove(wordFormsNode);
            word.WordForms.AddRange(ReadWordForms(wordFormsNode));
        }

        var phasesNode = wordClassNodes.SingleOrDefault(c => hasTitle(c, "Cấu trúc từ"));
        if (phasesNode != null)
        {
            wordClassNodes.Remove(phasesNode);
            word.Phases = ReadPhases(phasesNode, subDictionary);
        }

        var defs = wordClassNodes.SelectMany(wc => ReadWordClass(wc, word, subDictionary));
        word.Definitions.AddRange(defs);
        return;
    }

    // This is for pages like http://tratu.soha.vn/dict/en_vn/Ablative_method, a lot of pages have this form
    var defNodes = dictionaryNode.SelectNodes("./div[@class='section-h5']");
    if (defNodes == null)
    {
        logger.Log(GetType(), Level.Warn, $"Could not get any word class node at {url}", null);
        return;
    }

    var plainDefs = defNodes.Select(d => new Definition()
    {
        Content = d.InnerText?.TrimAllSpecialCharacters(),
        Word = word,
        SubDictionary = subDictionary,
        WordClass = UnknownWordClass,
    });
    word.Definitions.AddRange(plainDefs);
}
/// <summary>
/// Looks up <paramref name="key" /> in the underlying <c>SubDictionary</c> and converts the stored value for the caller.
/// </summary>
/// <param name="key">The key to look up; it is converted with <c>IntCast</c> before the lookup.</param>
/// <param name="value">
/// Receives the stored value converted with <c>ExtCast</c> when the key is present;
/// otherwise <c>default(TV1)</c>.
/// </param>
/// <returns>true when the underlying dictionary contains the converted key; otherwise, false.</returns>
public bool TryGetValue(TK1 key, out TV1 value)
{
    TV2 stored;
    if (!SubDictionary.TryGetValue(IntCast(key), out stored))
    {
        value = default(TV1);
        return false;
    }

    value = ExtCast(stored);
    return true;
}
/// <summary>
/// Builds a <see cref="Definition" /> from a definition node, including its usage examples.
/// </summary>
/// <remarks>
/// The definition text is taken from <c>./h5[1]/span</c>, falling back to <c>./h3[1]/span</c>.
/// The texts found under <c>./dl/dd/dl/dd</c> are consumed in pairs: the first of each pair becomes
/// <c>Usage.Sample</c> and the second <c>Usage.Translation</c>.
/// </remarks>
/// <param name="definitionNode">Node holding the definition heading and its usage list.</param>
/// <param name="word">Word the definition belongs to.</param>
/// <param name="subDictionary">Sub-dictionary the definition belongs to.</param>
/// <param name="wordClass">Word class assigned to the definition.</param>
/// <returns>The definition; its <c>Content</c> is null when no heading span is found.</returns>
private Definition ReadDefinition(HtmlNode definitionNode, Word word, SubDictionary subDictionary, WordClass wordClass)
{
    // The h3 fallback is in case wordClassNode is treated as a definition node
    // (http://tratu.soha.vn/dict/en_vn/Allegedly).
    var definitionTextNode = definitionNode.SelectSingleNode("./h5[1]/span")
        ?? definitionNode.SelectSingleNode("./h3[1]/span");

    var usageTexts = definitionNode.SelectNodes("./dl/dd/dl/dd")
        ?.Select(u => u.InnerText?.TrimAllSpecialCharacters())
        .ToList();

    var definition = new Definition()
    {
        Content = definitionTextNode?.InnerText?.TrimAllSpecialCharacters(),
        Word = word,
        SubDictionary = subDictionary,
        WordClass = wordClass,
        Usages = new List<Usage>()
    };

    if (usageTexts == null)
    {
        return definition;
    }

    if (usageTexts.Count % 2 != 0)
    {
        // This is for pages like http://tratu.soha.vn/dict/en_vn/According,
        // http://tratu.soha.vn/dict/en_vn/Acclimatization: the leading entry has no partner, so it is dropped.
        usageTexts.RemoveAt(0);
        logger.Log(GetType(), Level.Warn, "Number of usages is an odd number, remove the first one", null);
    }

    // The count is even at this point; walk the list two entries at a time.
    for (int i = 0; i + 1 < usageTexts.Count; i += 2)
    {
        definition.Usages.Add(new Usage()
        {
            Sample = usageTexts[i],
            Translation = usageTexts[i + 1]
        });
    }

    return definition;
}
/// <summary>
/// Reports whether the underlying <c>SubDictionary</c> holds an entry for <paramref name="key" />.
/// </summary>
/// <param name="key">The key to locate; it is converted with <c>IntCast</c> before the check.</param>
/// <returns>true when the underlying dictionary contains the converted key; otherwise, false.</returns>
public bool ContainsKey(TK1 key) => SubDictionary.ContainsKey(IntCast(key));
/// <summary>
/// Removes the entry for <paramref name="key" /> from the underlying <c>SubDictionary</c>.
/// </summary>
/// <param name="key">The key of the entry to remove; it is converted with <c>IntCast</c> first.</param>
/// <returns>Whatever the underlying dictionary's <c>Remove</c> reports for the converted key.</returns>
public bool Remove(TK1 key)
{
    var innerKey = IntCast(key);
    return SubDictionary.Remove(innerKey);
}
/// <summary>
/// Removes a key/value pair from the underlying <c>SubDictionary</c>.
/// </summary>
/// <param name="item">The pair to remove; it is converted with <c>IntCast</c> first.</param>
/// <returns>Whatever the underlying collection's <c>Remove</c> reports for the converted pair.</returns>
public bool Remove(KeyValuePair<TK1, TV1> item)
{
    var innerItem = IntCast(item);
    return SubDictionary.Remove(innerItem);
}
/// <summary>
/// Reports whether the underlying <c>SubDictionary</c> contains the given key/value pair.
/// </summary>
/// <param name="item">The pair to locate; it is converted with <c>IntCast</c> before the check.</param>
/// <returns>true when the underlying collection contains the converted pair; otherwise, false.</returns>
public bool Contains(KeyValuePair<TK1, TV1> item) => SubDictionary.Contains(IntCast(item));
/// <summary>
/// Adds an entry to the underlying <c>SubDictionary</c>.
/// </summary>
/// <param name="key">The key of the new entry; converted with <c>IntCast</c>.</param>
/// <param name="value">The value of the new entry; converted with <c>IntCast</c>.</param>
public void Add(TK1 key, TV1 value)
{
    // Key is converted before value, as in a direct two-argument call.
    var innerKey = IntCast(key);
    var innerValue = IntCast(value);
    SubDictionary.Add(innerKey, innerValue);
}
/// <summary>
/// Removes every entry from the underlying <c>SubDictionary</c>.
/// </summary>
public void Clear() => SubDictionary.Clear();
/// <summary>
/// Adds a key/value pair to the underlying <c>SubDictionary</c>.
/// </summary>
/// <param name="item">The pair to add; it is converted with <c>IntCast</c> first.</param>
public void Add(KeyValuePair<TK1, TV1> item) => SubDictionary.Add(IntCast(item));
/// <summary>
/// Enumerates the entries of the underlying <c>SubDictionary</c>, converting each one with <c>ExtCast</c>.
/// </summary>
/// <returns>An enumerator over the converted key/value pairs.</returns>
public IEnumerator<KeyValuePair<TK1, TV1>> GetEnumerator()
{
    // Select is lazy: each pair is converted only as the enumerator advances.
    var converted = SubDictionary.Select(ExtCast);
    return converted.GetEnumerator();
}
get => ExtCast(SubDictionary[IntCast(key)]);
/// <summary>
/// Reads every phase listed under <paramref name="phasesNode" />.
/// </summary>
/// <param name="phasesNode">Section node whose direct <c>div</c> children each describe one phase.</param>
/// <param name="subDictionary">Sub-dictionary assigned to every phase that is read.</param>
/// <returns>The phases in document order; an empty list when the section has no <c>div</c> children.</returns>
private List<Phase> ReadPhases(HtmlNode phasesNode, SubDictionary subDictionary)
{
    // SelectNodes returns null, not an empty collection, when nothing matches.
    var phaseNodes = phasesNode.SelectNodes("./div");
    if (phaseNodes == null)
    {
        return new List<Phase>();
    }

    return phaseNodes.Select(p => ReadPhase(p, subDictionary)).ToList();
}

/// <summary>
/// Reads a single phase: its heading text plus its definitions and their usage examples.
/// </summary>
private static Phase ReadPhase(HtmlNode phaseNode, SubDictionary subDictionary)
{
    var phaseContentNode = phaseNode.SelectSingleNode("./h5");
    var phase = new Phase()
    {
        SubDictionary = subDictionary,
        Content = phaseContentNode?.InnerText?.TrimAllSpecialCharacters()
    };

    var defNodes = phaseNode.SelectNodes("./dl/dd/dl/dd");
    if (defNodes == null)
    {
        // No nested definition list: the definition text is taken from the second sibling after the heading.
        // NOTE(review): presumably the first sibling is a whitespace/text node - confirm against real pages.
        var defContentNode = phaseContentNode?.NextSibling?.NextSibling;
        phase.Definitions = new List<PhaseDefinition>()
        {
            new PhaseDefinition() { Content = defContentNode?.InnerText?.TrimAllSpecialCharacters() }
        };
        return phase;
    }

    var defs = new List<PhaseDefinition>();
    string pendingContent = null;
    var pendingUsages = new List<string>();

    foreach (HtmlNode d in defNodes)
    {
        string defContent = d.InnerText?.TrimAllSpecialCharacters();
        var usageNodes = d.SelectNodes("./dl/dd");

        if (string.IsNullOrEmpty(defContent))
        {
            // A node without text of its own only contributes usages to the definition being collected.
            if (usageNodes != null)
            {
                pendingUsages.AddRange(usageNodes.Select(u => u.InnerText?.TrimAllSpecialCharacters()));
            }

            continue;
        }

        // A node with text starts a new definition, so the one collected so far is complete.
        if (!string.IsNullOrEmpty(pendingContent))
        {
            defs.Add(CreatePhaseDefinition(pendingContent, pendingUsages));
        }

        pendingUsages = usageNodes?.Select(u => u.InnerText?.TrimAllSpecialCharacters()).ToList() ?? new List<string>();
        pendingContent = defContent;
    }

    // Flush the last definition explicitly instead of appending a sentinel node to the selected collection.
    if (!string.IsNullOrEmpty(pendingContent))
    {
        defs.Add(CreatePhaseDefinition(pendingContent, pendingUsages));
    }

    phase.Definitions = defs;
    return phase;
}

/// <summary>
/// Builds a phase definition, pairing consecutive usage texts as sample and translation.
/// A trailing text without a partner is ignored.
/// </summary>
private static PhaseDefinition CreatePhaseDefinition(string content, List<string> usageTexts)
{
    var usages = new List<PhaseUsage>();
    for (int i = 0; i + 1 < usageTexts.Count; i += 2)
    {
        usages.Add(new PhaseUsage()
        {
            Sample = usageTexts[i],
            Translation = usageTexts[i + 1]
        });
    }

    return new PhaseDefinition() { Content = content, Usages = usages };
}
/// <summary>
/// Reads the definitions grouped under one word-class section.
/// </summary>
/// <remarks>
/// The word-class name is the direct text of <c>./h3[1]/span</c>, truncated to 100 characters.
/// Each direct <c>div</c> child is read with <c>ReadDefinition</c>; when there are none, the section
/// itself is read as a single definition.
/// </remarks>
/// <param name="wordClassNode">Section node holding the word-class heading and its definitions.</param>
/// <param name="word">Word the definitions belong to.</param>
/// <param name="subDictionary">Sub-dictionary the definitions belong to.</param>
/// <returns>
/// The definitions of the section. When the section has <c>div</c> children the sequence is lazily
/// evaluated and omits definitions whose content is null or empty.
/// </returns>
private IEnumerable<Definition> ReadWordClass(HtmlNode wordClassNode, Word word, SubDictionary subDictionary)
{
    // NOTE(review): the reason for the 100-character cap is not visible here - confirm before changing it.
    const int MaxWordClassNameLength = 100;

    // The heading may have no direct text node, in which case SelectSingleNode returns null
    // and the word class is left without a name.
    string wordClassText = wordClassNode.SelectSingleNode("./h3[1]/span/text()")?.InnerText?.TrimAllSpecialCharacters();
    if (wordClassText != null && wordClassText.Length > MaxWordClassNameLength)
    {
        wordClassText = wordClassText.Substring(0, MaxWordClassNameLength);
    }

    // A fresh WordClass is created on every call; looking up an existing one by name is disabled here.
    WordClass wordClass = new WordClass() { Name = wordClassText };

    var definitionNodes = wordClassNode.SelectNodes("./div");
    if (definitionNodes == null)
    {
        // Treat wordClassNode as a definition node (http://tratu.soha.vn/dict/en_vn/Allegedly)
        return new List<Definition>() { ReadDefinition(wordClassNode, word, subDictionary, wordClass) };
    }

    // Definitions with null or empty content are filtered out because they would otherwise
    // prevent the word from being registered (http://tratu.soha.vn/dict/en_vn/Absorbency).
    return definitionNodes
        .Select(n => ReadDefinition(n, word, subDictionary, wordClass))
        .Where(d => !string.IsNullOrEmpty(d.Content));
}