示例#1
0
        /// <summary>
        /// Reads one dictionary section of a word page and adds the word forms, phases and
        /// definitions found in it to <paramref name="word"/>.
        /// </summary>
        /// <param name="dictionaryNode">Section node; the dictionary name is read from its <c>./h2/span</c> or <c>./h3/span</c> child.</param>
        /// <param name="word">Word that receives the parsed word forms, phases and definitions.</param>
        /// <param name="url">Address of the page being parsed; only used in warning messages.</param>
        private void ReadDictionary(HtmlNode dictionaryNode, Word word, string url)
        {
            // The heading span can be absent; guard the node itself instead of crashing on it,
            // and leave a trace so the page can be inspected later.
            var headingNode = dictionaryNode.SelectSingleNode("./h2/span|./h3/span");
            if (headingNode == null)
            {
                logger.Log(GetType(), Level.Warn, $"Could not get the dictionary name at {url}", null);
            }

            var dictionaryName = headingNode?.InnerText?.TrimAllSpecialCharacters();

            // Looking up an already known SubDictionary by name is currently disabled,
            // so a new instance is created for every section.
            var subDictionary = new SubDictionary()
            {
                Name      = dictionaryName,
                IsPrimary = dictionaryName == "Thông dụng" ? true : (bool?)null
            };

            var wordClassNodes = dictionaryNode.SelectNodes("./div[@class='section-h3']");

            if (wordClassNodes != null)
            {
                // The "word forms" and "phases" sections are taken out of the collection first,
                // so that only real word class sections are left for ReadWordClass.
                var wordFormsNode = FindSectionByHeading(wordClassNodes, "Hình thái từ");
                if (wordFormsNode != null)
                {
                    wordClassNodes.Remove(wordFormsNode);
                    word.WordForms.AddRange(ReadWordForms(wordFormsNode));
                }

                var phasesNode = FindSectionByHeading(wordClassNodes, "Cấu trúc từ");
                if (phasesNode != null)
                {
                    wordClassNodes.Remove(phasesNode);
                    word.Phases = ReadPhases(phasesNode, subDictionary);
                }

                word.Definitions.AddRange(wordClassNodes.SelectMany(wc => ReadWordClass(wc, word, subDictionary)));
                return;
            }

            // This is for pages like http://tratu.soha.vn/dict/en_vn/Ablative_method, a lot of pages have this form
            var defNodes = dictionaryNode.SelectNodes("./div[@class='section-h5']");
            if (defNodes == null)
            {
                logger.Log(GetType(), Level.Warn, $"Could not get any word class node at {url}", null);
                return;
            }

            // These pages carry no word class heading, so every definition gets UnknownWordClass.
            word.Definitions.AddRange(defNodes.Select(d => new Definition()
            {
                Content       = d.InnerText?.TrimAllSpecialCharacters(),
                Word          = word,
                SubDictionary = subDictionary,
                WordClass     = UnknownWordClass,
            }));
        }

        /// <summary>
        /// Returns the single section whose <c>./h3[1]/span</c> heading contains
        /// <paramref name="headingText"/> (ordinal, case-insensitive), or null when none does.
        /// </summary>
        /// <param name="sections">Candidate section nodes.</param>
        /// <param name="headingText">Text to look for inside the heading's inner HTML.</param>
        /// <exception cref="InvalidOperationException">More than one section matches.</exception>
        private static HtmlNode FindSectionByHeading(IEnumerable<HtmlNode> sections, string headingText)
        {
            // A section without the heading span yields a null index, which compares false against 0.
            return sections.SingleOrDefault(s => s.SelectSingleNode("./h3[1]/span")?.InnerHtml.IndexOf(headingText, StringComparison.OrdinalIgnoreCase) >= 0);
        }
示例#2
0
        /// <summary>
        /// Looks up <paramref name="key"/> in <c>SubDictionary</c> after converting it with <c>IntCast</c>.
        /// </summary>
        /// <param name="key">The key whose value to get.</param>
        /// <param name="value">
        /// On a hit, the stored value converted with <c>ExtCast</c>; otherwise <c>default(TV1)</c>.
        /// </param>
        /// <returns>
        /// true if <c>SubDictionary</c> contains an entry for the converted key; otherwise, false.
        /// </returns>
        public bool TryGetValue(TK1 key, out TV1 value)
        {
            TV2 stored;
            bool found = SubDictionary.TryGetValue(IntCast(key), out stored);

            // The stored value is only converted when the lookup succeeded.
            value = found ? ExtCast(stored) : default(TV1);
            return found;
        }
示例#3
0
        /// <summary>
        /// Builds a <see cref="Definition"/> from a definition node, including its usage examples.
        /// </summary>
        /// <param name="definitionNode">Node whose <c>./h5[1]/span</c> (or, failing that, <c>./h3[1]/span</c>) holds the definition text.</param>
        /// <param name="word">Word the definition belongs to.</param>
        /// <param name="subDictionary">Dictionary section the definition was found in.</param>
        /// <param name="wordClass">Word class assigned to the definition.</param>
        /// <returns>The definition; its <c>Content</c> is null when neither heading span exists.</returns>
        private Definition ReadDefinition(HtmlNode definitionNode, Word word, SubDictionary subDictionary, WordClass wordClass)
        {
            // The h3 fallback is in case a wordClassNode is treated as a definition node
            // (http://tratu.soha.vn/dict/en_vn/Allegedly)
            var definitionTextNode = definitionNode.SelectSingleNode("./h5[1]/span")
                                     ?? definitionNode.SelectSingleNode("./h3[1]/span");

            // Flat list of texts that is consumed below as (sample, translation) pairs.
            var usageTexts = definitionNode.SelectNodes("./dl/dd/dl/dd")
                             ?.Select(u => u.InnerText?.TrimAllSpecialCharacters())
                             .ToList();

            var definition = new Definition()
            {
                Content       = definitionTextNode?.InnerText?.TrimAllSpecialCharacters(),
                Word          = word,
                SubDictionary = subDictionary,
                WordClass     = wordClass,
                Usages        = new List <Usage>()
            };

            if (usageTexts == null)
            {
                return(definition);
            }

            if (usageTexts.Count % 2 != 0)
            {
                // This is for pages like http://tratu.soha.vn/dict/en_vn/According, http://tratu.soha.vn/dict/en_vn/Acclimatization
                // where the texts do not pair up; the first one is dropped so the rest do.
                usageTexts.RemoveAt(0);
                logger.Log(GetType(), Level.Warn, "Number of usages is an odd number, remove the first one", null);
            }

            // The count is even at this point, so i + 1 is always a valid index.
            for (int i = 0; i + 1 < usageTexts.Count; i += 2)
            {
                definition.Usages.Add(new Usage()
                {
                    Sample      = usageTexts[i],
                    Translation = usageTexts[i + 1]
                });
            }

            return(definition);
        }
示例#4
0
 /// <summary>
 /// Checks whether <c>SubDictionary</c> has an entry for <paramref name="key"/>,
 /// after converting the key with <c>IntCast</c>.
 /// </summary>
 /// <param name="key">The key to locate.</param>
 /// <returns>
 /// The result of <c>SubDictionary.ContainsKey</c> for the converted key.
 /// </returns>
 public bool ContainsKey(TK1 key) => SubDictionary.ContainsKey(IntCast(key));
示例#5
0
 /// <summary>
 /// Removes the entry for <paramref name="key"/> from <c>SubDictionary</c>,
 /// after converting the key with <c>IntCast</c>.
 /// </summary>
 /// <param name="key">The key of the element to remove.</param>
 /// <returns>
 /// The result of <c>SubDictionary.Remove</c> for the converted key.
 /// </returns>
 public bool Remove(TK1 key) => SubDictionary.Remove(IntCast(key));
示例#6
0
 /// <summary>
 /// Removes a key/value pair from <c>SubDictionary</c>, after converting the pair with <c>IntCast</c>.
 /// </summary>
 /// <param name="item">The key/value pair to remove.</param>
 /// <returns>
 /// The result of <c>SubDictionary.Remove</c> for the converted pair.
 /// </returns>
 public bool Remove(KeyValuePair <TK1, TV1> item) => SubDictionary.Remove(IntCast(item));
示例#7
0
 /// <summary>
 /// Checks whether <c>SubDictionary</c> contains a key/value pair, after converting the pair with <c>IntCast</c>.
 /// </summary>
 /// <param name="item">The key/value pair to locate.</param>
 /// <returns>
 /// The result of <c>SubDictionary.Contains</c> for the converted pair.
 /// </returns>
 public bool Contains(KeyValuePair <TK1, TV1> item) => SubDictionary.Contains(IntCast(item));
示例#8
0
 /// <summary>
 /// Adds an entry to <c>SubDictionary</c>; both the key and the value are converted with <c>IntCast</c> first.
 /// </summary>
 /// <param name="key">The key of the element to add.</param>
 /// <param name="value">The value of the element to add.</param>
 public void Add(TK1 key, TV1 value) => SubDictionary.Add(IntCast(key), IntCast(value));
示例#9
0
 /// <summary>
 /// Removes all entries by calling <c>SubDictionary.Clear</c>.
 /// </summary>
 public void Clear() => SubDictionary.Clear();
示例#10
0
 /// <summary>
 /// Adds a key/value pair to <c>SubDictionary</c>, after converting the pair with <c>IntCast</c>.
 /// </summary>
 /// <param name="item">The key/value pair to add.</param>
 public void Add(KeyValuePair <TK1, TV1> item) => SubDictionary.Add(IntCast(item));
示例#11
0
 /// <summary>
 /// Enumerates the entries of <c>SubDictionary</c>, each converted with <c>ExtCast</c>.
 /// </summary>
 /// <returns>
 /// An enumerator over the converted key/value pairs.
 /// </returns>
 public IEnumerator <KeyValuePair <TK1, TV1> > GetEnumerator()
 {
     // The projection is lazy: each pair is converted as the enumerator advances.
     IEnumerable <KeyValuePair <TK1, TV1> > converted = SubDictionary.Select(entry => ExtCast(entry));
     return converted.GetEnumerator();
 }
示例#12
0
 get => ExtCast(SubDictionary[IntCast(key)]);
示例#13
0
        /// <summary>
        /// Builds one <see cref="Phase"/> per <c>./div</c> child of <paramref name="phasesNode"/>.
        /// </summary>
        /// <param name="phasesNode">Section node whose <c>./div</c> children each describe one phase.</param>
        /// <param name="subDictionary">Dictionary section assigned to every created phase.</param>
        /// <returns>The phases, in the order of the <c>./div</c> children.</returns>
        /// <remarks>
        /// NOTE(review): the result of <c>SelectNodes("./div")</c> and of <c>SelectSingleNode("./h5")</c> is
        /// dereferenced without a null check; other code in this file null-checks such results, so a
        /// section without those children would presumably throw here - confirm.
        /// </remarks>
        private List <Phase> ReadPhases(HtmlNode phasesNode, SubDictionary subDictionary)
        {
            return(phasesNode.SelectNodes("./div").Select(p =>
            {
                var phase = new Phase()
                {
                    SubDictionary = subDictionary
                };

                // The phase text is the inner text of the h5 heading.
                var phaseContentNode = p.SelectSingleNode("./h5");
                phase.Content = phaseContentNode.InnerText?.TrimAllSpecialCharacters();

                var defNodes = p.SelectNodes("./dl/dd/dl/dd");
                if (defNodes != null)
                {
                    // State of the definition currently being collected: its text and the flat
                    // list of usage texts gathered for it so far.
                    string prevDefContent = null;
                    var defs = new List <PhaseDefinition>();
                    var usageStrs = new List <string>();

                    // Sentinel node: it is recognised below by its inner HTML and forces the last
                    // collected definition to be emitted once the real nodes are exhausted.
                    defNodes.Append(HtmlNode.CreateNode("dummy"));
                    foreach (HtmlNode d in defNodes)
                    {
                        string defContent = d.InnerText?.TrimAllSpecialCharacters();
                        var usageNodes = d.SelectNodes("./dl/dd");

                        // A node with text (or the sentinel) starts a new definition, so the
                        // previous one, if any, is completed and stored first.
                        if (!string.IsNullOrEmpty(defContent) || d.InnerHtml == "dummy")
                        {
                            if (!string.IsNullOrEmpty(prevDefContent))
                            {
                                // Usage texts are consumed as (sample, translation) pairs; with an
                                // odd count the integer division leaves the last text unused.
                                var usages = new List <PhaseUsage>();
                                for (int i = 0; i < usageStrs.Count / 2; i++)
                                {
                                    usages.Add(new PhaseUsage()
                                    {
                                        Sample = usageStrs[2 * i],
                                        Translation = usageStrs[2 * i + 1]
                                    });
                                }
                                var def = new PhaseDefinition()
                                {
                                    Content = prevDefContent,
                                    Usages = usages
                                };

                                defs.Add(def);
                            }

                            // Start collecting for the current node: its own nested usage texts
                            // (or an empty list) replace the previous definition's list.
                            usageStrs = usageNodes?.Select(u => u.InnerText?.TrimAllSpecialCharacters()).ToList() ?? new List <string>();
                            prevDefContent = defContent;
                        }
                        else
                        {
                            // A node without text of its own only contributes further usage texts
                            // to the definition that is currently being collected.
                            if (usageNodes != null)
                            {
                                usageStrs.AddRange(usageNodes.Select(u => u.InnerText?.TrimAllSpecialCharacters()));
                            }
                        }
                    }

                    phase.Definitions = defs;
                }
                else
                {
                    // No nested definition nodes: a single definition without usages is taken from
                    // the second sibling after the h5 heading.
                    // NOTE(review): presumably the first sibling is a whitespace text node - confirm.
                    var defContentNode = phaseContentNode.NextSibling.NextSibling;
                    var defContent = defContentNode.InnerText?.TrimAllSpecialCharacters();
                    phase.Definitions = new List <PhaseDefinition>()
                    {
                        new PhaseDefinition()
                        {
                            Content = defContent
                        }
                    };
                }

                return phase;
            }).ToList());
        }
示例#14
0
        /// <summary>
        /// Reads the definitions listed under one word class section.
        /// </summary>
        /// <param name="wordClassNode">Section node; the word class name is the text of its <c>./h3[1]/span</c> heading.</param>
        /// <param name="word">Word the definitions belong to.</param>
        /// <param name="subDictionary">Dictionary section the definitions were found in.</param>
        /// <returns>
        /// The definitions of the section's <c>./div</c> children that have non-empty content, or,
        /// when the section has no such children, a single definition read from the section itself.
        /// </returns>
        private IEnumerable <Definition> ReadWordClass(HtmlNode wordClassNode, Word word, SubDictionary subDictionary)
        {
            const int maxWordClassNameLength = 100;

            // The heading may have no text node at all, in which case SelectSingleNode yields null.
            var headingTextNode = wordClassNode.SelectSingleNode("./h3[1]/span/text()");

            WordClass wordClass;
            if (headingTextNode == null)
            {
                // Without a heading text there is no name to build a word class from, so the shared
                // UnknownWordClass is used, as is done for definitions that have no word class section.
                wordClass = UnknownWordClass;
            }
            else
            {
                string wordClassText = headingTextNode.InnerText?.TrimAllSpecialCharacters();

                // Names are capped at 100 characters.
                if (wordClassText != null && wordClassText.Length > maxWordClassNameLength)
                {
                    wordClassText = wordClassText.Substring(0, maxWordClassNameLength);
                }

                // Looking up an already known WordClass by name is currently disabled,
                // so a new instance is created for every section.
                wordClass = new WordClass()
                {
                    Name = wordClassText
                };
            }

            var definitionNodes = wordClassNode.SelectNodes("./div");

            if (definitionNodes == null)
            {
                // Treat wordClassNode as a definition node (http://tratu.soha.vn/dict/en_vn/Allegedly)
                return(new List <Definition>()
                {
                    ReadDefinition(wordClassNode, word, subDictionary, wordClass)
                });
            }

            // Definitions with empty content are filtered out because in some cases the content is
            // null, which would cause the word not to be registered
            // (http://tratu.soha.vn/dict/en_vn/Absorbency)
            return(definitionNodes.Select(n => ReadDefinition(n, word, subDictionary, wordClass)).Where(d => !string.IsNullOrEmpty(d.Content)));
        }