/// <summary>
/// Adds the stemmed words of the page's meta description and meta keywords to
/// <paramref name="dictionary"/>, accumulating a weight for every occurrence.
/// </summary>
/// <param name="dictionary">Stem-to-weight map that is updated in place.</param>
/// <remarks>
/// Description words are weighted with <c>Ranker["description"]</c> and keyword words with
/// <c>Ranker["keywords"]</c>. Description words contained in <c>stopwords</c> are skipped;
/// keyword words are not stopword-filtered. A source whose getter returns <c>null</c>
/// contributes nothing.
/// </remarks>
private void GetMeta(Dictionary<string, double> dictionary)
{
    IStemmer stemmer = new Porter2();

    string description = GetMetaDescription();
    if (description != null)
    {
        AddMetaWords(dictionary, stemmer, description, "description", true);
    }

    string keywords = GetMetaKeywords();
    if (keywords != null)
    {
        AddMetaWords(dictionary, stemmer, keywords, "keywords", false);
    }
}

/// <summary>
/// Splits <paramref name="text"/> into words of at least two word characters, stems each one
/// and adds <c>Ranker[rankerKey]</c> to that stem's entry in <paramref name="dictionary"/>.
/// </summary>
/// <param name="dictionary">Stem-to-weight map that is updated in place.</param>
/// <param name="stemmer">Stemmer applied to every accepted word.</param>
/// <param name="text">Raw meta text to tokenize.</param>
/// <param name="rankerKey">Key of the weight to read from <c>Ranker</c>.</param>
/// <param name="skipStopwords">When true, words contained in <c>stopwords</c> are ignored.</param>
private void AddMetaWords(Dictionary<string, double> dictionary, IStemmer stemmer, string text, string rankerKey, bool skipStopwords)
{
    foreach (Match match in Regex.Matches(text, @"\b\w{2,}\b", RegexOptions.Compiled))
    {
        // Lower-case first, as KeywordsVectorsFromText does for body text, so that the
        // stopword check and the resulting stems are case-insensitive.
        // NOTE(review): assumes stopwords holds lower-case entries - confirm.
        string word = match.Value.ToLowerInvariant();
        if (skipStopwords && stopwords.Contains(word))
        {
            continue;
        }

        string stemmed = stemmer.stem(word);

        // TryGetValue leaves 'current' at 0 for a new stem, so one lookup covers both the
        // "add" and the "increment" case. The weight is read only when a word is actually
        // added, so a missing Ranker entry does not fail for text without usable words.
        double current;
        dictionary.TryGetValue(stemmed, out current);
        dictionary[stemmed] = current + Ranker[rankerKey];
    }
}
/// <summary>
/// Builds a weighted vector of stemmed keywords for the loaded document.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Meta description/keywords words are added through <c>GetMeta</c>.
/// 2. For every entry of <c>Ranker</c>, the text of all elements with that name is tokenized,
///    lower-cased, stopword-filtered and stemmed; each occurrence adds the entry's weight.
///    The processed elements are then removed from <c>doc</c>, so this method mutates the
///    document (presumably so that their text is not counted again for a later entry).
/// 3. Every word returned by <c>GetLinkWords</c> adds a fixed weight of 25.
/// 4. All weights are divided by the largest weight and rounded to four decimals.
/// 5. The stemmed domain word gets +0.25 if already present, otherwise a score of 1; this
///    happens after normalization, so its score may exceed 1.
/// </remarks>
/// <returns>A new dictionary mapping each stem to its score.</returns>
public Dictionary<string, double> KeywordsVectorsFromText()
{
    const double LinkWordWeight = 25;
    const double DomainWordBonus = 0.25;
    const double DomainWordScore = 1;

    IStemmer stemmer = new Porter2();
    Dictionary<string, double> dictionary = new Dictionary<string, double>();
    GetMeta(dictionary);

    foreach (var nodeType in Ranker)
    {
        // Materialize the lazy query once instead of re-walking the document for the
        // emptiness check, the scoring loop and the removal pass.
        var nodes = doc.DocumentNode.Descendants(nodeType.Key).ToList();
        if (nodes.Count == 0)
        {
            continue;
        }

        foreach (var node in nodes)
        {
            // Only leaf nodes carry text of their own; joining them with spaces keeps
            // words of adjacent elements from running together.
            string text = HttpUtility.HtmlDecode(
                string.Join(" ", node.Descendants()
                    .Where(n => !n.HasChildNodes && !string.IsNullOrWhiteSpace(n.InnerText))
                    .Select(n => n.InnerText)));

            foreach (Match match in Regex.Matches(text, @"\b\w{2,}\b", RegexOptions.Compiled))
            {
                string word = match.Value.ToLowerInvariant();
                if (stopwords.Contains(word))
                {
                    continue;
                }

                AccumulateWeight(dictionary, stemmer.stem(word), nodeType.Value);
            }
        }

        // Detach the processed elements only after all of them have been scored.
        foreach (var node in nodes)
        {
            node.Remove();
        }
    }

    foreach (string linkWord in GetLinkWords())
    {
        AccumulateWeight(dictionary, stemmer.stem(linkWord), LinkWordWeight);
    }

    if (dictionary.Count > 0)
    {
        double divider = dictionary.Values.Max();

        // A zero maximum would turn every score into NaN (0 / 0); leave the weights as they are.
        if (divider != 0)
        {
            // Iterate over a snapshot of the keys because the values are rewritten in the loop.
            foreach (var key in dictionary.Keys.ToList())
            {
                dictionary[key] = Math.Round(dictionary[key] / divider, 4);
            }
        }
    }

    // NOTE(review): GetDomainWord is not visible here; guarding against a null/empty result
    // avoids handing null to the stemmer or adding an empty key - confirm it can be empty.
    string domainWord = GetDomainWord();
    if (!string.IsNullOrEmpty(domainWord))
    {
        string domainStem = stemmer.stem(domainWord);
        double existing;
        dictionary[domainStem] = dictionary.TryGetValue(domainStem, out existing)
            ? existing + DomainWordBonus
            : DomainWordScore;
    }

    return dictionary;
}

/// <summary>
/// Adds <paramref name="weight"/> to the entry for <paramref name="key"/>, creating the entry
/// with that weight when the key is not present yet.
/// </summary>
private static void AccumulateWeight(Dictionary<string, double> dictionary, string key, double weight)
{
    // 'current' stays 0 when the key is missing, so a single lookup handles both cases.
    double current;
    dictionary.TryGetValue(key, out current);
    dictionary[key] = current + weight;
}