/// <summary>
/// Counts how often each text unit occurs across the given sentences and
/// returns the units ordered from most to least frequent.
/// </summary>
/// <param name="Sentences">Sentences whose <c>TextUnits</c> are tallied.</param>
/// <returns>
/// One <see cref="TextUnitScore"/> per distinct text unit, where <c>Score</c> is the
/// number of occurrences, sorted by descending score.
/// </returns>
public List<TextUnitScore> GetImportantTextUnits(List<Sentence> Sentences)
{
    var occurrences = new Dictionary<TextUnit, long>();

    foreach (var textUnit in Sentences.SelectMany(sentence => sentence.TextUnits))
    {
        // Units whose formatted value is listed as unimportant are not scored at all.
        if (m_Rules.UnimportantWords.Contains(textUnit.FormattedValue))
        {
            continue;
        }

        // TryGetValue leaves 'seen' at 0 for a unit not encountered before, so a single
        // lookup plus a single write covers both the "first time" and "seen again" cases.
        long seen;
        occurrences.TryGetValue(textUnit, out seen);
        occurrences[textUnit] = seen + 1;
    }

    return occurrences
        .OrderByDescending(entry => entry.Value)
        .Select(entry => new TextUnitScore { ScoredTextUnit = entry.Key, Score = entry.Value })
        .ToList();
}
/// <summary>
/// Looks up <paramref name="word"/> in the replacement rules and returns the mapped
/// value, or the word itself when no rule has that exact key.
/// </summary>
/// <param name="word">Word to look up. A null word is returned unchanged.</param>
/// <param name="replacementRules">Map from a word to its replacement.</param>
/// <returns>The replacement value for the word, or <paramref name="word"/> when there is none.</returns>
internal string ReplaceWord(string word, Dictionary<string, string> replacementRules)
{
    // A null word can never equal a dictionary key, and TryGetValue rejects null keys,
    // so short-circuit to keep the "no match -> return the input" contract.
    if (word == null)
    {
        return word;
    }

    // Hashed lookup instead of scanning every rule.
    // NOTE(review): assumes the dictionary uses the default (ordinal, case-sensitive)
    // string comparer, which matches the '==' comparison this replaces - confirm for
    // any caller that builds the dictionary with a custom comparer.
    string replacement;
    return replacementRules.TryGetValue(word, out replacement) ? replacement : word;
}
/// <summary>
/// Rewrites the start of <paramref name="word"/>: for every rule whose key the word
/// currently starts with, the key is cut off and the rule's value is put in its place.
/// Rules are applied one after another to the progressively rewritten word.
/// </summary>
/// <param name="word">Word to process.</param>
/// <param name="prefixRules">Map from a prefix to the text that replaces it.</param>
/// <returns>The word after all matching prefix rules were applied.</returns>
internal string StripPrefix(string word, Dictionary<string, string> prefixRules)
{
    // Not simply using .Replace() in this method in case rule.Key exists multiple
    // times in the string; only the leading occurrence may be rewritten.
    foreach (var rule in prefixRules)
    {
        // Ordinal comparison: the Substring below cuts exactly rule.Key.Length chars,
        // which is only correct when the match is char-for-char. The culture-sensitive
        // default can match across ignorable characters and then cut the wrong text.
        if (word.StartsWith(rule.Key, System.StringComparison.Ordinal))
        {
            word = rule.Value + word.Substring(rule.Key.Length);
        }
    }

    return word;
}
/// <summary>
/// Rewrites the end of <paramref name="word"/>: for every rule whose key the word
/// currently ends with, the key is cut off and the rule's value is appended instead.
/// Rules are applied one after another to the progressively rewritten word.
/// </summary>
/// <param name="word">Word to process.</param>
/// <param name="suffixRules">Map from a suffix to the text that replaces it.</param>
/// <returns>The word after all matching suffix rules were applied.</returns>
public string StripSuffix(string word, Dictionary<string, string> suffixRules)
{
    // Not simply using .Replace() in this method in case rule.Key exists multiple
    // times in the string; only the trailing occurrence may be rewritten.
    foreach (var rule in suffixRules)
    {
        // Ordinal comparison: the Substring below drops exactly rule.Key.Length chars,
        // which is only correct when the match is char-for-char. The culture-sensitive
        // default can match across ignorable characters and then cut the wrong text.
        if (word.EndsWith(rule.Key, System.StringComparison.Ordinal))
        {
            word = word.Substring(0, word.Length - rule.Key.Length) + rule.Value;
        }
    }

    return word;
}
/// <summary>
/// Creates a text unit builder that keeps the supplied rule set in <c>m_Rules</c>.
/// </summary>
/// <param name="Rules">Rule set stored as-is; no validation is performed here.</param>
public TextUnitBuilder(Dictionary Rules)
{
    m_Rules = Rules;
}
/// <summary>
/// Creates a content parser that keeps the supplied rule set and text unit builder
/// in <c>m_Rules</c> and <c>m_TextUnitBuilder</c>.
/// </summary>
/// <param name="rules">Rule set stored as-is; no validation is performed here.</param>
/// <param name="textUnitBuilder">Builder stored as-is; no validation is performed here.</param>
public ClassicContentParser(Dictionary rules, ITextUnitBuilder textUnitBuilder)
{
    m_TextUnitBuilder = textUnitBuilder;
    m_Rules = rules;
}
/// <summary>
/// Creates a content analyzer that keeps the supplied rule set in <c>m_Rules</c>.
/// </summary>
/// <param name="Rules">Rule set stored as-is; no validation is performed here.</param>
public ClassicContentAnalyzer(Dictionary Rules)
{
    m_Rules = Rules;
}
/// <summary>
/// Loads the rule dictionary for a language from <c>dics\{language}.xml</c>, resolved
/// relative to the directory derived from the executing assembly's code base.
/// </summary>
/// <param name="dictionaryLanguage">Language name; used as the XML file name without extension.</param>
/// <returns>A <see cref="Dictionary"/> populated from the stemmer, parser and grader sections of the file.</returns>
/// <exception cref="FileNotFoundException">The resolved dictionary file does not exist.</exception>
public static Dictionary LoadFromFile(string dictionaryLanguage)
{
    // The first six characters of the directory are dropped.
    // NOTE(review): presumably this strips a leading "file:\" left over from the
    // code-base URI - this looks fragile for UNC or non-Windows paths; confirm.
    var codeBase = Assembly.GetExecutingAssembly().GetName().CodeBase;
    var assemblyDirectory = Path.GetDirectoryName(codeBase)?.Substring(6);
    var dictionaryFile = string.Format(@"{1}\dics\{0}.xml", dictionaryLanguage, assemblyDirectory);

    if (!File.Exists(dictionaryFile))
    {
        throw new FileNotFoundException("Could Not Load Dictionary: " + dictionaryFile);
    }

    var dict = new Dictionary();
    var doc = XElement.Load(dictionaryFile);

    // Stemmer rules: each is a key/value map read from a container under <stemmer>.
    dict.Step1PrefixRules = LoadKeyValueRule(doc, "stemmer", "step1_pre");
    dict.Step1SuffixRules = LoadKeyValueRule(doc, "stemmer", "step1_post");
    dict.ManualReplacementRules = LoadKeyValueRule(doc, "stemmer", "manual");
    dict.PrefixRules = LoadKeyValueRule(doc, "stemmer", "pre");
    dict.SuffixRules = LoadKeyValueRule(doc, "stemmer", "post");
    dict.SynonymRules = LoadKeyValueRule(doc, "stemmer", "synonyms");

    // Parser and grader rules: value-only entries.
    dict.LinebreakRules = LoadValueOnlyRule(doc, "parser", "linebreak");
    dict.NotALinebreakRules = LoadValueOnlyRule(doc, "parser", "linedontbreak");
    dict.DepreciateValueRule = LoadValueOnlyRule(doc, "grader-syn", "depreciate");
    dict.TermFreqMultiplierRule = LoadValueOnlySection(doc, "grader-tf");

    // Copy the "grader-tc" section entry by entry into a fresh list.
    dict.UnimportantWords = new List<string>();
    var unimportantWords = LoadValueOnlySection(doc, "grader-tc");
    foreach (var unimportantWord in unimportantWords)
    {
        dict.UnimportantWords.Add(unimportantWord);
    }

    return dict;
}
/// <summary>
/// Reads the child elements of <c>section/container</c> and turns each
/// <c>key|value</c> entry into a dictionary item.
/// </summary>
/// <param name="doc">Element whose direct children are searched for <paramref name="section"/>.</param>
/// <param name="section">Name of the section element.</param>
/// <param name="container">Name of the container element inside the section.</param>
/// <returns>
/// Map from the text before the first <c>|</c> to the text between the first and
/// second <c>|</c>. When a key occurs more than once, the first entry wins.
/// Entries without a <c>|</c> separator are skipped.
/// </returns>
private static Dictionary<string, string> LoadKeyValueRule(XElement doc, string section, string container)
{
    var rules = new Dictionary<string, string>();

    foreach (var ruleElement in doc.Elements(section).Elements(container).Elements())
    {
        var parts = ruleElement.Value.Split('|');

        // An entry with no '|' has no value part; skip it instead of failing with
        // an IndexOutOfRangeException on parts[1].
        if (parts.Length < 2)
        {
            continue;
        }

        // Keep the first definition of a key; later duplicates are ignored.
        if (!rules.ContainsKey(parts[0]))
        {
            rules.Add(parts[0], parts[1]);
        }
    }

    return rules;
}