/// <summary>
        /// Counts how often each text unit occurs across the given sentences and returns
        /// the units with their counts, sorted from the highest count to the lowest.
        /// </summary>
        /// <param name="Sentences">Sentences whose <c>TextUnits</c> are tallied.</param>
        /// <returns>
        /// One <c>TextUnitScore</c> per distinct text unit, where <c>Score</c> is the number
        /// of occurrences. Units whose <c>FormattedValue</c> is listed in
        /// <c>m_Rules.UnimportantWords</c> are left out.
        /// </returns>
        public List<TextUnitScore> GetImportantTextUnits(List<Sentence> Sentences)
        {
            var textUnitFrequencyGrader = new Dictionary<TextUnit, long>();
            foreach (var tu in Sentences.SelectMany(s => s.TextUnits))
            {
                if (m_Rules.UnimportantWords.Contains(tu.FormattedValue))
                {
                    continue;
                }

                // TryGetValue leaves the count at 0 for a unit not seen before, so a single
                // lookup plus one write covers both the "new" and the "seen" case.
                long occurrences;
                textUnitFrequencyGrader.TryGetValue(tu, out occurrences);
                textUnitFrequencyGrader[tu] = occurrences + 1;
            }

            return
                textUnitFrequencyGrader.OrderByDescending(kvp => kvp.Value)
                    .Select(kvp => new TextUnitScore {ScoredTextUnit = kvp.Key, Score = kvp.Value})
                    .ToList();
        }
        /// <summary>
        /// Returns the replacement registered for <paramref name="word"/>, or the word
        /// itself when no rule has it as a key.
        /// </summary>
        /// <param name="word">Word to look up.</param>
        /// <param name="replacementRules">Map from a word to its replacement.</param>
        /// <returns>The mapped replacement, or <paramref name="word"/> unchanged.</returns>
        internal string ReplaceWord(string word, Dictionary<string, string> replacementRules)
        {
            // A null word cannot match any key (dictionary keys are never null) and
            // TryGetValue would throw on it, so hand it back untouched.
            if (word == null)
            {
                return word;
            }

            // Hash lookup instead of scanning every rule. Matching follows the dictionary's
            // key comparer, which is exact string equality for a default-constructed dictionary.
            string replacement;
            return replacementRules.TryGetValue(word, out replacement) ? replacement : word;
        }
 /// <summary>
 /// Replaces a leading occurrence of each rule key in <paramref name="word"/> with the
 /// rule's value.
 /// </summary>
 /// <param name="word">Word to process.</param>
 /// <param name="prefixRules">Map from a prefix to the text that replaces it.</param>
 /// <returns>
 /// The word after all matching rules were applied. Rules are tried in the dictionary's
 /// enumeration order and each rule sees the result of the rules before it.
 /// </returns>
 internal string StripPrefix(string word, Dictionary<string, string> prefixRules)
 {
     // Only the leading occurrence is rewritten; string.Replace would also touch
     // later occurrences of the same text inside the word.
     foreach (var rule in prefixRules)
     {
         // Ordinal comparison: a culture-sensitive StartsWith can report a match while
         // ignoring characters (e.g. a soft hyphen), and Substring(rule.Key.Length)
         // would then remove the wrong characters.
         if (word.StartsWith(rule.Key, System.StringComparison.Ordinal))
         {
             word = rule.Value + word.Substring(rule.Key.Length);
         }
     }
     return word;
 }
        /// <summary>
        /// Replaces a trailing occurrence of each rule key in <paramref name="word"/> with
        /// the rule's value.
        /// </summary>
        /// <param name="word">Word to process.</param>
        /// <param name="suffixRules">Map from a suffix to the text that replaces it.</param>
        /// <returns>
        /// The word after all matching rules were applied. Rules are tried in the dictionary's
        /// enumeration order and each rule sees the result of the rules before it.
        /// </returns>
        public string StripSuffix(string word, Dictionary<string, string> suffixRules)
        {
            // Only the trailing occurrence is rewritten; string.Replace would also touch
            // earlier occurrences of the same text inside the word.
            foreach (var rule in suffixRules)
            {
                // Ordinal comparison: a culture-sensitive EndsWith can report a match while
                // ignoring characters (e.g. a soft hyphen), and the length arithmetic below
                // would then cut the word at the wrong position.
                if (word.EndsWith(rule.Key, System.StringComparison.Ordinal))
                {
                    word = word.Substring(0, word.Length - rule.Key.Length) + rule.Value;
                }
            }

            return word;
        }
 /// <summary>
 /// Initializes a new <c>TextUnitBuilder</c> with the given rule set.
 /// </summary>
 /// <param name="Rules">Rule dictionary kept in <c>m_Rules</c> for this instance.</param>
 public TextUnitBuilder(Dictionary Rules)
 {
     this.m_Rules = Rules;
 }
 /// <summary>
 /// Initializes a new <c>ClassicContentParser</c> with its rule set and text unit builder.
 /// </summary>
 /// <param name="rules">Rule dictionary kept in <c>m_Rules</c> for this instance.</param>
 /// <param name="textUnitBuilder">Builder kept in <c>m_TextUnitBuilder</c> for this instance.</param>
 public ClassicContentParser(Dictionary rules, ITextUnitBuilder textUnitBuilder)
 {
     this.m_TextUnitBuilder = textUnitBuilder;
     this.m_Rules = rules;
 }
 /// <summary>
 /// Initializes a new <c>ClassicContentAnalyzer</c> with the given rule set.
 /// </summary>
 /// <param name="Rules">Rule dictionary kept in <c>m_Rules</c> for this instance.</param>
 public ClassicContentAnalyzer(Dictionary Rules)
 {
     this.m_Rules = Rules;
 }
        /// <summary>
        /// Loads the rule dictionary for a language from the file
        /// <c>dics/{dictionaryLanguage}.xml</c> located in the directory of the executing assembly.
        /// </summary>
        /// <param name="dictionaryLanguage">Name of the dictionary file without the <c>.xml</c> extension.</param>
        /// <returns>A <c>Dictionary</c> populated from the stemmer, parser and grader sections of the file.</returns>
        /// <exception cref="FileNotFoundException">The dictionary file does not exist.</exception>
        public static Dictionary LoadFromFile(string dictionaryLanguage)
        {
            // CodeBase is a file URI (file:///C:/..., file://server/share/..., file:///home/...).
            // Converting it through Uri.LocalPath yields a real file system path on every
            // platform; trimming a fixed number of characters only works for local Windows drives.
            var codeBase = Assembly.GetExecutingAssembly().GetName().CodeBase;
            var assemblyDirectory = string.IsNullOrEmpty(codeBase)
                ? null
                : Path.GetDirectoryName(new System.Uri(codeBase).LocalPath);

            // Use the platform's separator instead of a hard-coded backslash.
            var dictionaryFile = string.Format("{0}{1}dics{1}{2}.xml", assemblyDirectory,
                Path.DirectorySeparatorChar, dictionaryLanguage);
            if (!File.Exists(dictionaryFile))
            {
                throw new FileNotFoundException("Could Not Load Dictionary: " + dictionaryFile);
            }

            var dict = new Dictionary();
            var doc = XElement.Load(dictionaryFile);

            // Stemmer rules: "key|value" entries grouped by container element.
            dict.Step1PrefixRules = LoadKeyValueRule(doc, "stemmer", "step1_pre");
            dict.Step1SuffixRules = LoadKeyValueRule(doc, "stemmer", "step1_post");
            dict.ManualReplacementRules = LoadKeyValueRule(doc, "stemmer", "manual");
            dict.PrefixRules = LoadKeyValueRule(doc, "stemmer", "pre");
            dict.SuffixRules = LoadKeyValueRule(doc, "stemmer", "post");
            dict.SynonymRules = LoadKeyValueRule(doc, "stemmer", "synonyms");

            // Parser and grader rules: value-only entries.
            dict.LinebreakRules = LoadValueOnlyRule(doc, "parser", "linebreak");
            dict.NotALinebreakRules = LoadValueOnlyRule(doc, "parser", "linedontbreak");
            dict.DepreciateValueRule = LoadValueOnlyRule(doc, "grader-syn", "depreciate");
            dict.TermFreqMultiplierRule = LoadValueOnlySection(doc, "grader-tf");

            dict.UnimportantWords = new List<string>();
            var unimpwords = LoadValueOnlySection(doc, "grader-tc");
            foreach (var unimpword in unimpwords)
            {
                dict.UnimportantWords.Add(unimpword);
            }
            return dict;
        }
 /// <summary>
 /// Reads the "key|value" entries found under <paramref name="section"/> /
 /// <paramref name="container"/> of the dictionary document.
 /// </summary>
 /// <param name="doc">Root element of the dictionary document.</param>
 /// <param name="section">Name of the section element directly below the root.</param>
 /// <param name="container">Name of the container element inside the section.</param>
 /// <returns>
 /// A map built from the text of every child element of the container: the part before
 /// the first '|' is the key, the part after it (up to the next '|') is the value.
 /// When a key occurs more than once, the first entry wins.
 /// </returns>
 /// <remarks>
 /// An entry without a '|' separator throws <c>IndexOutOfRangeException</c>, unless its
 /// key is already present, in which case it is skipped as a duplicate.
 /// </remarks>
 private static Dictionary<string, string> LoadKeyValueRule(XElement doc, string section, string container)
 {
     var rules = new Dictionary<string, string>();
     foreach (var entry in doc.Elements(section).Elements(container).Elements())
     {
         var parts = entry.Value.Split('|');

         // Duplicates are dropped before the value part is read.
         if (rules.ContainsKey(parts[0]))
         {
             continue;
         }

         rules.Add(parts[0], parts[1]);
     }
     return rules;
 }