/// <summary>
/// Looks up the prefixes of <paramref name="word"/> through the wrapped
/// <c>_wordStructureRule</c> and additionally registers the empty-string key,
/// mapped to <paramref name="prePronunciation"/> as given.
/// </summary>
/// <param name="word">Text forwarded unchanged to the wrapped rule.</param>
/// <param name="prePronunciation">Pronunciations forwarded to the wrapped rule and stored under the "" key.</param>
/// <returns>
/// The dictionary produced by the wrapped rule (or a new one when the rule
/// returned null), with the extra "" entry added. Never null.
/// </returns>
/// <remarks>
/// The dictionary returned by the wrapped rule is modified in place, and
/// <c>Dictionary.Add</c> throws <c>ArgumentException</c> if it already holds a "" key.
/// </remarks>
public Dictionary <string, string[]> GetMatchedPrefix(string word, string[] prePronunciation)
        {
            // A null answer from the wrapped rule is replaced by an empty dictionary.
            Dictionary<string, string[]> prefixes =
                _wordStructureRule.GetMatchedPrefix(word, prePronunciation)
                ?? new Dictionary<string, string[]>();

            // The empty prefix always carries the incoming pronunciations untouched.
            prefixes.Add("", prePronunciation);
            return prefixes;
        }
예제 #2
0
        /// <summary>
        /// Maps every entry of <paramref name="thaiPronunciations"/> to a phoneme string
        /// using <c>_thaiPronunciationStructureRule</c>.
        /// </summary>
        /// <param name="thaiPronunciations">Pronunciation strings to convert; must not be null.</param>
        /// <returns>
        /// An array of the same length as the input. Each slot holds the first value the
        /// rule reports for a key equal to the whole input string, or the placeholder
        /// "$-$-$-$" when the rule reports no such key (or reports no usable value for it).
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="thaiPronunciations"/> is null.
        /// </exception>
        public string[] ToThaiPhoneme(string[] thaiPronunciations)
        {
            if (thaiPronunciations == null)
            {
                throw new System.ArgumentNullException("thaiPronunciations");
            }

            // Rules are loaded on first use.
            if (!this.IsInitialized)
            {
                this.InitializePronunciationRules();
            }

            // Emitted for inputs the rule cannot convert as a whole.
            const string unmatchedPhoneme = "$-$-$-$";

            string[] thaiPhonemes = new string[thaiPronunciations.Length];
            for (int count = 0; count < thaiPronunciations.Length; count++)
            {
                string s = thaiPronunciations[count];
                Dictionary <string, string[]> matchedPrefix = _thaiPronunciationStructureRule.GetMatchedPrefix(s, new string[] { "" });

                // Only a key equal to the entire input counts; shorter prefixes are ignored.
                // A null dictionary or a null/empty value array falls back to the placeholder
                // instead of throwing.
                string[] phonemes;
                if (matchedPrefix != null &&
                    matchedPrefix.TryGetValue(s, out phonemes) &&
                    phonemes != null &&
                    phonemes.Length > 0)
                {
                    thaiPhonemes[count] = phonemes[0];
                }
                else
                {
                    thaiPhonemes[count] = unmatchedPhoneme;
                }
            }
            return(thaiPhonemes);
        }
예제 #3
0
        //private bool ToThaiPronunciation(string word,
        //    List<KeyValuePair<string, string[]>> currentSegmentedWordList, List<string> currentPronunciationList, int currentScore,
        //    List<KeyValuePair<string, string[]>> resultSegmentedWordList, List<string> resultPronunciationList, ref int resultLeftWordLength, ref double resultScore,
        //    ref int numSolutionFound)
        //{
        //    if (!_cachedThaiDictMatchedPrefix.ContainsKey(word))
        //        _cachedThaiDictMatchedPrefix.Add(word, WordStructureThaiDictRule.Instance.GetMatchedPrefix(word, new string[] { "" }));
        //    if (!_cachedThaiRuleMatchedPrefix.ContainsKey(word))
        //        _cachedThaiRuleMatchedPrefix.Add(word, _thaiWordStructureRule.GetMatchedPrefix(word, new string[] { "" }));
        //    Dictionary<string, string[]> thaiDictMatchedPrefix = _cachedThaiDictMatchedPrefix[word];
        //    Dictionary<string, string[]> thaiRuleMatchedPrefix = _cachedThaiRuleMatchedPrefix[word];
        //    Dictionary<string, string[]> matchedPrefix = new Dictionary<string, string[]>(thaiDictMatchedPrefix);
        //    foreach (KeyValuePair<string, string[]> keyValuePair in thaiRuleMatchedPrefix)
        //        if (!matchedPrefix.ContainsKey(keyValuePair.Key))
        //            matchedPrefix.Add(keyValuePair.Key, keyValuePair.Value);
        //    if (matchedPrefix != null && matchedPrefix.Count > 0)
        //    {
        //        string[] keys = new string[matchedPrefix.Count];
        //        matchedPrefix.Keys.CopyTo(keys, 0);
        //        Array.Sort<string>(keys, new Comparison<string>(delegate(string x, string y)
        //        {
        //            int firstCompareResult = (thaiDictMatchedPrefix.ContainsKey(y) ? 1 : 0) - (thaiDictMatchedPrefix.ContainsKey(x) ? 1 : 0);
        //            return firstCompareResult != 0 ? firstCompareResult : (y.Length - x.Length);
        //        }));

        //        foreach (string prefix in keys)
        //        {
        //            int nextScore = currentScore;
        //            if (thaiDictMatchedPrefix.ContainsKey(prefix))
        //                nextScore += prefix.Length;

        //            if (nextScore + word.Length - prefix.Length < resultScore)
        //                continue;

        //            string[] prefixPronunciations = matchedPrefix[prefix];
        //            currentSegmentedWordList.Add(new KeyValuePair<string, string[]>(prefix, prefixPronunciations));
        //            currentPronunciationList.AddRange(prefixPronunciations);
        //            if (this.ToThaiPronunciation(word.Substring(prefix.Length),
        //                currentSegmentedWordList, currentPronunciationList, nextScore,
        //                resultSegmentedWordList, resultPronunciationList, ref resultLeftWordLength,
        //                ref resultScore, ref numSolutionFound))
        //                return true;
        //            currentSegmentedWordList.RemoveAt(currentSegmentedWordList.Count - 1);
        //            currentPronunciationList.RemoveRange(currentPronunciationList.Count - prefixPronunciations.Length, prefixPronunciations.Length);
        //        }
        //    } else
        //    {
        //        if (word.Length < resultLeftWordLength
        //            || word.Length == resultLeftWordLength
        //            && (currentScore > resultScore
        //            || currentScore == resultScore && currentSegmentedWordList.Count < resultSegmentedWordList.Count))
        //        {
        //            resultSegmentedWordList.Clear();
        //            resultSegmentedWordList.AddRange(currentSegmentedWordList);
        //            resultPronunciationList.Clear();
        //            resultPronunciationList.AddRange(currentPronunciationList);
        //            resultLeftWordLength = word.Length;
        //            resultScore = currentScore;
        //        }
        //        if (++numSolutionFound >= this.MaxSearchingResult)
        //            return true;
        //    }
        //    return false;
        //}
        /// <summary>
        /// Recursively splits <paramref name="word"/> into a chain of matched prefixes and
        /// memoizes the outcome per input string in the three <c>_cached*</c> dictionaries.
        /// </summary>
        /// <remarks>
        /// Candidate prefixes come from <c>WordStructureThaiDictRule.Instance</c> and
        /// <c>_thaiWordStructureRule</c>; when both report the same prefix the dictionary
        /// rule's value is kept. For each candidate the remainder of the string is segmented
        /// recursively, and the best combination is chosen by, in order:
        /// more characters covered, then a higher score, then the segment-count comparison
        /// noted in the loop body.
        /// </remarks>
        /// <param name="word">Text to segment.</param>
        /// <param name="resultSegmentedWordList">
        /// Receives the chosen (prefix, pronunciations) pairs in order; empty when no prefix matched.
        /// </param>
        /// <param name="resultScore">
        /// Receives the summed length of the chosen prefixes that were reported by the dictionary rule.
        /// </param>
        /// <param name="segmentedWordsLength">
        /// Receives the summed length of all chosen prefixes.
        /// </param>
        private void SegmentThaiSentence(string word,
                                         out List <KeyValuePair <string, string[]> > resultSegmentedWordList, out int resultScore, out int segmentedWordsLength)
        {
            // Memoized answer: the three caches are always filled together at the end of this method.
            if (_cachedSegmentedWordList.TryGetValue(word, out resultSegmentedWordList))
            {
                resultScore          = _cachedResultScore[word];
                segmentedWordsLength = _cachedSegmentedWordsLength[word];
                return;
            }
            resultSegmentedWordList = new List <KeyValuePair <string, string[]> >();
            resultScore             = 0;
            segmentedWordsLength    = 0;

            // A null lookup result is treated as "no prefixes matched".
            Dictionary <string, string[]> thaiDictMatchedPrefix = WordStructureThaiDictRule.Instance.GetMatchedPrefix(word, new string[] { "" });
            if (thaiDictMatchedPrefix == null)
            {
                thaiDictMatchedPrefix = new Dictionary <string, string[]>();
            }
            Dictionary <string, string[]> thaiRuleMatchedPrefix = _thaiWordStructureRule.GetMatchedPrefix(word, new string[] { "" });

            // Union of both lookups; dictionary-rule entries win on key collisions.
            Dictionary <string, string[]> matchedPrefix = new Dictionary <string, string[]>(thaiDictMatchedPrefix);
            if (thaiRuleMatchedPrefix != null)
            {
                foreach (KeyValuePair <string, string[]> keyValuePair in thaiRuleMatchedPrefix)
                {
                    if (!matchedPrefix.ContainsKey(keyValuePair.Key))
                    {
                        matchedPrefix.Add(keyValuePair.Key, keyValuePair.Value);
                    }
                }
            }

            if (matchedPrefix.Count > 0)
            {
                // Visit dictionary-rule prefixes first, and longer prefixes before shorter ones.
                string[] keys = new string[matchedPrefix.Count];
                matchedPrefix.Keys.CopyTo(keys, 0);
                Array.Sort <string>(keys, new Comparison <string>(delegate(string x, string y)
                {
                    int firstCompareResult = (thaiDictMatchedPrefix.ContainsKey(y) ? 1 : 0) - (thaiDictMatchedPrefix.ContainsKey(x) ? 1 : 0);
                    return(firstCompareResult != 0 ? firstCompareResult : (y.Length - x.Length));
                }));

                foreach (string prefix in keys)
                {
                    // An empty prefix consumes nothing: word.Substring(0) is the same, still
                    // uncached string, so recursing on it would never terminate.
                    if (prefix.Length == 0)
                    {
                        continue;
                    }

                    List <KeyValuePair <string, string[]> > subResultSegmentedWordList;
                    int subResultScore;
                    int subResultSegmentedWordLength;
                    this.SegmentThaiSentence(word.Substring(prefix.Length), out subResultSegmentedWordList, out subResultScore, out subResultSegmentedWordLength);

                    // Only prefixes reported by the dictionary rule contribute to the score.
                    int currentScore = (thaiDictMatchedPrefix.ContainsKey(prefix) ? prefix.Length : 0) + subResultScore;
                    int currentSegmentedWordsLength = prefix.Length + subResultSegmentedWordLength;

                    // NOTE(review): the last tie-break compares the sub-result's count (which
                    // excludes this prefix) with the current best's full count, so a candidate
                    // with an equal number of segments also replaces the best. Confirm intended.
                    bool coversMore = currentSegmentedWordsLength > segmentedWordsLength;
                    bool coversSame = currentSegmentedWordsLength == segmentedWordsLength;
                    bool betterOnTie = currentScore > resultScore ||
                                       currentScore == resultScore &&
                                       subResultSegmentedWordList.Count < resultSegmentedWordList.Count;
                    if (coversMore || coversSame && betterOnTie)
                    {
                        string[] prefixPronunciations = matchedPrefix[prefix];
                        // Build a fresh list so cached sub-results are never mutated.
                        resultSegmentedWordList = new List <KeyValuePair <string, string[]> >();
                        resultSegmentedWordList.Add(new KeyValuePair <string, string[]>(prefix, prefixPronunciations));
                        resultSegmentedWordList.AddRange(subResultSegmentedWordList);
                        resultScore          = currentScore;
                        segmentedWordsLength = currentSegmentedWordsLength;
                    }
                }
            }

            _cachedSegmentedWordList.Add(word, resultSegmentedWordList);
            _cachedResultScore.Add(word, resultScore);
            _cachedSegmentedWordsLength.Add(word, segmentedWordsLength);
        }