예제 #1
0
        /// <summary>
        /// Loads the main word list into <paramref name="dictionary"/>. Entries without
        /// flags are added as-is; entries with flags are expanded through the affix rules
        /// when an affix file was parsed.
        /// </summary>
        /// <param name="dictionary">Dictionary that supplies the file paths and receives the words.</param>
        internal void ParseDictionary(Dictionary dictionary)
        {
            AffixRules rules = null;
            Encoding encoding = null;

            string affixFile = dictionary.GetFile(DictionaryFileType.Affix);
            if (null != affixFile)
            {
                // Try the primary detector first, then fall back to the secondary one.
                encoding = Utils.DetectEncoding(affixFile);
                if (null == encoding)
                {
                    encoding = EncodingDetector.DetectEncoding(affixFile);
                }
                rules = this.affixParser.Parse(affixFile, encoding);
            }

            string fileName = dictionary.GetFile(DictionaryFileType.Dictionary);
            if (null == fileName)
            {
                // No word list configured: nothing to load.
                return;
            }

            // NOTE(review): encoding is still null here when no affix file exists —
            // confirm that the parser accepts a null encoding.
            DictionaryWithFlags rawDict = this.parser.Parse(fileName, encoding);

            foreach (DictionaryItemWithFlags item in rawDict)
            {
                if (null == item.Flags || null == rules)
                {
                    // Either the entry has no flags or there are no affix rules to expand
                    // it with; keep the bare word instead of dereferencing a null rule set.
                    dictionary.Add(item.Word);
                }
                else
                {
                    dictionary.AddRange(rules.GetPossibleWords(item));
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Parses each confusion-matrix file that <paramref name="dictionary"/> provides
        /// and registers the result under the edit operation matching its file type.
        /// File types for which no file is returned are skipped.
        /// </summary>
        /// <param name="dictionary">Dictionary that supplies the file paths and receives the matrices.</param>
        internal void ParseConfusionMatrixes(Dictionary dictionary)
        {
            DictionaryFileType[] matrixTypes =
            {
                DictionaryFileType.DeletetionsMatrix,
                DictionaryFileType.InsertionsMatrix,
                DictionaryFileType.SubstitutionsMatrix,
                DictionaryFileType.TranspositionsMatrix
            };

            for (int i = 0; i < matrixTypes.Length; i++)
            {
                DictionaryFileType matrixType = matrixTypes[i];
                string path = dictionary.GetFile(matrixType);
                if (null == path)
                {
                    continue;
                }

                ConfusionMatrix parsed = this.matrixParser.ParseMatrix(path);
                dictionary.AddConfusionMatrix(ConvertFileTypeToEditOperation(matrixType), parsed);
            }
        }
예제 #3
0
        /// <summary>
        /// Parses the one-character and two-character frequency files and attaches the
        /// resulting vectors to <paramref name="dictionary"/>.
        /// </summary>
        /// <param name="dictionary">Dictionary that supplies the file paths and receives the vectors.</param>
        internal void ParseFrequences(Dictionary dictionary)
        {
            // NOTE(review): the paths are handed to the parser without a null check —
            // confirm that ParseFrequency tolerates a missing file.
            string oneCharPath = dictionary.GetFile(DictionaryFileType.OneCharFrequences);
            FrequencyVector<string> oneCharVector = this.frequencyParser.ParseFrequency(oneCharPath);
            dictionary.AddFrequencyVector(FrequencyVectorType.OneChar, oneCharVector);

            string twoCharPath = dictionary.GetFile(DictionaryFileType.TwoCharFrequences);
            FrequencyVector<string> twoCharVector = this.frequencyParser.ParseFrequency(twoCharPath);
            dictionary.AddFrequencyVector(FrequencyVectorType.TwoChar, twoCharVector);
        }
예제 #4
0
 // todo move
 /// <summary>
 /// Reads the line-based dictionary file, if one is provided, and adds every line
 /// to <paramref name="dictionary"/>. The file encoding is auto-detected.
 /// </summary>
 /// <param name="dictionary">Dictionary that supplies the file path and receives the lines.</param>
 internal void ParseSimpleDictionary(Dictionary dictionary)
 {
     string path = dictionary.GetFile(DictionaryFileType.LineDictionary);
     if (null == path)
     {
         return;
     }

     using (StreamReader input = new StreamReader(path, EncodingDetector.DetectEncoding(path)))
     {
         // ReadLine returns null once the end of the stream is reached.
         string line;
         while ((line = input.ReadLine()) != null)
         {
             dictionary.Add(line);
         }
     }
 }
예제 #5
0
        /// <summary>
        /// Parses the unigram, digram and trigram frequency files that
        /// <paramref name="dictionary"/> provides and registers each collection under
        /// the matching n-gram type. File types for which no file is returned are skipped.
        /// </summary>
        /// <param name="dictionary">Dictionary that supplies the file paths and receives the collections.</param>
        internal void ParseNgrams(Dictionary dictionary)
        {
            DictionaryFileType[] ngramFileTypes =
            {
                DictionaryFileType.UnigramFrequences,
                DictionaryFileType.DigramFrequences,
                DictionaryFileType.TrigramFrequences
            };

            foreach (DictionaryFileType fileType in ngramFileTypes)
            {
                string path = dictionary.GetFile(fileType);
                if (null == path)
                {
                    continue;
                }

                NgramCollection parsed = this.ngramParser.ParseNgrams(path);

                // Anything that is not a digram or trigram file is treated as unigram.
                NgramType kind;
                if (fileType == DictionaryFileType.DigramFrequences)
                {
                    kind = NgramType.Digram;
                }
                else if (fileType == DictionaryFileType.TrigramFrequences)
                {
                    kind = NgramType.Trigram;
                }
                else
                {
                    kind = NgramType.Unigram;
                }

                dictionary.AddNgramCollection(kind, parsed);
            }
        }