/// <summary>
/// Populates <paramref name="dictionary"/> with the words from its dictionary file.
/// Entries that carry flags are expanded through the affix rules when an affix file
/// was parsed; all other entries are added as-is.
/// </summary>
/// <param name="dictionary">Supplies the file paths and receives the parsed words.</param>
internal void ParseDictionary(Dictionary dictionary)
{
    AffixRules rules = null;
    Encoding encoding = null;

    string affixFile = dictionary.GetFile(DictionaryFileType.Affix);
    if (null != affixFile)
    {
        // Utils.DetectEncoding is tried first; EncodingDetector is the fallback when it returns null.
        encoding = Utils.DetectEncoding(affixFile);
        if (null == encoding)
        {
            encoding = EncodingDetector.DetectEncoding(affixFile);
        }

        rules = this.affixParser.Parse(affixFile, encoding);
    }

    string fileName = dictionary.GetFile(DictionaryFileType.Dictionary);
    if (null == fileName)
    {
        // No dictionary file is registered, so there is nothing to load.
        return;
    }

    if (null == encoding)
    {
        // No encoding was obtained from an affix file; detect it from the dictionary
        // file itself instead of handing a null encoding to the parser.
        encoding = EncodingDetector.DetectEncoding(fileName);
    }

    DictionaryWithFlags rawDict = this.parser.Parse(fileName, encoding);
    foreach (DictionaryItemWithFlags item in rawDict)
    {
        // Without affix rules a flagged entry cannot be expanded; keep its base word
        // rather than dereferencing a null rule set.
        if (null == item.Flags || null == rules)
        {
            dictionary.Add(item.Word);
        }
        else
        {
            dictionary.AddRange(rules.GetPossibleWords(item));
        }
    }
}
/// <summary>
/// Parses each confusion-matrix file registered in <paramref name="dictionary"/>
/// (deletions, insertions, substitutions, transpositions) and attaches the result
/// under the edit operation that corresponds to the file type.
/// File types with no registered file are skipped.
/// </summary>
/// <param name="dictionary">Supplies the file paths and receives the parsed matrices.</param>
internal void ParseConfusionMatrixes(Dictionary dictionary)
{
    DictionaryFileType[] matrixTypes =
    {
        DictionaryFileType.DeletetionsMatrix,
        DictionaryFileType.InsertionsMatrix,
        DictionaryFileType.SubstitutionsMatrix,
        DictionaryFileType.TranspositionsMatrix
    };

    for (int index = 0; index < matrixTypes.Length; index++)
    {
        DictionaryFileType matrixType = matrixTypes[index];
        string path = dictionary.GetFile(matrixType);
        if (null == path)
        {
            continue;
        }

        ConfusionMatrix parsed = this.matrixParser.ParseMatrix(path);
        dictionary.AddConfusionMatrix(ConvertFileTypeToEditOperation(matrixType), parsed);
    }
}
/// <summary>
/// Parses the one-character and two-character frequency files of
/// <paramref name="dictionary"/> and stores each resulting vector under its
/// frequency-vector type, in that order.
/// </summary>
/// <param name="dictionary">Supplies the file paths and receives the frequency vectors.</param>
internal void ParseFrequences(Dictionary dictionary)
{
    // Parallel arrays: sources[i] is loaded and stored under targets[i].
    DictionaryFileType[] sources =
    {
        DictionaryFileType.OneCharFrequences,
        DictionaryFileType.TwoCharFrequences
    };
    FrequencyVectorType[] targets =
    {
        FrequencyVectorType.OneChar,
        FrequencyVectorType.TwoChar
    };

    for (int index = 0; index < sources.Length; index++)
    {
        string path = dictionary.GetFile(sources[index]);
        FrequencyVector<string> vector = this.frequencyParser.ParseFrequency(path);
        dictionary.AddFrequencyVector(targets[index], vector);
    }
}
// TODO: move this method.
/// <summary>
/// Reads the line-dictionary file of <paramref name="dictionary"/>, if one is
/// registered, and adds every line of it as an entry.
/// </summary>
/// <param name="dictionary">Supplies the file path and receives the lines.</param>
internal void ParseSimpleDictionary(Dictionary dictionary)
{
    string path = dictionary.GetFile(DictionaryFileType.LineDictionary);
    if (null == path)
    {
        return;
    }

    Encoding detected = EncodingDetector.DetectEncoding(path);
    using (StreamReader input = new StreamReader(path, detected))
    {
        // ReadLine returns null once the end of the stream is reached.
        string line;
        while ((line = input.ReadLine()) != null)
        {
            dictionary.Add(line);
        }
    }
}
/// <summary>
/// Parses the unigram, digram and trigram frequency files registered in
/// <paramref name="dictionary"/> and attaches each collection under the matching
/// n-gram type. File types with no registered file are skipped.
/// </summary>
/// <param name="dictionary">Supplies the file paths and receives the n-gram collections.</param>
internal void ParseNgrams(Dictionary dictionary)
{
    DictionaryFileType[] ngramFiles =
    {
        DictionaryFileType.UnigramFrequences,
        DictionaryFileType.DigramFrequences,
        DictionaryFileType.TrigramFrequences
    };

    for (int index = 0; index < ngramFiles.Length; index++)
    {
        DictionaryFileType fileType = ngramFiles[index];
        string path = dictionary.GetFile(fileType);
        if (null == path)
        {
            continue;
        }

        NgramCollection parsed = this.ngramParser.ParseNgrams(path);

        // Anything that is not a digram or trigram file is treated as unigram.
        NgramType kind;
        if (fileType == DictionaryFileType.DigramFrequences)
        {
            kind = NgramType.Digram;
        }
        else if (fileType == DictionaryFileType.TrigramFrequences)
        {
            kind = NgramType.Trigram;
        }
        else
        {
            kind = NgramType.Unigram;
        }

        dictionary.AddNgramCollection(kind, parsed);
    }
}