Example #1
0
        /// <summary>
        /// Exports a misspelling as a semicolon-separated line to <c>writer</c> and, when a
        /// correction is present, writes a second line with the correction details to
        /// <c>writerCorrected</c>.
        /// </summary>
        /// <param name="error">The misspelling to export.</param>
        public void AddCorrection(MisspelledWord error)
        {
            const string Separator = ";";

            // The two context columns are appended only when context export is switched on.
            string contextColumns = "";
            if (ExportContext)
            {
                contextColumns = Separator + error.GetLeftContext().ToStringRepresentation()
                    + Separator + error.GetRightContext().ToStringRepresentation();
            }

            string summaryLine = error.WrongWord
                + Separator + error.CorrectWord
                + Separator + error.RevokedByLm.ToString()
                + contextColumns;
            writer.WriteLine(summaryLine);

            // Without a correction there is nothing to add to the corrected-words output.
            if (String.IsNullOrEmpty(error.CorrectWord))
            {
                return;
            }

            string correctedLine = error.WrongWord
                + Separator + error.CorrectWord
                + Separator + Math.Round(error.Accuracy, 1).ToString()
                + Separator + error.CorrectedBy.ToString()
                + Separator + error.IsName().ToString();
            writerCorrected.WriteLine(correctedLine);
        }
Example #2
0
        /// <summary>
        /// Scores every candidate correction against the n-gram collections, first with the
        /// left context of the misspelled word and then with its right context, multiplying
        /// both probabilities together.
        /// </summary>
        /// <remarks>
        /// Resets the <c>foundInNgrams</c> field and sets it to true as soon as any evaluated
        /// n-gram reports a positive occurrence count.
        /// </remarks>
        /// <param name="word">Misspelled word that supplies the left and right context.</param>
        /// <param name="candidates">Candidate corrections; only the keys are read here.</param>
        /// <returns>
        /// The probability computed for each candidate together with the flag telling whether
        /// any candidate was found in the n-grams.
        /// </returns>
        public LanguageModelEvaluation EvaluateCandidates(MisspelledWord word, Dictionary<string, double> candidates)
        {
            foundInNgrams = false;
            List<string> leftContext = word.GetLeftContext();

            NgramType type = this.dictionary.GetHighestAvailableNgramCollection(leftContext.Count);

            Dictionary<string, double> probability = new Dictionary<string, double>();
            string[] lcArray = this.GetLeftContext(leftContext, type);
            NgramEvaluation evaluation;
            foreach (KeyValuePair<string, double> option in candidates)
            {
                // The candidate is placed at index leftContext.Count - 1; for a candidate
                // containing a space only its first word takes part in the left n-gram.
                lcArray[leftContext.Count - 1] = option.Key.Contains(' ') ? option.Key.Split(space).First() : option.Key;

                evaluation = this.dictionary.GetNgramCollection(type).GetProbability(lcArray);
                probability.Add(option.Key, evaluation.Probability);

                if (!foundInNgrams && evaluation.Occurence > 0)
                {
                    foundInNgrams = true;
                }
            }

            List<string> rightContext = word.GetRightContext();
            NgramType secType = this.dictionary.GetHighestAvailableNgramCollection(rightContext.Count);

            // The right-context pass is skipped only when BOTH sides fall back to unigrams.
            // The original condition compared `type` with Unigram twice and never looked at
            // `secType`, so the right context was ignored whenever the left side was unigram.
            // NOTE(review): presumably the skip avoids applying the same unigram probability
            // twice - confirm against the n-gram collection semantics.
            bool unigramsOnBothSides = type == NgramType.Unigram && secType == NgramType.Unigram;
            if (!unigramsOnBothSides)
            {
                string[] rcArray = this.GetRightContext(rightContext, secType);
                foreach (KeyValuePair<string, double> option in candidates)
                {
                    // The candidate is placed at index 0; for a candidate containing a space
                    // only its last word takes part in the right n-gram.
                    rcArray[0] = option.Key.Contains(' ') ? option.Key.Split(space).Last() : option.Key;

                    evaluation = this.dictionary.GetNgramCollection(secType).GetProbability(rcArray);
                    probability[option.Key] *= evaluation.Probability;

                    if (!foundInNgrams && evaluation.Occurence > 0)
                    {
                        foundInNgrams = true;
                    }
                }
            }

            return new LanguageModelEvaluation(probability, foundInNgrams);
        }
Example #3
0
        /// <summary>
        /// Copies characters from <c>reader</c> to <c>writer</c>, advancing <c>pos</c> for every
        /// character consumed. When <paramref name="error"/> is supplied, copying stops once its
        /// position is reached: the correct word is emitted and the wrong word is skipped in the
        /// input. Without an error the copy runs until the end of the stream.
        /// </summary>
        /// <param name="error">Misspelling to replace, or null to copy the rest of the input.</param>
        public void Write(MisspelledWord error = null)
        {
            int batch = 0;
            // A StringBuilder avoids re-allocating the whole buffer for each appended character.
            System.Text.StringBuilder data = new System.Text.StringBuilder();
            while (!reader.EndOfStream)
            {
                data.Append((char)reader.Read());
                pos++;
                batch++;

                if (error != null && pos == error.GetPosition())
                {
                    data.Append(error.CorrectWord);

                    // Consume the wrong word from the input so it is not copied to the output.
                    for (int i = 0; i < error.WrongWord.Length; i++)
                    {
                        pos++;
                        reader.Read();
                    }

                    break;
                }

                if (batch > maxCharsInBatch)
                {
                    writer.Write(data.ToString());
                    data.Clear();
                    // Restart the counter; otherwise every following character would be
                    // flushed on its own once the threshold had been exceeded.
                    batch = 0;
                }
            }

            // Flush whatever is left in the buffer.
            writer.Write(data.ToString());
        }
Example #4
0
 /// <summary>
 /// Forwards the given misspelling to <c>Write</c>.
 /// </summary>
 /// <param name="error">Misspelling passed on unchanged.</param>
 public void Push(MisspelledWord error)
 {
     this.Write(error);
 }
Example #5
0
        /// <summary>
        /// Chooses a correction for the misspelling and stores it, together with its accuracy
        /// and the model that produced it, on <paramref name="misspelling"/>.
        /// </summary>
        /// <remarks>
        /// Candidates come from the accent model when it is available and returns any,
        /// otherwise from the error model. With more than one candidate the language model
        /// probabilities are multiplied by the candidate weights and the highest product wins;
        /// a single candidate is accepted with accuracy 100. When
        /// <c>skipCandidatesMissingInNgrams</c> is set and no candidate was found in the
        /// n-grams, the misspelling is marked as revoked and left without a correction.
        /// </remarks>
        /// <param name="misspelling">Misspelled word to correct; updated in place.</param>
        public void Correct(MisspelledWord misspelling)
        {
            List<string> candidatesAccent = null;
            string word = null;
            double accuracy = 0;

            if (this.accentModel != null)
            {
                candidatesAccent = this.accentModel.AddAccent(misspelling.WrongWord);
            }

            Dictionary<string, double> candidates;
            if (null != candidatesAccent && candidatesAccent.Count > 0)
            {
                misspelling.CorrectedBy = CorrectedBy.AccentModel;
                candidates = candidatesAccent.ToDictionary();
            }
            else
            {
                misspelling.CorrectedBy = CorrectedBy.ErrorModel;
                candidates = this.errorModel.GeneratePossibleWords(misspelling.WrongWord);
            }

            if (candidates.Count > 1)
            {
                double totalProps = 0;
                LanguageModelEvaluation evaluation = this.languageModel.EvaluateCandidates(misspelling, candidates);

                if (skipCandidatesMissingInNgrams && !evaluation.FoundInNgrams)
                {
                    misspelling.RevokedByLm = true;
                    return;
                }

                // Weight each language-model probability by the candidate's own score.
                foreach (KeyValuePair<string, double> option in candidates)
                {
                    evaluation.Probabilities[option.Key] *= option.Value;
                    totalProps += evaluation.Probabilities[option.Key];
                }

                // The first candidate with the highest weighted probability wins.
                double? max = null;
                foreach (KeyValuePair<string, double> pair in evaluation.Probabilities)
                {
                    if (null == max || pair.Value > max)
                    {
                        max = pair.Value;
                        word = pair.Key;
                    }
                }

                // Accuracy is the winner's share of the total, in percent. A zero total would
                // turn the division into NaN/Infinity, so accuracy stays 0 in that case.
                if (max.HasValue && totalProps != 0)
                {
                    accuracy = (max.Value * 100) / totalProps;
                }

                misspelling.CorrectedBy = misspelling.CorrectedBy == CorrectedBy.ErrorModel ? CorrectedBy.ErrorAndLanguageModel : CorrectedBy.AccentAndLanguageModel;
            }
            else if (candidates.Count == 1)
            {
                accuracy = 100;
                word = candidates.First().Key;
            }

            if (null != word)
            {
                misspelling.Accuracy = accuracy;
                misspelling.CorrectWord = word;
            }
        }
Example #6
0
        /// <summary>
        /// Creates a <see cref="MisspelledWord"/> for the entry at index <c>ContextSize</c>
        /// when that entry is flagged in <c>errorPositions</c> and none of the skip conditions
        /// below applies.
        /// </summary>
        /// <returns>The misspelled word, or null when there is nothing to report.</returns>
        public MisspelledWord GetMisspelledWord()
        {
            // Nothing to do unless the entry at index ContextSize is flagged as an error.
            if (!this.errorPositions[this.ContextSize])
            {
                return null;
            }

            int flaggedCount = 0;
            for (int index = 0; index < this.errorPositions.Count; index++)
            {
                if (this.errorPositions[index])
                {
                    flaggedCount++;
                }
            }

            // Detection of another language.
            if (flaggedCount >= (this.windowSize - 1))
            {
                return null;
            }

            // There is an error in the surrounding words, so skip.
            bool previousFlagged = this.errorPositions[this.ContextSize - 1];
            if (previousFlagged && this.errorPositions[this.ContextSize + 1])
            {
                return null;
            }

            MisspelledWord candidate = new MisspelledWord(this.history, this.ContextSize);
            return candidate.AreNeighborsInContext() ? candidate : null;
        }