/// <summary>
/// Records one processed misspelling in the report writers.
/// </summary>
/// <remarks>
/// A semicolon-separated line (wrong word, correct word, <c>RevokedByLm</c> flag and, when
/// <c>ExportContext</c> is set, the left and right context) is always written to <c>writer</c>.
/// When the misspelling has a non-empty correction, a second line (wrong word, correct word,
/// accuracy rounded to one decimal, <c>CorrectedBy</c>, <c>IsName()</c>) goes to
/// <c>writerCorrected</c>.
/// </remarks>
/// <param name="error">The misspelling to record.</param>
public void AddCorrection(MisspelledWord error)
{
    // The context columns are optional and are appended only on request.
    string contextColumns = "";
    if (ExportContext)
    {
        string left = error.GetLeftContext().ToStringRepresentation();
        string right = error.GetRightContext().ToStringRepresentation();
        contextColumns = ";" + left + ";" + right;
    }

    string summaryLine = error.WrongWord + ";" + error.CorrectWord + ";" + error.RevokedByLm.ToString();
    writer.WriteLine(summaryLine + contextColumns);

    // Words without a correction are listed only in the summary output.
    if (string.IsNullOrEmpty(error.CorrectWord))
    {
        return;
    }

    string roundedAccuracy = Math.Round(error.Accuracy, 1).ToString();
    string correctedLine = error.WrongWord + ";" + error.CorrectWord + ";" + roundedAccuracy
        + ";" + error.CorrectedBy.ToString()
        + ";" + error.IsName().ToString();
    writerCorrected.WriteLine(correctedLine);
}
/// <summary>
/// Scores each candidate correction with the n-gram collections, using the words to the
/// left and to the right of the misspelling.
/// </summary>
/// <remarks>
/// For every candidate the probability obtained with the left context is stored, then
/// multiplied by the probability obtained with the right context. The right-context pass
/// is skipped only when both sides resolve to the unigram collection. The
/// <c>foundInNgrams</c> field is reset on entry and set as soon as any lookup reports a
/// positive occurrence count.
/// </remarks>
/// <param name="word">The misspelling whose left and right contexts are used.</param>
/// <param name="candidates">Candidate corrections; only the keys are used here.</param>
/// <returns>
/// The per-candidate probabilities together with the flag telling whether any candidate
/// was seen in the n-gram data.
/// </returns>
public LanguageModelEvaluation EvaluateCandidates(MisspelledWord word, Dictionary<string, double> candidates)
{
    foundInNgrams = false;

    // ---- left context ----
    List<string> leftContext = word.GetLeftContext();
    NgramType type = this.dictionary.GetHighestAvailableNgramCollection(leftContext.Count);
    Dictionary<string, double> probability = new Dictionary<string, double>();
    string[] lcArray = this.GetLeftContext(leftContext, type);
    NgramEvaluation evaluation;

    foreach (KeyValuePair<string, double> option in candidates)
    {
        // For a candidate containing a space, only its first part is placed next to the left context.
        // NOTE(review): assumes lcArray has a slot at index leftContext.Count - 1 - confirm against GetLeftContext.
        lcArray[leftContext.Count - 1] = option.Key.Contains(' ') ? option.Key.Split(space).First() : option.Key;
        evaluation = this.dictionary.GetNgramCollection(type).GetProbability(lcArray);
        probability.Add(option.Key, evaluation.Probability);

        if (!foundInNgrams && evaluation.Occurence > 0)
        {
            foundInNgrams = true;
        }
    }

    // ---- right context ----
    List<string> rightContext = word.GetRightContext();
    NgramType secType = this.dictionary.GetHighestAvailableNgramCollection(rightContext.Count);

    // Fix: the original compared `type` with Unigram twice, so the right context was ignored
    // whenever the left side fell back to unigrams. The second operand must be `secType`.
    // Presumably the pass is skipped for two unigrams because it would only re-apply the
    // same single-word probability - confirm with the model's author.
    bool bothUnigram = type == NgramType.Unigram && secType == NgramType.Unigram;
    if (!bothUnigram)
    {
        string[] rcArray = this.GetRightContext(rightContext, secType);
        foreach (KeyValuePair<string, double> option in candidates)
        {
            // For a candidate containing a space, only its last part is placed next to the right context.
            rcArray[0] = option.Key.Contains(' ') ? option.Key.Split(space).Last() : option.Key;
            evaluation = this.dictionary.GetNgramCollection(secType).GetProbability(rcArray);
            probability[option.Key] *= evaluation.Probability;

            if (!foundInNgrams && evaluation.Occurence > 0)
            {
                foundInNgrams = true;
            }
        }
    }

    return new LanguageModelEvaluation(probability, foundInNgrams);
}
/// <summary>
/// Copies characters from <c>reader</c> to <c>writer</c>, optionally replacing one
/// misspelled word with its correction.
/// </summary>
/// <remarks>
/// Without an <paramref name="error"/> the rest of the input is copied. With an error,
/// copying stops once the running <c>pos</c> counter equals <c>error.GetPosition()</c>:
/// <c>error.CorrectWord</c> is emitted, <c>error.WrongWord.Length</c> characters are consumed
/// from the reader without being copied, and the method returns so the next call resumes
/// right after the replaced word.
/// </remarks>
/// <param name="error">The misspelling to substitute, or <c>null</c> to copy everything left.</param>
public void Write(MisspelledWord error = null)
{
    int batch = 0;
    // Fully qualified so the block does not depend on a `using System.Text;` directive.
    System.Text.StringBuilder data = new System.Text.StringBuilder();

    while (!reader.EndOfStream)
    {
        data.Append((char)reader.Read());
        pos++;
        batch++;

        if (error != null && pos == error.GetPosition())
        {
            // Emit the correction and skip over the original (wrong) word in the input.
            data.Append(error.CorrectWord);
            for (int i = 0; i < error.WrongWord.Length; i++)
            {
                pos++;
                reader.Read();
            }
            break;
        }

        if (batch > maxCharsInBatch)
        {
            writer.Write(data.ToString());
            data.Length = 0;
            // Fix: the counter was never reset, so after the first flush every single
            // character triggered its own write.
            batch = 0;
        }
    }

    // Flush whatever is left in the buffer (possibly nothing).
    writer.Write(data.ToString());
}
/// <summary>
/// Forwards the given misspelling to <c>Write</c>.
/// </summary>
/// <param name="error">The misspelling passed through to <c>Write</c>.</param>
public void Push(MisspelledWord error)
{
    this.Write(error);
}
/// <summary>
/// Chooses a correction for the misspelling and stores it, with an accuracy estimate,
/// on the <paramref name="misspelling"/> object.
/// </summary>
/// <remarks>
/// Candidates come from the accent model when it is configured and returns at least one
/// word; otherwise from the error model. With several candidates, the language model
/// probability of each is multiplied by the candidate's own weight and the highest product
/// wins; accuracy is the winner's share of the sum of all products, in percent. With a
/// single candidate that candidate is taken with accuracy 100. If
/// <c>skipCandidatesMissingInNgrams</c> is set and the language model did not find any
/// candidate, the misspelling is marked <c>RevokedByLm</c> and no correction is stored.
/// </remarks>
/// <param name="misspelling">The misspelling to correct; updated in place.</param>
public void Correct(MisspelledWord misspelling)
{
    List<string> candidatesAccent = null;
    string word = null;
    double accuracy = 0;

    if (this.accentModel != null)
    {
        candidatesAccent = this.accentModel.AddAccent(misspelling.WrongWord);
    }

    Dictionary<string, double> candidates;
    if (null != candidatesAccent && candidatesAccent.Count > 0)
    {
        misspelling.CorrectedBy = CorrectedBy.AccentModel;
        candidates = candidatesAccent.ToDictionary();
    }
    else
    {
        misspelling.CorrectedBy = CorrectedBy.ErrorModel;
        candidates = this.errorModel.GeneratePossibleWords(misspelling.WrongWord);
    }

    if (candidates.Count > 1)
    {
        double totalProps = 0;
        LanguageModelEvaluation evaluation = this.languageModel.EvaluateCandidates(misspelling, candidates);

        if (skipCandidatesMissingInNgrams && !evaluation.FoundInNgrams)
        {
            misspelling.RevokedByLm = true;
            return;
        }

        // Combine the language model probability with each candidate's own weight.
        foreach (KeyValuePair<string, double> option in candidates)
        {
            evaluation.Probabilities[option.Key] *= option.Value;
            totalProps += evaluation.Probabilities[option.Key];
        }

        // Pick the candidate with the highest combined score (the first one wins ties).
        double? max = null;
        foreach (KeyValuePair<string, double> pair in evaluation.Probabilities)
        {
            if (null == max || pair.Value > max)
            {
                max = pair.Value;
                word = pair.Key;
            }
        }

        if (max.HasValue)
        {
            // Fix: when every combined score is 0 the original divided by zero and stored
            // NaN as the accuracy. A non-positive total now yields an accuracy of 0.
            accuracy = totalProps > 0 ? (max.Value * 100) / totalProps : 0;
        }

        misspelling.CorrectedBy = misspelling.CorrectedBy == CorrectedBy.ErrorModel
            ? CorrectedBy.ErrorAndLanguageModel
            : CorrectedBy.AccentAndLanguageModel;
    }
    else if (candidates.Count == 1)
    {
        accuracy = 100;
        word = candidates.First().Key;
    }

    if (null != word)
    {
        misspelling.Accuracy = accuracy;
        misspelling.CorrectWord = word;
    }
}
/// <summary>
/// Builds a <c>MisspelledWord</c> for the entry at index <c>ContextSize</c> of the current
/// window, if that entry is flagged as an error and its surroundings are usable.
/// </summary>
/// <returns>
/// The misspelled word, or <c>null</c> when the entry is not flagged, when too many entries
/// of the window are flagged, when both direct neighbors are flagged, or when
/// <c>AreNeighborsInContext()</c> reports false.
/// </returns>
public MisspelledWord GetMisspelledWord()
{
    // Nothing to do unless the entry at index ContextSize is marked as an error.
    if (!this.errorPositions[this.ContextSize])
    {
        return null;
    }

    int flaggedCount = 0;
    for (int index = 0; index < this.errorPositions.Count; index++)
    {
        flaggedCount += this.errorPositions[index] ? 1 : 0;
    }

    // Detection of a different language: (almost) every entry in the window is flagged.
    if (flaggedCount >= (this.windowSize - 1))
    {
        return null;
    }

    // The surrounding words contain errors as well, so skip this one.
    bool previousFlagged = this.errorPositions[this.ContextSize - 1];
    if (previousFlagged && this.errorPositions[this.ContextSize + 1])
    {
        return null;
    }

    MisspelledWord candidate = new MisspelledWord(this.history, this.ContextSize);
    return candidate.AreNeighborsInContext() ? candidate : null;
}