/// <summary>
/// Returns the surface form of the stem of <paramref name="word"/>.
/// When the analyzer produces more than one solution, each solution's morpheme id sequence is passed to
/// <c>model.GetSentenceProbability</c> and the first solution with the greatest returned value is used.
/// </summary>
/// <param name="word">The word to stem.</param>
/// <returns>
/// The stem surface of the selected solution, or <paramref name="word"/> unchanged when the analyzer
/// returns no solution.
/// </returns>
public string GetStem(string word)
{
    IList<Word> candidates = analyzer.Analyze(word);

    switch (candidates.Count)
    {
        case 0:
            // Nothing to choose from: hand the input back untouched.
            return word;
        case 1:
            // A single solution needs no scoring.
            return candidates[0].GetStem().GetSurface();
    }

    int bestIndex = 0;
    double bestScore = double.NegativeInfinity;
    for (int index = 0; index < candidates.Count; index++)
    {
        double score = model.GetSentenceProbability(candidates[index].GetMorphemeIds());
        if (score <= bestScore || double.IsNaN(score))
        {
            // Only a strictly greater score replaces the current best, so ties keep the earlier solution.
            continue;
        }

        bestScore = score;
        bestIndex = index;
    }

    return candidates[bestIndex].GetStem().GetSurface();
}
/// <summary>
/// Analyzes the single word "banana" and verifies the analyzed-word list, the word length frequency
/// table and the subword-following frequency table exposed by <c>WordAnalyzer</c>.
/// </summary>
public void It_can_analyze_banana()
{
    WordAnalyzer.Analyze("banana");

    // Word-level bookkeeping: one word, of length six, seen once.
    Expect(WordAnalyzer.AnalyzedWords, EquivalentTo(new[] { "banana" }));
    Expect(WordAnalyzer.WordLengthFrequency.Count, EqualTo(1));
    Expect(WordAnalyzer.WordLengthFrequency[6], EqualTo(1));

    // Six distinct subwords are expected to have follower tables.
    Expect(WordAnalyzer.SubwordFollowingFrequency.Count, EqualTo(6));

    Expect(
        WordAnalyzer.SubwordFollowingFrequency["ba"],
        EquivalentTo(new Dictionary<string, int> { { "na", 1 }, { "nan", 1 }, { "nana", 1 } }));

    Expect(
        WordAnalyzer.SubwordFollowingFrequency["an"],
        EquivalentTo(new Dictionary<string, int> { { "an", 1 }, { "ana", 1 } }));

    Expect(
        WordAnalyzer.SubwordFollowingFrequency["na"],
        EquivalentTo(new Dictionary<string, int> { { "na", 1 } }));

    Expect(
        WordAnalyzer.SubwordFollowingFrequency["ban"],
        EquivalentTo(new Dictionary<string, int> { { "an", 1 }, { "ana", 1 } }));

    Expect(
        WordAnalyzer.SubwordFollowingFrequency["ana"],
        EquivalentTo(new Dictionary<string, int> { { "na", 1 } }));

    Expect(
        WordAnalyzer.SubwordFollowingFrequency["bana"],
        EquivalentTo(new Dictionary<string, int> { { "na", 1 } }));
}
/// <summary>
/// Runs every token through <paramref name="analyzer"/> unless <c>Cache.TryAnalyze</c> reports success
/// for it. The analysis results themselves are discarded.
/// </summary>
/// <param name="tokens">Tokens to process, in order.</param>
/// <param name="analyzer">Analyzer used for tokens that the cache lookup does not resolve.</param>
private static void Process(IEnumerable<string> tokens, WordAnalyzer analyzer)
{
    foreach (string token in tokens)
    {
        // The out value is required by the TryAnalyze signature but is not used here.
        IList<Word> cachedSolutions;
        bool foundInCache = Cache.TryAnalyze(token, out cachedSolutions);
        if (foundInCache)
        {
            continue;
        }

        analyzer.Analyze(token);
    }
}
/// <summary>
/// Analyzes each word and prints to the console how many solutions were found, followed by every
/// solution on its own tab-indented line.
/// </summary>
/// <param name="analyzer">Analyzer used to produce the solutions.</param>
/// <param name="words">Words to analyze, in order.</param>
public static void Analyze(WordAnalyzer analyzer, IEnumerable<string> words)
{
    foreach (string word in words)
    {
        IList<Word> results = analyzer.Analyze(word);

        // Header line (Turkish): "<count> solutions were found for <word>".
        Console.WriteLine("\n{0} için {1} çözüm bulundu:", word, results.Count);

        for (int index = 0; index < results.Count; index++)
        {
            Console.WriteLine("\t{0}\n", results[index]);
        }
    }
}
/// <summary>
/// Analyzes the same word <c>Million</c> times, prints the elapsed wall-clock time in milliseconds
/// and then forces a garbage collection.
/// </summary>
/// <param name="word">The word that is analyzed repeatedly.</param>
/// <param name="analyzer">The analyzer being timed.</param>
public static void TestMillionTimesWithSingleWord(string word, WordAnalyzer analyzer)
{
    var timer = new Stopwatch();
    timer.Start();

    for (int pass = 0; pass < Million; pass++)
    {
        analyzer.Analyze(word);
    }

    timer.Stop();
    double elapsedMilliseconds = timer.Elapsed.TotalMilliseconds;
    Console.WriteLine("Time taken for the word \"{0} \" is {1}ms", word, elapsedMilliseconds);

    // Explicit collection after the run; presumably so garbage from this run does not skew the next
    // measurement — confirm with callers.
    GC.Collect();
}
/// <summary>
/// Reads the input file as UTF-8, analyzes every line as a single token and writes the lines for
/// which the analyzer returned no solution to the output file.
/// </summary>
/// <param name="analyzer">Analyzer used to check each line.</param>
/// <param name="inputFilename">Path of the file to read; each line is passed to the analyzer as-is.</param>
/// <param name="undefinedOutputFilename">Path of the file that receives the lines without any solution.</param>
public static void Analyze(WordAnalyzer analyzer, string inputFilename, string undefinedOutputFilename)
{
    // ToList() forces the whole input to be analyzed before the output file is touched.
    IList<string> unresolved = File.ReadAllLines(inputFilename, Encoding.UTF8)
        .Where(line => !analyzer.Analyze(line).Any())
        .ToList();

    File.WriteAllLines(undefinedOutputFilename, unresolved);
}
/// <summary>
/// Generates the source text of a parameterized <c>ContainsAnalysis(token, analysis)</c> test that uses
/// only the first solution of each word.
/// A word without any solution is reported on the console and left out of the generated test cases.
/// </summary>
/// <param name="words">Words for which one <c>[TestCase]</c> attribute line each is generated.</param>
/// <param name="testName">Prefix of the generated method name; the method is named <c>{testName}Test</c>.</param>
/// <returns>The generated <c>[TestCase]</c> lines followed by the test method source.</returns>
public static string GenerateContainsAnalysisTest(string[] words, string testName)
{
    var sb = new StringBuilder("");
    foreach (string word in words)
    {
        IList<Word> solutions = Analyzer.Analyze(word);

        // Check for "no solution" explicitly instead of letting solutions[0] throw into a blanket catch.
        // Skipping the word entirely also keeps a dangling ")]" line out of the generated source.
        if (solutions == null || solutions.Count == 0)
        {
            Console.WriteLine(word + " çözümlenemedi.");
            continue;
        }

        sb.AppendFormat("[TestCase(\"{0}\", \"{1}\"", word, solutions[0]);
        sb.Append(")]").Append("\n");
    }

    sb.AppendFormat("public void {0}Test(string token, string analysis)", testName).AppendLine();
    sb.Append("{").AppendLine();
    sb.Append("\tTester.ContainsAnalysis(token, analysis);").AppendLine();
    sb.Append("}").AppendLine();
    return sb.ToString();
}
/// <summary>
/// Asserts that every expected analysis occurs exactly once among the analyses of the word.
/// For example, expected {a,b} against word analyses {a,b,c} passes;
/// expected {a,b} against {a,c,d} fails; expected {a,b} against {a,a,b} also fails.
/// According to the original author's note, a single expected analysis {a} gives the same outcome as
/// the ContainsAnalysis method (not verifiable from this block).
/// The order in which the expected analyses are given does not matter.
/// </summary>
/// <param name="token">The word to analyze.</param>
/// <param name="expectedAnalyses">The analyses that must each be present exactly once.</param>
public static void ContainsAnalyses(string token, string[] expectedAnalyses)
{
    IList<Word> actualAnalyses = Analyzer.Analyze(token);

    for (int index = 0; index < expectedAnalyses.Length; index++)
    {
        string expected = expectedAnalyses[index];
        int occurrences = actualAnalyses
            .Where(candidate => candidate.Analysis.Equals(expected))
            .Count();

        Assert.AreEqual(1, occurrences);
    }
}
/// <summary>
/// Analyzes every word and writes one line per word to the output file: the word itself followed by
/// each of its solutions, every solution preceded by a tab character.
/// </summary>
/// <param name="analyzer">Analyzer used to produce the solutions.</param>
/// <param name="words">Words to analyze, in order.</param>
/// <param name="undefinedOutputFilename">
/// Path of the file to write. Despite the name, every word is written, not only those without a solution.
/// </param>
public static void AnalyzeTokensToFile(WordAnalyzer analyzer, IEnumerable<string> words, string undefinedOutputFilename)
{
    IList<string> outputLines = new List<string>();

    foreach (string word in words)
    {
        IList<Word> results = analyzer.Analyze(word);

        string row = word;
        for (int index = 0; index < results.Count; index++)
        {
            row = string.Concat(row, "\t", results[index]);
        }

        outputLines.Add(row);
    }

    File.WriteAllLines(undefinedOutputFilename, outputLines);
}
/// <summary>
/// For every solution of every word, records the solution's surface form, replaces the solution's root
/// with the first root returned by <c>GetRootsHavingSurface(root)</c> for Turkish, and records the
/// surface form again. Each result entry is "original surface TAB surface after replacement".
/// </summary>
/// <param name="root">Surface form of the root to substitute into each solution.</param>
/// <param name="words">Words to analyze.</param>
/// <returns>One entry per solution, in analysis order.</returns>
public static String[] ReplaceRoots(string root, string[] words)
{
    Language turkish = Language.Turkish;
    WordAnalyzer analyzer = new WordAnalyzer(turkish);
    List<string> replacedWords = new List<string>();

    for (int wordIndex = 0; wordIndex < words.Length; wordIndex++)
    {
        // NOTE(review): the meaning of the two boolean flags is not visible from this block.
        IEnumerable<Word> analyses = analyzer.Analyze(words[wordIndex], true, true);

        foreach (Word analysis in analyses)
        {
            string originalSurface = analysis.GetSurface();

            // The root lookup is deliberately repeated per solution; it is only evaluated when there is
            // at least one solution to rewrite.
            analysis.Root = turkish.GetRootsHavingSurface(root).First();
            string replacedSurface = analysis.GetSurface();

            replacedWords.Add(originalSurface + "\t" + replacedSurface);
        }
    }

    return replacedWords.ToArray();
}
/// <summary>
/// Returns the surface form of the stem of <paramref name="word"/>, choosing between the last two
/// solutions of the analyzer with a fixed set of suffix rules.
/// The solution list is reversed; the (originally) last solution is used unless one of three suffix
/// pairings matches, in which case the second-to-last solution is used instead.
/// </summary>
/// <param name="word">The word to stem.</param>
/// <returns>
/// The stem surface of the selected solution, or <paramref name="word"/> unchanged when the analyzer
/// returns no solution.
/// </returns>
public string GetStem(string word)
{
    IList<Word> analyses = analyzer.Analyze(word);

    if (analyses.Count == 0)
    {
        return word;
    }

    if (analyses.Count == 1)
    {
        return analyses[0].GetStem().GetSurface();
    }

    IList<Word> reversed = analyses.Reverse().ToList();
    Word first = reversed[0];
    Word second = reversed[1];

    // Each rule pairs a suffix on the first candidate with a suffix on the second; the rules are
    // evaluated in this order and stop at the first match.
    bool preferSecond =
        (first.HasSuffixAt("IY_FIIL_lA", 1) && second.HasSuffixAt("FY_EDILGEN_Ul_(U)n", 1))
        || (first.LastSuffixEquals("FIILIMSI_SIFAT_(y)AcAK") && second.LastSuffixEquals("FC_ZAMAN_GELECEK_(y)AcAK"))
        || (first.LastSuffixEquals("IY_FIIL_lA") && second.LastSuffixEquals("IC_HAL_VASITA_(y)lA"));

    Word chosen = preferSecond ? second : first;
    return chosen.GetStem().GetSurface();
}
/// <summary>
/// Tells whether the analyzer returns at least one solution for <paramref name="word"/>.
/// </summary>
/// <param name="word">The word to analyze.</param>
/// <returns><c>true</c> when the solution count is greater than zero; otherwise <c>false</c>.</returns>
public static bool HasSolution(string word)
{
    return Analyzer.Analyze(word).Count > 0;
}