Example #1
0
        /// <summary>
        /// Returns the surface form of the stem of <paramref name="word"/>.
        /// When the analyzer yields several candidate analyses, the one whose morpheme
        /// id sequence receives the highest score from <c>model.GetSentenceProbability</c> wins.
        /// </summary>
        /// <param name="word">The word to stem.</param>
        /// <returns>
        /// The word itself when the analyzer finds no analysis; otherwise the stem surface
        /// of the single or best-scoring analysis.
        /// </returns>
        public string GetStem(string word)
        {
            IList<Word> candidates = analyzer.Analyze(word);

            switch (candidates.Count)
            {
                case 0:
                    // Nothing to choose from: hand the input back unchanged.
                    return word;

                case 1:
                    // A single analysis needs no scoring.
                    return candidates[0].GetStem().GetSurface();
            }

            // Index 0 remains selected if no score ever exceeds negative infinity.
            int bestIndex = 0;
            double bestScore = double.NegativeInfinity;

            for (int index = 0; index < candidates.Count; index++)
            {
                double score = model.GetSentenceProbability(candidates[index].GetMorphemeIds());

                // Strict comparison: on ties the earliest candidate is kept.
                if (score > bestScore)
                {
                    bestIndex = index;
                    bestScore = score;
                }
            }

            return candidates[bestIndex].GetStem().GetSurface();
        }
        /// <summary>
        /// Analyzes the single word "banana" and checks the recorded word list, the
        /// word-length histogram and the follower table of each expected subword.
        /// </summary>
        public void It_can_analyze_banana()
        {
            WordAnalyzer.Analyze("banana");

            // The analyzed word and its length (6) must each be recorded once.
            Expect(WordAnalyzer.AnalyzedWords, EquivalentTo(new[] { "banana" }));
            Expect(WordAnalyzer.WordLengthFrequency.Count, EqualTo(1));
            Expect(WordAnalyzer.WordLengthFrequency[6], EqualTo(1));

            // Six distinct subwords are expected to have follower entries.
            Expect(WordAnalyzer.SubwordFollowingFrequency.Count, EqualTo(6));

            Expect(
                WordAnalyzer.SubwordFollowingFrequency["ba"],
                EquivalentTo(new Dictionary<string, int> { { "na", 1 }, { "nan", 1 }, { "nana", 1 } }));

            Expect(
                WordAnalyzer.SubwordFollowingFrequency["an"],
                EquivalentTo(new Dictionary<string, int> { { "an", 1 }, { "ana", 1 } }));

            Expect(
                WordAnalyzer.SubwordFollowingFrequency["na"],
                EquivalentTo(new Dictionary<string, int> { { "na", 1 } }));

            Expect(
                WordAnalyzer.SubwordFollowingFrequency["ban"],
                EquivalentTo(new Dictionary<string, int> { { "an", 1 }, { "ana", 1 } }));

            Expect(
                WordAnalyzer.SubwordFollowingFrequency["ana"],
                EquivalentTo(new Dictionary<string, int> { { "na", 1 } }));

            Expect(
                WordAnalyzer.SubwordFollowingFrequency["bana"],
                EquivalentTo(new Dictionary<string, int> { { "na", 1 } }));
        }
Example #3
0
 /// <summary>
 /// Walks over <paramref name="tokens"/> and runs <paramref name="analyzer"/> on every
 /// token for which <c>Cache.TryAnalyze</c> reports failure. The analysis results
 /// themselves are discarded.
 /// </summary>
 /// <param name="tokens">Tokens to process.</param>
 /// <param name="analyzer">Analyzer used when the cache lookup fails.</param>
 private static void Process(IEnumerable<string> tokens, WordAnalyzer analyzer)
 {
     foreach (string token in tokens)
     {
         IList<Word> cached;
         bool foundInCache = Cache.TryAnalyze(token, out cached);
         if (foundInCache)
         {
             continue;
         }

         // Cache lookup failed: fall back to a full analysis.
         analyzer.Analyze(token);
     }
 }
Example #4
0
 /// <summary>
 /// Analyzes each word and prints, per word, a header with the number of analyses
 /// followed by every analysis on its own tab-indented line.
 /// </summary>
 /// <param name="analyzer">Analyzer used to analyze the words.</param>
 /// <param name="words">Words to analyze.</param>
 public static void Analyze(WordAnalyzer analyzer, IEnumerable<string> words)
 {
     foreach (string word in words)
     {
         IList<Word> analyses = analyzer.Analyze(word);

         Console.WriteLine("\n{0} için {1} çözüm bulundu:", word, analyses.Count);

         for (int i = 0; i < analyses.Count; i++)
         {
             Console.WriteLine("\t{0}\n", analyses[i]);
         }
     }
 }
Example #5
0
        /// <summary>
        /// Runs <paramref name="analyzer"/> on the same <paramref name="word"/>
        /// <c>Million</c> times and prints the elapsed wall-clock time in milliseconds.
        /// </summary>
        /// <param name="word">The word analyzed on every iteration.</param>
        /// <param name="analyzer">The analyzer being timed.</param>
        public static void TestMillionTimesWithSingleWord(string word, WordAnalyzer analyzer)
        {
            Stopwatch sw = Stopwatch.StartNew();

            for (int i = 0; i < Million; i++)
            {
                analyzer.Analyze(word);
            }

            sw.Stop();

            // The closing quote now directly follows the word; the previous format string
            // printed a stray space inside the quotes ("word ").
            Console.WriteLine("Time taken for the word \"{0}\" is {1}ms", word, sw.Elapsed.TotalMilliseconds);

            // Explicit collection is kept from the original; presumably it stops garbage
            // from this run skewing a following measurement (confirm with callers).
            GC.Collect();
        }
Example #6
0
        /// <summary>
        /// Reads <paramref name="inputFilename"/> as UTF-8, analyzes every line, and writes
        /// the lines that produced no analysis to <paramref name="undefinedOutputFilename"/>.
        /// </summary>
        /// <param name="analyzer">Analyzer applied to each line.</param>
        /// <param name="inputFilename">Path of the file whose lines are analyzed.</param>
        /// <param name="undefinedOutputFilename">Path that receives the lines without any analysis.</param>
        public static void Analyze(WordAnalyzer analyzer, string inputFilename, string undefinedOutputFilename)
        {
            IList<string> unanalyzed = new List<string>();

            foreach (string entry in File.ReadAllLines(inputFilename, Encoding.UTF8))
            {
                IList<Word> analyses = analyzer.Analyze(entry);
                if (analyses.Any())
                {
                    continue;
                }

                // No analysis at all: remember the line for the report file.
                unanalyzed.Add(entry);
            }

            File.WriteAllLines(undefinedOutputFilename, unanalyzed);
        }
Example #7
0
        /// <summary>
        ///     Generates the source text of a ContainsAnalysis(token, analysis) test that uses only
        ///     the first solution of each word: one <c>[TestCase("word", "analysis")]</c> line per
        ///     word, followed by the test method itself.
        ///     A word without any solution is reported on the console and skipped, so that no
        ///     malformed attribute line ends up in the generated code.
        /// </summary>
        /// <param name="words">Words for which test cases are generated.</param>
        /// <param name="testName">Prefix of the generated method name (<c>{testName}Test</c>).</param>
        /// <returns>The generated test source as a single string.</returns>
        public static string GenerateContainsAnalysisTest(string[] words, string testName)
        {
            var sb = new StringBuilder();

            foreach (string word in words)
            {
                IList<Word> solutions = Analyzer.Analyze(word);

                // Explicit check instead of catching the indexing failure: the old code
                // swallowed the exception and still appended a dangling ")]" line.
                if (solutions == null || solutions.Count == 0)
                {
                    Console.WriteLine(word + " çözümlenemedi.");
                    continue;
                }

                sb.AppendFormat("[TestCase(\"{0}\", \"{1}\")]", word, solutions[0]).Append("\n");
            }

            sb.AppendFormat("public void {0}Test(string token, string analysis)", testName).AppendLine();
            sb.Append("{").AppendLine();
            sb.Append("\tTester.ContainsAnalysis(token, analysis);").AppendLine();
            sb.Append("}").AppendLine();
            return sb.ToString();
        }
Example #8
0
        /// <summary>
        ///     Checks that every expected analysis occurs exactly once among the analyses of the
        ///     token. For example, expecting {a,b} against analyses {a,b,c} passes; expecting {a,b}
        ///     against {a,c,d} fails; expecting {a,b} against {a,a,b} fails as well.
        ///     With a single expected analysis {a} the outcome matches that of the ContainsAnalysis
        ///     method (as stated by the original note; that method is not shown here).
        ///     The order of the expected analyses does not matter.
        /// </summary>
        /// <param name="token">The word to analyze.</param>
        /// <param name="expectedAnalyses">The analyses that must each appear exactly once.</param>
        public static void ContainsAnalyses(string token, string[] expectedAnalyses)
        {
            IList<Word> solutions = Analyzer.Analyze(token);

            foreach (string expected in expectedAnalyses)
            {
                // Count how many solutions carry exactly this analysis.
                int occurrences = 0;
                foreach (Word solution in solutions)
                {
                    if (solution.Analysis.Equals(expected))
                    {
                        occurrences++;
                    }
                }

                Assert.AreEqual(1, occurrences);
            }
        }
Example #9
0
        /// <summary>
        /// Analyzes every word and writes one line per word to
        /// <paramref name="undefinedOutputFilename"/>: the word itself followed by each of its
        /// analyses, separated by tab characters.
        /// </summary>
        /// <param name="analyzer">Analyzer applied to each word.</param>
        /// <param name="words">Words to analyze.</param>
        /// <param name="undefinedOutputFilename">Path of the file that receives the output lines.</param>
        public static void AnalyzeTokensToFile(WordAnalyzer analyzer, IEnumerable<string> words,
                                               string undefinedOutputFilename)
        {
            IList<string> outputLines = new List<string>();

            foreach (string word in words)
            {
                IList<Word> analyses = analyzer.Analyze(word);

                // Start with the word, then append a tab-prefixed column per analysis.
                string row = word;
                for (int i = 0; i < analyses.Count; i++)
                {
                    row = row + "\t" + analyses[i];
                }

                outputLines.Add(row);
            }

            File.WriteAllLines(undefinedOutputFilename, outputLines);
        }
Example #10
0
        /// <summary>
        /// For every analysis of every word, swaps the analysis' root for the first root that
        /// <c>Language.Turkish</c> returns for the surface <paramref name="root"/>, and records
        /// the surface before and after the swap.
        /// </summary>
        /// <param name="root">Surface form used to look up the replacement root.</param>
        /// <param name="words">Words whose analyses get their root replaced.</param>
        /// <returns>
        /// One entry per analysis, formatted as "original surface", a tab, "surface after replacement".
        /// </returns>
        public static String[] ReplaceRoots(string root, string[] words)
        {
            Language language = Language.Turkish;
            WordAnalyzer wordAnalyzer = new WordAnalyzer(language);
            List<string> results = new List<string>();

            foreach (string word in words)
            {
                foreach (Word analysis in wordAnalyzer.Analyze(word, true, true))
                {
                    // Capture the surface before the root is swapped.
                    string before = analysis.GetSurface();

                    analysis.Root = language.GetRootsHavingSurface(root).First();

                    string after = analysis.GetSurface();
                    results.Add(before + "\t" + after);
                }
            }

            return results.ToArray();
        }
Example #11
0
        /// <summary>
        /// Returns the surface form of the stem of <paramref name="word"/>, using fixed
        /// suffix-based rules to choose between competing analyses.
        /// </summary>
        /// <remarks>
        /// With two or more analyses the list is reversed first. The second entry of the
        /// reversed list is preferred over the first when one of three suffix-pair patterns
        /// matches; otherwise the first entry is used.
        /// </remarks>
        /// <param name="word">The word to stem.</param>
        /// <returns>
        /// The word itself when the analyzer finds no analysis; otherwise the stem surface of
        /// the selected analysis.
        /// </returns>
        public string GetStem(string word)
        {
            IList<Word> analyses = analyzer.Analyze(word);

            switch (analyses.Count)
            {
                case 0:
                    return word;

                case 1:
                    return analyses[0].GetStem().GetSurface();
            }

            IList<Word> reversed = analyses.Reverse().ToList();
            Word first = reversed[0];
            Word second = reversed[1];

            // The three patterns are tested in the original order with short-circuiting,
            // so exactly the same suffix checks run as before.
            bool preferSecond =
                (first.HasSuffixAt("IY_FIIL_lA", 1) && second.HasSuffixAt("FY_EDILGEN_Ul_(U)n", 1)) ||
                (first.LastSuffixEquals("FIILIMSI_SIFAT_(y)AcAK") && second.LastSuffixEquals("FC_ZAMAN_GELECEK_(y)AcAK")) ||
                (first.LastSuffixEquals("IY_FIIL_lA") && second.LastSuffixEquals("IC_HAL_VASITA_(y)lA"));

            Word chosen = preferSecond ? second : first;
            return chosen.GetStem().GetSurface();
        }
Example #12
0
        /// <summary>
        /// Tells whether the analyzer produces at least one analysis for <paramref name="word"/>.
        /// </summary>
        /// <param name="word">The word to analyze.</param>
        /// <returns><c>true</c> when the analysis result contains at least one entry.</returns>
        public static bool HasSolution(string word)
        {
            return Analyzer.Analyze(word).Count > 0;
        }