Ejemplo n.º 1
0
 /// <summary>
 ///     Measures how long it takes to run <c>Process</c> over the lines of the
 ///     <c>AMillionWords</c> file (described as a million different word types)
 ///     and prints the elapsed time in seconds to the console.
 /// </summary>
 /// <param name="analyzer">Analyzer passed to <c>Process</c> together with the lines read from the file.</param>
 public static void TestWithAMillionWords(WordAnalyzer analyzer)
 {
     // The file is read before the clock starts, so loading it is not part of the measurement.
     var words = File.ReadAllLines(AMillionWords, Encoding.UTF8);

     var timer = new Stopwatch();
     timer.Start();
     Process(words, analyzer);
     timer.Stop();

     double elapsedSeconds = timer.Elapsed.TotalSeconds;
     Console.WriteLine("Time taken for a million different words: {0} s", elapsedSeconds);

     // Request a collection after the result has been reported.
     GC.Collect();
 }
Ejemplo n.º 2
0
 /// <summary>
 ///     Measures how long it takes to run <c>Process</c> over the lines of the
 ///     <c>AMillionTokens</c> file (described as a million-word corpus in which the
 ///     same words occur many times) and prints the cache size, the elapsed time in
 ///     seconds and the result of <c>GC.GetTotalMemory(false)</c> divided by 1024.
 /// </summary>
 /// <param name="analyzer">Analyzer passed to <c>Process</c> together with the lines read from the file.</param>
 public static void TestWithAMillionTokens(WordAnalyzer analyzer)
 {
     const string report = "For a million tokens\tcache: {0}\ttime: {1} s\tmemory: {2}";

     // The file is read before the clock starts, so loading it is not part of the measurement.
     var tokens = File.ReadAllLines(AMillionTokens, Encoding.UTF8);

     var timer = new Stopwatch();
     timer.Start();
     Process(tokens, analyzer);
     timer.Stop();

     // Arguments are kept inline so they are evaluated in the same order as before.
     Console.WriteLine(
         report,
         Cache.GetSize(),
         timer.Elapsed.TotalSeconds,
         GC.GetTotalMemory(false) / 1024);

     // Request a collection after the result has been reported.
     GC.Collect();
 }
Ejemplo n.º 3
0
 /// <summary>
 ///     Calls <c>analyzer.Analyze</c> on the same word <c>Million</c> times in a row
 ///     and prints the total elapsed time in milliseconds to the console.
 /// </summary>
 /// <param name="word">Word that is analyzed on every iteration and echoed in the report.</param>
 /// <param name="analyzer">Analyzer whose <c>Analyze</c> method is being timed.</param>
 public static void TestMillionTimesWithSingleWord(string word, WordAnalyzer analyzer)
 {
     Stopwatch timer = Stopwatch.StartNew();

     int completed = 0;
     while (completed < Million)
     {
         // The result is discarded on purpose; only the elapsed time matters here.
         analyzer.Analyze(word);
         completed++;
     }

     timer.Stop();

     double elapsedMilliseconds = timer.Elapsed.TotalMilliseconds;
     Console.WriteLine("Time taken for the word \"{0} \" is {1}ms", word, elapsedMilliseconds);

     // Request a collection after the result has been reported.
     GC.Collect();
 }
Ejemplo n.º 4
0
 /// <summary>
 ///     Walks over the tokens and, for each one, asks <c>Cache.TryAnalyze</c> first;
 ///     the analyzer is invoked only for tokens for which that call returns false.
 ///     Analysis results are not returned to the caller.
 /// </summary>
 /// <param name="tokens">Tokens to process, in enumeration order.</param>
 /// <param name="analyzer">Analyzer used when the cache lookup does not succeed.</param>
 private static void Process(IEnumerable<string> tokens, WordAnalyzer analyzer)
 {
     foreach (var current in tokens)
     {
         // Required by the out parameter; the value itself is never read here.
         IList<Word> cached;
         if (Cache.TryAnalyze(current, out cached))
         {
             continue;
         }

         analyzer.Analyze(current);
     }
 }
Ejemplo n.º 5
0
 /// <summary>
 ///     Analyzes every word and prints, for each one, a header line with the word and
 ///     the number of solutions found, followed by each solution on its own
 ///     tab-indented line.
 /// </summary>
 /// <param name="analyzer">Analyzer used to produce the solutions.</param>
 /// <param name="words">Words to analyze, in enumeration order.</param>
 public static void Analyze(WordAnalyzer analyzer, IEnumerable<string> words)
 {
     foreach (var word in words)
     {
         IList<Word> results = analyzer.Analyze(word);
         int found = results.Count;

         Console.WriteLine("\n{0} için {1} çözüm bulundu:", word, found);

         foreach (Word result in results)
         {
             Console.WriteLine("\t{0}\n", result);
         }
     }
 }
Ejemplo n.º 6
0
 /// <summary>
 ///     Stores the supplied language components on this instance and then creates
 ///     the <c>WordAnalyzer</c> bound to it.
 /// </summary>
 /// <param name="type">Value assigned to <c>Type</c>.</param>
 /// <param name="orthography">Value assigned to <c>Orthography</c>.</param>
 /// <param name="morphotactics">Value assigned to <c>Morphotactics</c>.</param>
 /// <param name="roots">Value assigned to <c>Roots</c>.</param>
 /// <param name="suffixes">Value assigned to <c>Suffixes</c>.</param>
 internal Language(
     LanguageType type,
     Orthography orthography,
     Morphotactics morphotactics,
     MorphemeContainer<Root> roots,
     MorphemeContainer<Suffix> suffixes)
 {
     this.Type = type;
     this.Orthography = orthography;
     this.Morphotactics = morphotactics;
     this.Roots = roots;
     this.Suffixes = suffixes;

     // Created last: the analyzer receives this instance, so every member above is already assigned.
     this.Analyzer = new WordAnalyzer(this);
 }
Ejemplo n.º 7
0
 /// <summary>
 ///     Reads the input file as UTF-8, analyzes every line, and writes the lines for
 ///     which the analyzer returned no solutions to the output file.
 /// </summary>
 /// <param name="analyzer">Analyzer applied to each input line.</param>
 /// <param name="inputFilename">Path of the file whose lines are analyzed.</param>
 /// <param name="undefinedOutputFilename">Path of the file that receives the lines without any solution.</param>
 public static void Analyze(WordAnalyzer analyzer, string inputFilename, string undefinedOutputFilename)
 {
     string[] inputLines = File.ReadAllLines(inputFilename, Encoding.UTF8);

     // Materialized before writing so the whole analysis completes before the output file is touched.
     List<string> unanalyzed = inputLines
         .Where(candidate => !analyzer.Analyze(candidate).Any())
         .ToList();

     File.WriteAllLines(undefinedOutputFilename, unanalyzed);
 }
Ejemplo n.º 8
0
 /// <summary>
 ///     Analyzes every word and writes one line per word to the output file: the word
 ///     itself followed by each of its solutions, separated by tab characters.
 /// </summary>
 /// <param name="analyzer">Analyzer used to produce the solutions.</param>
 /// <param name="words">Words to analyze, in enumeration order.</param>
 /// <param name="undefinedOutputFilename">
 ///     Path of the output file. Despite the name, a line is written for every word,
 ///     whether or not it has solutions.
 /// </param>
 public static void AnalyzeTokensToFile(WordAnalyzer analyzer, IEnumerable<string> words,
     string undefinedOutputFilename)
 {
     var rows = new List<string>();

     foreach (var word in words)
     {
         IList<Word> analyses = analyzer.Analyze(word);

         // Start with the word and append a tab plus the solution's text for each analysis.
         string row = word;
         foreach (Word analysis in analyses)
         {
             row = row + "\t" + analysis;
         }

         rows.Add(row);
     }

     File.WriteAllLines(undefinedOutputFilename, rows);
 }
Ejemplo n.º 9
0
 /// <summary>
 ///     For every solution of every word, records the solution's surface form, replaces
 ///     its root with the first root returned by <c>GetRootsHavingSurface(root)</c>, and
 ///     records the surface form again.
 /// </summary>
 /// <param name="root">Surface string used to look up the replacement root.</param>
 /// <param name="words">Words to analyze.</param>
 /// <returns>
 ///     One entry per solution, formatted as the surface before the replacement, a tab,
 ///     and the surface after the replacement.
 /// </returns>
 public static String[] ReplaceRoots(string root, string[] words)
 {
     Language language = Language.Turkish;
     WordAnalyzer wordAnalyzer = new WordAnalyzer(language);
     List<string> pairs = new List<string>();

     foreach (var word in words)
     {
         IEnumerable<Word> analyses = wordAnalyzer.Analyze(word, true, true);
         foreach (Word analysis in analyses)
         {
             string before = analysis.GetSurface();

             // NOTE(review): the lookup is repeated for every solution and First() throws when
             // nothing matches; hoisting it would change behavior for empty input, so it stays here.
             analysis.Root = language.GetRootsHavingSurface(root).First();

             string after = analysis.GetSurface();
             pairs.Add(before + "\t" + after);
         }
     }

     return pairs.ToArray();
 }
Ejemplo n.º 10
0
 /// <summary>
 ///     Creates a stemmer that keeps a reference to the given analyzer.
 /// </summary>
 /// <param name="analyzer">Analyzer stored in the <c>analyzer</c> field of this instance.</param>
 public RuleBasedStemmer(WordAnalyzer analyzer)
 {
     this.analyzer = analyzer;
 }
Ejemplo n.º 11
0
 /// <summary>
 ///     Creates a stemmer that keeps references to the given n-gram model and analyzer.
 /// </summary>
 /// <param name="model">Model stored in the <c>model</c> field of this instance.</param>
 /// <param name="analyzer">Analyzer stored in the <c>analyzer</c> field of this instance.</param>
 public StatisticalStemmer(NGramModel model, WordAnalyzer analyzer)
 {
     // The two assignments are independent of each other.
     this.analyzer = analyzer;
     this.model = model;
 }