/// <summary>
/// Measures analysis throughput over a corpus of one million distinct word types.
/// </summary>
/// <param name="analyzer">The analyzer run over every word in the corpus.</param>
public static void TestWithAMillionWords(WordAnalyzer analyzer)
{
    string[] corpus = File.ReadAllLines(AMillionWords, Encoding.UTF8);

    var timer = Stopwatch.StartNew();
    Process(corpus, analyzer);
    timer.Stop();

    Console.WriteLine("Time taken for a million different words: {0} s", timer.Elapsed.TotalSeconds);
    // Deliberate collection between benchmark runs so later tests start from a clean heap.
    GC.Collect();
}
/// <summary>
/// Measures analysis throughput over a one-million-token corpus in which the
/// same word may occur many times (so the cache should be exercised).
/// </summary>
/// <param name="analyzer">The analyzer run over every token in the corpus.</param>
public static void TestWithAMillionTokens(WordAnalyzer analyzer)
{
    string[] tokens = File.ReadAllLines(AMillionTokens, Encoding.UTF8);

    var timer = Stopwatch.StartNew();
    Process(tokens, analyzer);
    timer.Stop();

    Console.WriteLine(
        "For a million tokens\tcache: {0}\ttime: {1} s\tmemory: {2}",
        Cache.GetSize(),
        timer.Elapsed.TotalSeconds,
        GC.GetTotalMemory(false) / 1024); // reported in KiB
    // Deliberate collection between benchmark runs so later tests start from a clean heap.
    GC.Collect();
}
/// <summary>
/// Analyzes a single word one million times and reports the elapsed time.
/// </summary>
/// <param name="word">The word to analyze repeatedly.</param>
/// <param name="analyzer">The analyzer under test.</param>
public static void TestMillionTimesWithSingleWord(string word, WordAnalyzer analyzer)
{
    var sw = Stopwatch.StartNew();
    for (var i = 0; i < Million; i++)
    {
        analyzer.Analyze(word);
    }
    sw.Stop();
    // Fixed: the format string previously contained a stray space before the
    // closing quote, printing the word as "word " instead of "word".
    Console.WriteLine("Time taken for the word \"{0}\" is {1}ms", word, sw.Elapsed.TotalMilliseconds);
    // Deliberate collection between benchmark runs so later tests start from a clean heap.
    GC.Collect();
}
/// <summary>
/// Runs each token through the analyzer, but only when the cache cannot already
/// answer for it (presumably Cache.TryAnalyze is a lookup — the analyzer's own
/// result is discarded here, so only the timing side effect matters).
/// </summary>
/// <param name="tokens">Tokens to process.</param>
/// <param name="analyzer">Analyzer used for cache misses.</param>
private static void Process(IEnumerable<string> tokens, WordAnalyzer analyzer)
{
    foreach (string token in tokens)
    {
        IList<Word> cached;
        if (Cache.TryAnalyze(token, out cached))
        {
            continue; // cache hit — nothing to do
        }
        analyzer.Analyze(token);
    }
}
/// <summary>
/// Analyzes each word and prints every solution found to the console.
/// </summary>
/// <param name="analyzer">The analyzer producing solutions.</param>
/// <param name="words">Words to analyze.</param>
public static void Analyze(WordAnalyzer analyzer, IEnumerable<string> words)
{
    foreach (string word in words)
    {
        IList<Word> results = analyzer.Analyze(word);
        // Output text is Turkish: "{count} solutions found for {word}".
        Console.WriteLine("\n{0} için {1} çözüm bulundu:", word, results.Count);
        foreach (Word result in results)
        {
            Console.WriteLine("\t{0}\n", result);
        }
    }
}
/// <summary>
/// Builds a language definition from its component parts and wires up a
/// word analyzer bound to this instance.
/// </summary>
/// <param name="type">Identifier for the language.</param>
/// <param name="orthography">Writing-system rules.</param>
/// <param name="morphotactics">Morpheme-ordering rules.</param>
/// <param name="roots">Container of the language's roots.</param>
/// <param name="suffixes">Container of the language's suffixes.</param>
internal Language(
    LanguageType type,
    Orthography orthography,
    Morphotactics morphotactics,
    MorphemeContainer<Root> roots,
    MorphemeContainer<Suffix> suffixes)
{
    Type = type;
    Orthography = orthography;
    Morphotactics = morphotactics;
    Roots = roots;
    Suffixes = suffixes;
    // Created last so the analyzer sees a fully-populated language.
    Analyzer = new WordAnalyzer(this);
}
/// <summary>
/// Analyzes every line of the input file and writes the lines that produced
/// no solutions ("undefined" words) to the given output file.
/// </summary>
/// <param name="analyzer">The analyzer producing solutions.</param>
/// <param name="inputFilename">UTF-8 file with one word per line.</param>
/// <param name="undefinedOutputFilename">Destination for unanalyzable lines.</param>
public static void Analyze(WordAnalyzer analyzer, string inputFilename, string undefinedOutputFilename)
{
    var undefined = new List<string>();
    foreach (string line in File.ReadAllLines(inputFilename, Encoding.UTF8))
    {
        if (!analyzer.Analyze(line).Any())
        {
            undefined.Add(line);
        }
    }
    File.WriteAllLines(undefinedOutputFilename, undefined);
}
/// <summary>
/// Analyzes each word and writes one tab-separated line per word
/// ("word\tsolution1\tsolution2...") to the output file.
/// </summary>
/// <param name="analyzer">The analyzer producing solutions.</param>
/// <param name="words">Words to analyze.</param>
/// <param name="undefinedOutputFilename">Destination file path.</param>
public static void AnalyzeTokensToFile(WordAnalyzer analyzer, IEnumerable<string> words, string undefinedOutputFilename)
{
    IList<string> lines = new List<string>();
    // Reused StringBuilder replaces repeated string += concatenation, which
    // re-allocated the whole line for every appended solution.
    var lineBuilder = new StringBuilder();
    foreach (string word in words)
    {
        lineBuilder.Clear();
        lineBuilder.Append(word);
        foreach (Word solution in analyzer.Analyze(word))
        {
            lineBuilder.Append('\t').Append(solution);
        }
        lines.Add(lineBuilder.ToString());
    }
    File.WriteAllLines(undefinedOutputFilename, lines);
}
/// <summary>
/// Analyzes each word with the Turkish analyzer and, for every solution,
/// emits "originalSurface\tsurfaceAfterRootReplacement".
/// </summary>
/// <param name="root">Surface form identifying the replacement root.</param>
/// <param name="words">Words whose analyses get their roots replaced.</param>
/// <returns>One entry per (word, solution) pair.</returns>
public static String[] ReplaceRoots(string root, string[] words)
{
    Language turkish = Language.Turkish;
    var analyzer = new WordAnalyzer(turkish);
    var replacedWords = new List<string>();

    // The replacement root does not depend on the loop variables, so resolve
    // it at most once instead of once per solution. It is resolved lazily to
    // keep the original behavior of not throwing when no solutions exist.
    Root replacement = null;

    foreach (string word in words)
    {
        IEnumerable<Word> solutions = analyzer.Analyze(word, true, true);
        foreach (Word solution in solutions)
        {
            if (replacement == null)
            {
                // First() still throws InvalidOperationException when no root
                // has this surface, exactly as the original per-iteration call did.
                replacement = turkish.GetRootsHavingSurface(root).First();
            }

            string output = solution.GetSurface();
            solution.Root = replacement;
            output += "\t" + solution.GetSurface();
            replacedWords.Add(output);
        }
    }
    return replacedWords.ToArray();
}
/// <summary>
/// Creates a stemmer that derives stems from the given morphological analyzer.
/// </summary>
/// <param name="analyzer">Analyzer used to produce candidate analyses; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="analyzer"/> is null.</exception>
public RuleBasedStemmer(WordAnalyzer analyzer)
{
    // Fail fast here instead of with a NullReferenceException on first use.
    if (analyzer == null)
    {
        throw new ArgumentNullException(nameof(analyzer));
    }
    this.analyzer = analyzer;
}
/// <summary>
/// Creates a stemmer that combines an n-gram language model with a
/// morphological analyzer.
/// </summary>
/// <param name="model">N-gram model used for scoring; must not be null.</param>
/// <param name="analyzer">Analyzer used to produce candidate analyses; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when either argument is null.</exception>
public StatisticalStemmer(NGramModel model, WordAnalyzer analyzer)
{
    // Fail fast here instead of with a NullReferenceException on first use.
    if (model == null)
    {
        throw new ArgumentNullException(nameof(model));
    }
    if (analyzer == null)
    {
        throw new ArgumentNullException(nameof(analyzer));
    }
    this.model = model;
    this.analyzer = analyzer;
}