/// <summary>
/// Test fixture setup: provides a fresh analyzer that accepts subwords
/// between 2 and 4 characters long.
/// </summary>
public void SetUp()
{
    var analyzer = new WordAnalyzer();
    analyzer.MinSubwordLength = 2;
    analyzer.MaxSubwordLength = 4;
    WordAnalyzer = analyzer;
}
/// <summary>
/// Rebuilds the analyzer with the given subword-length bounds, trains it on
/// <paramref name="words"/>, and wires up a word builder seeded with the
/// fixture's standard seed.
/// </summary>
/// <param name="minSubwordLength">Smallest subword the analyzer accepts.</param>
/// <param name="maxSubwordLength">Largest subword the analyzer accepts.</param>
/// <param name="capitalize">Whether generated words are capitalized.</param>
/// <param name="words">Training corpus for the analyzer.</param>
private void BuildWords(int minSubwordLength, int maxSubwordLength, bool capitalize, params string[] words)
{
    var analyzer = new WordAnalyzer
    {
        MinSubwordLength = minSubwordLength,
        MaxSubwordLength = maxSubwordLength,
    };
    analyzer.Analyze(words);

    this.wordAnalyzer = analyzer;
    this.wordBuilder = new WordBuilder(this.wordAnalyzer, standardSeed).Capitalize(capitalize);
}
/// <summary>
/// Measures analysis throughput over a corpus of one million distinct word types.
/// </summary>
/// <param name="analyzer">The analyzer under test.</param>
public static void TestWithAMillionWords(WordAnalyzer analyzer)
{
    string[] corpus = File.ReadAllLines(AMillionWords, Encoding.UTF8);

    var timer = Stopwatch.StartNew();
    Process(corpus, analyzer);
    timer.Stop();

    Console.WriteLine("Time taken for a million different words: {0} s", timer.Elapsed.TotalSeconds);
    // Benchmark harness: reclaim garbage now so the next run starts from a clean heap.
    GC.Collect();
}
/// <summary>
/// Runs every token through the analyzer, skipping tokens that already have a
/// cached analysis.
/// </summary>
/// <param name="tokens">Tokens to analyze.</param>
/// <param name="analyzer">The analyzer under test.</param>
private static void Process(IEnumerable<string> tokens, WordAnalyzer analyzer)
{
    foreach (string token in tokens)
    {
        // Cache hit: nothing to do — the cached solutions themselves are not needed here.
        if (Cache.TryAnalyze(token, out IList<Word> _))
        {
            continue;
        }

        analyzer.Analyze(token);
    }
}
/// <summary>
/// Measures analysis throughput over a million-token corpus that repeats words,
/// so the cache gets exercised; reports cache size, elapsed time, and heap usage.
/// </summary>
/// <param name="analyzer">The analyzer under test.</param>
public static void TestWithAMillionTokens(WordAnalyzer analyzer)
{
    string[] corpus = File.ReadAllLines(AMillionTokens, Encoding.UTF8);

    var timer = Stopwatch.StartNew();
    Process(corpus, analyzer);
    timer.Stop();

    Console.WriteLine(
        "For a million tokens\tcache: {0}\ttime: {1} s\tmemory: {2}",
        Cache.GetSize(),
        timer.Elapsed.TotalSeconds,
        GC.GetTotalMemory(false) / 1024);
    // Benchmark harness: reclaim garbage now so the next run starts from a clean heap.
    GC.Collect();
}
/// <summary>
/// Measures how long it takes to analyze the same word one million times
/// (exercises the analyzer's repeated-input path).
/// </summary>
/// <param name="word">The word to analyze repeatedly.</param>
/// <param name="analyzer">The analyzer under test.</param>
public static void TestMillionTimesWithSingleWord(string word, WordAnalyzer analyzer)
{
    Stopwatch sw = Stopwatch.StartNew();
    for (int i = 0; i < Million; i++)
    {
        analyzer.Analyze(word);
    }
    sw.Stop();
    // Fixed: removed the stray space before the closing quote, which printed as "word " instead of "word".
    Console.WriteLine("Time taken for the word \"{0}\" is {1}ms", word, sw.Elapsed.TotalMilliseconds);
    // Benchmark harness: reclaim garbage now so the next run starts from a clean heap.
    GC.Collect();
}
/// <summary>
/// Analyzes each word and prints every solution found to the console.
/// </summary>
/// <param name="analyzer">The analyzer to run.</param>
/// <param name="words">Words to analyze.</param>
public static void Analyze(WordAnalyzer analyzer, IEnumerable<string> words)
{
    foreach (string candidate in words)
    {
        IList<Word> solutions = analyzer.Analyze(candidate);

        // Header is intentionally Turkish: "{1} solutions found for {0}".
        Console.WriteLine("\n{0} için {1} çözüm bulundu:", candidate, solutions.Count);

        foreach (Word solution in solutions)
        {
            Console.WriteLine("\t{0}\n", solution);
        }
    }
}
/// <summary>
/// Analyzes every line of the input file and writes the lines that produced
/// no solutions (undefined words) to the output file.
/// </summary>
/// <param name="analyzer">The analyzer to run.</param>
/// <param name="inputFilename">UTF-8 file with one word per line.</param>
/// <param name="undefinedOutputFilename">Destination file for unanalyzable words.</param>
public static void Analyze(WordAnalyzer analyzer, string inputFilename, string undefinedOutputFilename)
{
    string[] lines = File.ReadAllLines(inputFilename, Encoding.UTF8);

    // Keep only the lines the analyzer could not produce any solution for.
    var undefined = lines.Where(line => !analyzer.Analyze(line).Any()).ToList();

    File.WriteAllLines(undefinedOutputFilename, undefined);
}
/// <summary>
/// Renders a tag cloud for a fixed sample sentence and saves it to
/// "tag_cloud.png" in the working directory.
/// </summary>
public static void DrawDefaultTagCloud()
{
    var layout = new CircularCloudLayouter();
    var text = "So I said yes to Thomas Clinton and later thought that I had said yes to God and later still realized I had said yes only to Thomas Clinton";

    // Parse the sample text and weight each word for sizing in the cloud.
    var analyzer = new WordAnalyzer();
    var weightedWords = analyzer.WeightWords(analyzer.TextAnalyzer(text));

    // Lay out the weighted words, render them, and persist the image.
    var tagLayout = new TagCloudLayouter(layout, weightedWords);
    var visualizer = new TagCloudVisualization(tagLayout.GetTags());
    visualizer.GetTagCloudImage().Save("tag_cloud.png");
}
/// <summary>
/// Analyzes each word and writes one line per word to the output file:
/// the word itself followed by each of its solutions, tab-separated.
/// </summary>
/// <param name="analyzer">The analyzer to run.</param>
/// <param name="words">Words to analyze.</param>
/// <param name="undefinedOutputFilename">Destination file for the report.</param>
public static void AnalyzeTokensToFile(WordAnalyzer analyzer, IEnumerable<string> words, string undefinedOutputFilename)
{
    IList<string> lines = new List<string>();
    foreach (string word in words)
    {
        IList<Word> solutions = analyzer.Analyze(word);
        // Fixed: repeated `line += "\t" + solution` was O(n²) in the number of
        // solutions; string.Join builds the line in a single pass.
        string suffix = string.Join("\t", solutions);
        lines.Add(solutions.Count > 0 ? word + "\t" + suffix : word);
    }
    File.WriteAllLines(undefinedOutputFilename, lines);
}
/// <summary>
/// For every analysis of every word, records the surface form before and
/// after swapping the solution's root for the given Turkish root
/// (tab-separated, one entry per solution).
/// </summary>
/// <param name="root">Surface form of the replacement root.</param>
/// <param name="words">Words to analyze and transform.</param>
/// <returns>"before\tafter" surface pairs, one per solution.</returns>
public static String[] ReplaceRoots(string root, string[] words)
{
    Language turkish = Language.Turkish;
    var analyzer = new WordAnalyzer(turkish);
    var results = new List<string>();

    foreach (string word in words)
    {
        foreach (Word parse in analyzer.Analyze(word, true, true))
        {
            // Capture the surface first, then mutate the root and read it again
            // so each entry shows the effect of the replacement.
            string before = parse.GetSurface();
            parse.Root = turkish.GetRootsHavingSurface(root).First();
            results.Add(before + "\t" + parse.GetSurface());
        }
    }

    return results.ToArray();
}
/// <summary>
/// FindRhymes should return null for whitespace-only input.
/// </summary>
public void TestFindRhymes_EnterEmptyString_ReturnsNull()
{
    // arrange
    // Mock setup belongs to arrangement, not the act phase.
    var input = " ";
    var data = new Mock<IData>();
    var mockData = new Dictionary<string, int> { { "Orange", 0 }, { "Dogs", 0 }, { "Strange", 0 } };
    data.Setup(x => x.WordsCollection()).Returns(mockData);
    var sut = new WordAnalyzer(data.Object);

    // act
    var testResult = sut.FindRhymes(input);

    // assert
    // Fixed: Assert.AreEqual takes (expected, actual); the arguments were reversed,
    // which produces a misleading failure message.
    Assert.AreEqual(null, testResult);
}
/// <summary>
/// FindRhymes should rank the closest rhyme first for a valid input word.
/// </summary>
public void TestFindRhymes_EnterValidInput_ReturnsBestMatch()
{
    // arrange
    // Mock setup belongs to arrangement, not the act phase.
    var input = "Strange";
    var data = new Mock<IData>();
    var mockData = new Dictionary<string, int> { { "Orange", 0 }, { "Dogs", 0 }, { "Lounge", 0 } };
    data.Setup(x => x.WordsCollection()).Returns(mockData);
    var sut = new WordAnalyzer(data.Object);

    // act
    var testResult = sut.FindRhymes(input);

    // assert
    // Fixed: Assert.AreEqual takes (expected, actual); the arguments were reversed,
    // which produces a misleading failure message.
    Assert.AreEqual("Orange", testResult[0]);
}
/// <summary>
/// Creates a stemmer backed by the given morphological analyzer.
/// </summary>
/// <param name="analyzer">Analyzer used to produce candidate stems; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="analyzer"/> is null.</exception>
public RuleBasedStemmer(WordAnalyzer analyzer)
{
    // Fail fast on a null dependency instead of deferring to a NullReferenceException later.
    this.analyzer = analyzer ?? throw new ArgumentNullException(nameof(analyzer));
}
/// <summary>Test fixture setup: provides a fresh, default-configured analyzer for each test.</summary>
public void SetUp() { wordAnalyzer = new WordAnalyzer(); }
/// <summary>
/// Creates a stemmer that scores candidate stems with an n-gram language model.
/// </summary>
/// <param name="model">Language model used for scoring; must not be null.</param>
/// <param name="analyzer">Analyzer used to produce candidate stems; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when either dependency is null.</exception>
public StatisticalStemmer(NGramModel model, WordAnalyzer analyzer)
{
    // Fail fast on null dependencies instead of deferring to a NullReferenceException later.
    this.model = model ?? throw new ArgumentNullException(nameof(model));
    this.analyzer = analyzer ?? throw new ArgumentNullException(nameof(analyzer));
}
/// <summary>
/// Test fixture setup: provides fresh layouter and analyzer instances for each test.
/// </summary>
public void SetUp()
{
    // The two collaborators are independent, so construction order does not matter.
    wordAnalyzer = new WordAnalyzer();
    layout = new CircularCloudLayouter();
}