/// <summary>
/// Creates the bot by loading its two serialized dependencies: the network
/// stored at <c>Config.FILENAME_WHENBORN_NET</c> and the word histogram
/// stored at <c>Config.FILENAME_HISTOGRAM</c>.
/// </summary>
/// <remarks>
/// Each object returned by <c>SerializeObject.Load</c> is cast to its expected
/// type, so a file holding a different type fails the cast here.
/// </remarks>
public YearBornBot()
{
    // Network first, histogram second: same order as the files are read.
    var loadedNetwork = SerializeObject.Load(Config.FILENAME_WHENBORN_NET);
    network = (FeedforwardNetwork)loadedNetwork;

    var loadedHistogram = SerializeObject.Load(Config.FILENAME_HISTOGRAM);
    histogram = (WordHistogram)loadedHistogram;
}
/// <summary>
/// Checks <c>WordHistogram.StripSymbols</c> against a table of
/// (expected, input) pairs: symbols around a word, a hyphen inside a word,
/// symbol-only input and the empty string.
/// </summary>
public void Test_StripSymbols()
{
    // Column 0 is the expected result, column 1 is the raw input.
    string[,] cases =
    {
        { "test-moi", ",test-moi!!!..." },
        { "test", ",test!!!..." },
        { "fhdhfejfdshfiwfsjkjfdo", "...,fhdhfejfdshfiwfsjkjfdo?" },
        { "fhdhfejfdshfiwfsjkjfdo", "...,fhdhfejfdshfiwfsjkjfdo ?" },
        { "aaaa", "aaaa?" },
        { "aaaa", "?aaaa" },
        { "aaaa", "aaaa" },
        { "", "#$%^&*" },
        { "", "" },
    };

    for (int row = 0; row < cases.GetLength(0); row++)
    {
        string expected = cases[row, 0];
        string input = cases[row, 1];

        Assert.AreEqual(expected, WordHistogram.StripSymbols(input));
    }
}
/// <summary>
/// Builds a word histogram from a text file and writes it out as several
/// text files under a <c>dict</c> subdirectory next to the input file.
/// </summary>
/// <remarks>
/// For an input <c>dir/name.ext</c> the following files are written, all with
/// the encoding reported by <c>UtilsPath.ReadTextFileAndGetEncoding</c>:
/// <list type="bullet">
/// <item><c>dir/dict/name.histogram-counts.txt</c>: one "word TAB count" line per entry.</item>
/// <item><c>dir/dict/name.histogram.txt</c>: one word per line.</item>
/// <item><c>dir/dict/name.histogram-gtN.txt</c>: the same words split into
/// files of at most 2000 lines each, with N starting at 1.</item>
/// </list>
/// The <c>dict</c> directory is created if it does not exist yet.
/// </remarks>
/// <param name="filenameTxt">Path of the text file to analyse.</param>
private void CreateWordHistogram(string filenameTxt)
{
    var outDir = Path.Combine(Path.GetDirectoryName(filenameTxt), "dict");

    // File.WriteAllText does not create missing directories; make sure the
    // target exists (this is a no-op when it is already there).
    Directory.CreateDirectory(outDir);

    var outPrefix = Path.Combine(outDir, Path.GetFileNameWithoutExtension(filenameTxt));

    Encoding encoding;
    var text = UtilsPath.ReadTextFileAndGetEncoding(filenameTxt, out encoding);
    var hist = WordHistogram.BuildHistogramFromText(text);

    File.WriteAllText(
        $"{outPrefix}.histogram-counts.txt",
        String.Join("\n", hist.Words.Select(x => $"{x.Word}\t{x.Count}")),
        encoding);

    File.WriteAllText(
        $"{outPrefix}.histogram.txt",
        String.Join("\n", hist.Words.Select(x => x.Word)),
        encoding);

    // Smaller 2000-word files for "GT", which cannot take the full list at
    // once (presumably Google Translate; TODO confirm).
    int index = 0;
    foreach (var chunk2000 in hist.Words.Chunk(2000))
    {
        index++;
        Debug.WriteLine($"{index}: {chunk2000.Count()} lines");

        File.WriteAllText(
            $"{outPrefix}.histogram-gt{index}.txt",
            String.Join("\n", chunk2000.Select(x => x.Word)),
            encoding);
    }
}