Exemple #1
0
 /// <summary>
 /// Creates the bot by loading its two serialized dependencies: the
 /// feed-forward network stored under <c>Config.FILENAME_WHENBORN_NET</c>
 /// and the word histogram stored under <c>Config.FILENAME_HISTOGRAM</c>.
 /// </summary>
 public YearBornBot()
 {
     // Network first: load, then cast to the concrete type and store.
     var storedNetwork = SerializeObject.Load(Config.FILENAME_WHENBORN_NET);
     this.network = (FeedforwardNetwork)storedNetwork;

     // Histogram second, following the same load-cast-store sequence.
     var storedHistogram = SerializeObject.Load(Config.FILENAME_HISTOGRAM);
     this.histogram = (WordHistogram)storedHistogram;
 }
Exemple #2
0
 /// <summary>
 /// Checks <c>WordHistogram.StripSymbols</c> against a table of inputs:
 /// leading and trailing punctuation, an inner hyphen, trailing spaces
 /// before a symbol, a symbols-only string and the empty string.
 /// </summary>
 public void Test_StripSymbols()
 {
     // Each row is { expected result, raw input }; rows are checked in order.
     string[][] cases =
     {
         new[] { "test-moi", ",test-moi!!!..." },
         new[] { "test", ",test!!!..." },
         new[] { "fhdhfejfdshfiwfsjkjfdo", "...,fhdhfejfdshfiwfsjkjfdo?" },
         new[] { "fhdhfejfdshfiwfsjkjfdo", "...,fhdhfejfdshfiwfsjkjfdo    ?" },
         new[] { "aaaa", "aaaa?" },
         new[] { "aaaa", "?aaaa" },
         new[] { "aaaa", "aaaa" },
         new[] { "", "#$%^&*" },
         new[] { "", "" },
     };

     foreach (string[] row in cases)
     {
         string expected = row[0];
         string rawInput = row[1];

         Assert.AreEqual(expected, WordHistogram.StripSymbols(rawInput));
     }
 }
Exemple #3
0
        /// <summary>
        /// Builds a word histogram from a text file and writes the resulting
        /// word lists into a "dict" sub-directory next to the input file.
        /// </summary>
        /// <remarks>
        /// All output files use the input's file name (without extension) as a
        /// prefix and are written with the encoding reported while reading the input:
        /// <list type="bullet">
        /// <item><c>.histogram-counts.txt</c>: one "word TAB count" entry per line.</item>
        /// <item><c>.histogram.txt</c>: one word per line.</item>
        /// <item><c>.histogram-gtN.txt</c>: the same word list split by <c>Chunk(2000)</c>,
        /// with N starting at 1.</item>
        /// </list>
        /// The "dict" directory is created when it does not exist yet.
        /// </remarks>
        /// <param name="filenameTxt">Path of the text file to analyze.</param>
        private void CreateWordHistogram(string filenameTxt)
        {
            // GetDirectoryName yields null for a root path; fall back to a relative "dict" folder.
            var outDir    = Path.Combine(Path.GetDirectoryName(filenameTxt) ?? string.Empty, "dict");
            var outPrefix = Path.Combine(outDir, Path.GetFileNameWithoutExtension(filenameTxt));

            Encoding encoding;
            var      textFr = UtilsPath.ReadTextFileAndGetEncoding(filenameTxt, out encoding);
            var      hist   = WordHistogram.BuildHistogramFromText(textFr);

            // File.WriteAllText does not create missing directories, so make sure the
            // target exists. Done only after the input was read successfully, so a
            // failed read leaves no empty folder behind.
            Directory.CreateDirectory(outDir);

            File.WriteAllText($"{outPrefix}.histogram-counts.txt",
                              String.Join("\n", hist.Words.Select(x => $"{x.Word}\t{x.Count}")),
                              encoding);
            File.WriteAllText($"{outPrefix}.histogram.txt",
                              String.Join("\n", hist.Words.Select(x => x.Word)),
                              encoding);

            // Also emit the word list in smaller files for a consumer that cannot
            // take the full list at once ("GT" in the original note; presumably a
            // translation tool, TODO confirm).
            int index = 0;

            foreach (var chunk2000 in hist.Words.Chunk(2000))
            {
                index++;
                Debug.WriteLine($"{index}: {chunk2000.Count()} lines");
                File.WriteAllText($"{outPrefix}.histogram-gt{index}.txt", String.Join("\n", chunk2000.Select(x => x.Word)), encoding);
            }
        }