/// <summary>
/// Initializes a new instance of the <see cref="TextGenerator"/> class using one of the embedded corpora.
/// Loads the embedded "names.bin" corpus when <paramref name="wordType"/> is <c>WordTypes.Name</c>,
/// and the embedded "text.bin" corpus for every other value.
/// </summary>
/// <param name="wordType">Kind of words to generate; also stored on the instance.</param>
public TextGenerator(WordTypes wordType = WordTypes.Word)
        {
            // Pick the embedded resource first so the corpus is loaded in a single place.
            string resourceName = wordType == WordTypes.Name ? "names.bin" : "text.bin";

            this.corpus = Corpus.DeserializeFromEmbeddedResource(resourceName);
            this.wordType = wordType;
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="TextGenerator"/> class.
 /// Will generate text based on provided corpus. Use this to generate fake text based on different languages.
 /// </summary>
 /// <param name="corpus">The corpus to generate text from. Must not be <c>null</c>.</param>
 /// <exception cref="ArgumentNullException"><paramref name="corpus"/> is <c>null</c>.</exception>
 public TextGenerator(Corpus corpus)
 {
     // ArgumentNullException derives from ArgumentException, so callers that
     // catch the broader type keep working while getting the parameter name.
     if (corpus == null)
         throw new ArgumentNullException("corpus", "Corpus is not provided.");

     this.corpus = corpus;
 }
Esempio n. 3
0
        /// <summary>
        /// Creates the corpus from text.
        /// Text should be long enough to generate decent results (e.g. 10Kb+).
        /// </summary>
        /// <param name="text">
        /// The text. Every character that is not a Unicode letter, space, tab or newline is removed,
        /// and the remainder is lower-cased and split into words on spaces, tabs and newlines.
        /// </param>
        /// <returns>
        /// A new <see cref="Corpus"/> whose letter list contains one <see cref="LetterStats"/> entry per
        /// distinct dictionary key (as decided by that type's equality), with <c>CharCount</c> set to the
        /// total number of letters processed and <c>WordCount</c> set to the number of words.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public static Corpus CreateFromText(string text)
        {
            // Fail up front with the caller's parameter name instead of letting Regex.Replace report "input".
            if (text == null)
                throw new ArgumentNullException("text");

            Dictionary<LetterStats, LetterStats> letterData = new Dictionary<LetterStats, LetterStats>();
            int charCount = 0;

            // clean text: keep only letters and the whitespace used as word separators
            // NOTE(review): ToLower() uses the current culture, so results may differ between machines - confirm this is intended.
            string clean = Regex.Replace(text, "[^\\p{L} \t\n]", string.Empty).ToLower();

            string[] words = clean.Split(new char[] { ' ', '\n', '\t' }, StringSplitOptions.RemoveEmptyEntries);

            foreach (string word in words)
            {
                // '\0' marks "no previous letter" for the first character of each word.
                char prevLetter = '\0';
                for (int position = 0; position < word.Length; position++)
                {
                    char letter = word[position];
                    LetterStats candidate = new LetterStats(letter, prevLetter, position);

                    // Single lookup: bump the stored entry if this key was seen before,
                    // otherwise store the new instance as both key and value.
                    LetterStats existing;
                    if (letterData.TryGetValue(candidate, out existing))
                        existing.IncrementCount();
                    else
                        letterData.Add(candidate, candidate);

                    prevLetter = letter;
                }

                charCount += word.Length;
            }

            Corpus c = new Corpus() { CharCount = charCount, WordCount = words.Length };
            c.letters.AddRange(letterData.Keys);
            return c;
        }