/// <summary>
/// Reads the file at <c>SourcePath</c>, splits it into whitespace-separated words and
/// records, for every word, which word follows it. Results are accumulated in
/// <c>_stringBlobs</c>, and a summary is written to <c>OutputMessage</c>.
/// </summary>
/// <param name="textLength">
/// Maximum number of characters of the file to analyze. <c>0</c> means the whole file;
/// values larger than the file are clamped to the file length.
/// </param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="textLength"/> is negative.
/// </exception>
public void Parse(int textLength)
{
    if (textLength < 0)
    {
        throw new System.ArgumentOutOfRangeException(nameof(textLength), "Length cannot be less than zero.");
    }

    string text = System.IO.File.ReadAllText(SourcePath);

    // 0 means "no limit"; a limit beyond the end of the file is the same as no limit.
    if (textLength != 0 && textLength < text.Length)
    {
        text = text.Substring(0, textLength);
    }

    // Encode line structure as marker tokens before splitting on whitespace.
    // The blank-line marker carries a trailing space so it splits off from the next word;
    // the single-line-break marker has none and therefore stays glued to its neighbours.
    text = text.Replace("\r\n\r\n", "<!CaNLCaNL!> ");
    text = text.Replace("\r\n", "<!ca!>");

    var words = text.Split();

    for (var i = 0; i < words.Length; i++)
    {
        var word = words[i];

        // Consecutive whitespace yields empty tokens; they are not words.
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        var blob = _stringBlobs.FirstOrDefault(b => b.Blob == word);
        var isNewBlob = blob == null;

        if (isNewBlob)
        {
            blob = new WordBlob<string>(word);
        }

        // Find the next real word, skipping any run of empty tokens. This also covers the
        // final word pair, which a fixed look-ahead of two tokens would miss.
        var nextIndex = i + 1;
        while (nextIndex < words.Length && string.IsNullOrEmpty(words[nextIndex]))
        {
            nextIndex++;
        }

        if (nextIndex < words.Length)
        {
            blob.AddNextBlob(new WordBlob<string>(words[nextIndex]));
        }

        if (isNewBlob)
        {
            _stringBlobs.Add(blob);
        }
    }

    OutputMessage = $"Success from {SourcePath} to {OutputPath}, words analyzed: {_stringBlobs.Count}";
}
/// <summary>
/// Reads the file at <c>SourcePath</c> and records, for every character, which character
/// follows it. Results are accumulated in <c>_charBlobs</c>, and a summary is written to
/// <c>OutputMessage</c>. The total character count is also printed to the console.
/// </summary>
/// <param name="textLength">
/// Currently ignored: the whole file is always analyzed. Kept so the signature stays
/// parallel to <c>Parse</c>.
/// </param>
public void ParseChar(int textLength)
{
    string text = System.IO.File.ReadAllText(SourcePath);

    Console.WriteLine(text.Length);

    for (var i = 0; i < text.Length; i++)
    {
        var chr = text[i];

        var blob = _charBlobs.FirstOrDefault(b => b.Blob == chr);
        var isNewBlob = blob == null;

        if (isNewBlob)
        {
            blob = new WordBlob<char>(chr);
        }

        // Every character except the last one has a successor to record.
        if (i < text.Length - 1)
        {
            blob.AddNextBlob(new WordBlob<char>(text[i + 1]));
        }

        if (isNewBlob)
        {
            _charBlobs.Add(blob);
        }
    }

    OutputMessage = $"Success from {SourcePath} to {OutputPath}, chars analyzed: {_charBlobs.Count}";
}
/// <summary>
/// Registers <paramref name="wordBlob"/> as a successor of this blob. If a successor with
/// an equal <c>Blob</c> value is already stored, its <c>Occurrences</c> counter is
/// incremented instead of adding a duplicate entry.
/// </summary>
/// <param name="wordBlob">The successor to record; <c>null</c> is ignored.</param>
public void AddNextBlob(WordBlob<T> wordBlob)
{
    if (wordBlob == null)
    {
        return;
    }

    // The successor list is created on first use.
    if (_nextBlobs == null)
    {
        _nextBlobs = new List<WordBlob<T>>();
    }

    // The default comparer tolerates null Blob values and avoids boxing value-type T,
    // unlike calling Equals(object) on the stored value directly.
    var comparer = EqualityComparer<T>.Default;
    var existingBlob = _nextBlobs.FirstOrDefault(b => comparer.Equals(b.Blob, wordBlob.Blob));

    if (existingBlob == null)
    {
        _nextBlobs.Add(wordBlob);
    }
    else
    {
        existingBlob.Occurrences++;
    }
}