/// <summary>
/// Reads a space-separated frequency list (the wiktionary_frequency_list.txt
/// format) where each non-blank line is "&lt;frequency&gt; &lt;word&gt;".
/// </summary>
/// <remarks>
/// Every parsed entry is appended to <c>wordElements</c>; the list is not cleared
/// first. After the whole file has been read, <c>words</c> is replaced with the
/// words read by this call, each followed by "\r\n".
/// <c>Place</c> is the 1-based physical line number, so blank lines still
/// consume a place number.
/// </remarks>
/// <param name="filePath">Path of the file to read; decoded with <c>_isoLatin1Encoding</c>.</param>
/// <exception cref="FormatException">
/// A non-blank line has fewer than two fields, or its frequency field is not a valid integer.
/// </exception>
public void Read10000Format(string filePath)
{
    var sb = new StringBuilder();
    int lineCounter = 0;

    foreach (var line in File.ReadLines(filePath, _isoLatin1Encoding))
    {
        lineCounter++;

        // Treat whitespace-only lines as blank too: they would otherwise split
        // into zero fields and fail on the first index access.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        var elements = line.Split(new String[] { " " }, StringSplitOptions.RemoveEmptyEntries);
        if (elements.Length < 2)
        {
            throw new FormatException(
                "Line " + lineCounter + " of '" + filePath + "' must contain a frequency and a word: '" + line + "'.");
        }

        // Parse before touching wordElements so a malformed line never leaves a
        // half-initialised entry behind. The invariant culture keeps number
        // parsing independent of the machine's regional settings.
        // Single-character words are kept (no length filter is applied).
        var word = new WordElement
        {
            Place = lineCounter,
            Frequency = int.Parse(elements[0], System.Globalization.CultureInfo.InvariantCulture),
            Word = elements[1]
        };
        wordElements.Add(word);

        // Also accumulate the plain word list, one word per CRLF-terminated line.
        sb.Append(word.Word);
        sb.Append("\r\n");
    }

    words = sb.ToString();
}
/// <summary>
/// Reads a comma-separated frequency list (the ord10k.csv format). The first
/// line is skipped as a header; on every other non-blank line, field 0 is the
/// place, field 1 the frequency and field 4 the word.
/// </summary>
/// <remarks>
/// Every parsed entry is appended to <c>wordElements</c>; the list is not cleared
/// first. After the whole file has been read, <c>words</c> is replaced with the
/// words read by this call, each followed by "\r\n".
/// </remarks>
/// <param name="filePath">Path of the file to read; decoded with <c>_isoLatin1Encoding</c>.</param>
/// <exception cref="FormatException">
/// A data line has fewer than five non-empty fields, or its place or frequency
/// field is not a valid integer.
/// </exception>
public void Read10KFormat(string filePath)
{
    var sb = new StringBuilder();
    int lineCounter = 0;

    foreach (var line in File.ReadLines(filePath, _isoLatin1Encoding))
    {
        lineCounter++;

        // The first physical line is the column header.
        if (lineCounter == 1)
        {
            continue;
        }

        // Treat whitespace-only lines as blank too: they would otherwise reach
        // int.Parse and fail with an unhelpful message.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        // NOTE(review): RemoveEmptyEntries drops empty columns, which shifts the
        // indices of the fields after them - confirm the data never has empty
        // columns before the word field.
        var elements = line.Split(new String[] { "," }, StringSplitOptions.RemoveEmptyEntries);
        if (elements.Length < 5)
        {
            throw new FormatException(
                "Line " + lineCounter + " of '" + filePath + "' must contain at least five fields: '" + line + "'.");
        }

        // Parse before touching wordElements so a malformed line never leaves a
        // half-initialised entry behind. The invariant culture keeps number
        // parsing independent of the machine's regional settings.
        // Single-character words are kept (no length filter is applied).
        var word = new WordElement
        {
            Place = int.Parse(elements[0], System.Globalization.CultureInfo.InvariantCulture),
            Frequency = int.Parse(elements[1], System.Globalization.CultureInfo.InvariantCulture),
            Word = elements[4]
        };
        wordElements.Add(word);

        // Also accumulate the plain word list, one word per CRLF-terminated line.
        sb.Append(word.Word);
        sb.Append("\r\n");
    }

    words = sb.ToString();
}