예제 #1
0
        /// <summary>
        /// Reads a space-separated frequency list (the wiktionary_frequency_list.txt
        /// format) where each non-blank line is "&lt;frequency&gt; &lt;word&gt;".
        /// One <c>WordElement</c> is appended to <c>wordElements</c> per entry, and
        /// <c>words</c> is replaced with the words read by this call, each followed by "\r\n".
        /// </summary>
        /// <param name="filePath">Path of the file to read; decoded with <c>_isoLatin1Encoding</c>.</param>
        /// <exception cref="FormatException">
        /// A non-blank line has fewer than two fields, or its first field is not a valid integer.
        /// </exception>
        public void Read10000Format(string filePath)
        {
            var sb = new StringBuilder();
            int lineCounter = 0;

            // The file is machine-readable data, so numbers are parsed culture-independently.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            foreach (var line in File.ReadLines(filePath, _isoLatin1Encoding)) {
                // Counted before the blank-line check, so Place is the physical
                // 1-based line number and blank lines still advance it.
                lineCounter++;

                // ignore blank lines, including lines that contain only whitespace
                if (string.IsNullOrWhiteSpace(line)) {
                    continue;
                }

                // parse
                var elements = line.Split(new String[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                if (elements.Length < 2) {
                    throw new FormatException(string.Format(invariant,
                        "Line {0} of '{1}': expected at least 2 space-separated fields but found {2}.",
                        lineCounter, filePath, elements.Length));
                }

                int frequency;
                if (!int.TryParse(elements[0], System.Globalization.NumberStyles.Integer, invariant, out frequency)) {
                    throw new FormatException(string.Format(invariant,
                        "Line {0} of '{1}': frequency '{2}' is not a valid integer.",
                        lineCounter, filePath, elements[0]));
                }

                // Single-character words are kept; the length filter is disabled:
                //if (elements[1].Length < 2) continue;

                // Populate the element fully before publishing it, so a malformed
                // line can never leave a half-initialised entry in wordElements.
                var word = new WordElement();
                word.Place = lineCounter;
                word.Frequency = frequency;
                word.Word = elements[1];
                wordElements.Add(word);

                // also add to the string
                sb.Append(word.Word);
                sb.Append("\r\n");
            }

            words = sb.ToString();
        }
예제 #2
0
        /// <summary>
        /// Reads a comma-separated frequency list (the ord10k.csv format). The first
        /// line is treated as a header and skipped. For every other non-blank line,
        /// field 0 is parsed as the place, field 1 as the frequency, and field 4 is
        /// taken as the word. One <c>WordElement</c> is appended to <c>wordElements</c>
        /// per entry, and <c>words</c> is replaced with the words read by this call,
        /// each followed by "\r\n".
        /// </summary>
        /// <param name="filePath">Path of the file to read; decoded with <c>_isoLatin1Encoding</c>.</param>
        /// <exception cref="FormatException">
        /// A data line has fewer than five fields, or its place or frequency field is not a valid integer.
        /// </exception>
        public void Read10KFormat(string filePath)
        {
            // Index of the last field this method reads (the word column).
            const int wordIndex = 4;

            var sb          = new StringBuilder();
            int lineCounter = 0;

            // The file is machine-readable data, so numbers are parsed culture-independently.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            foreach (var line in File.ReadLines(filePath, _isoLatin1Encoding))
            {
                lineCounter++;

                // skip header
                if (lineCounter == 1)
                {
                    continue;
                }

                // ignore blank lines, including lines that contain only whitespace
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                // parse
                // NOTE(review): RemoveEmptyEntries drops empty CSV fields, so the indices
                // below count non-empty fields only - confirm the file never has empty columns.
                var elements = line.Split(new String[] { "," }, StringSplitOptions.RemoveEmptyEntries);

                if (elements.Length <= wordIndex)
                {
                    throw new FormatException(string.Format(invariant,
                        "Line {0} of '{1}': expected at least {2} comma-separated fields but found {3}.",
                        lineCounter, filePath, wordIndex + 1, elements.Length));
                }

                int place;
                if (!int.TryParse(elements[0], System.Globalization.NumberStyles.Integer, invariant, out place))
                {
                    throw new FormatException(string.Format(invariant,
                        "Line {0} of '{1}': place '{2}' is not a valid integer.",
                        lineCounter, filePath, elements[0]));
                }

                int frequency;
                if (!int.TryParse(elements[1], System.Globalization.NumberStyles.Integer, invariant, out frequency))
                {
                    throw new FormatException(string.Format(invariant,
                        "Line {0} of '{1}': frequency '{2}' is not a valid integer.",
                        lineCounter, filePath, elements[1]));
                }

                // Single-character words are kept; the length filter is disabled:
                //if (elements[4].Length < 2) continue;

                // Populate the element fully before publishing it, so a malformed
                // row can never leave a half-initialised entry in wordElements.
                var word = new WordElement();
                word.Place     = place;
                word.Frequency = frequency;
                word.Word      = elements[wordIndex];
                wordElements.Add(word);

                // also add to the string
                sb.Append(word.Word);
                sb.Append("\r\n");
            }

            words = sb.ToString();
        }