/// <summary>
/// Normalizes a word for analysis: lower-cases and trims its text, then keeps only
/// the pieces matched by <c>AlphaCheck</c>, concatenated in order.
/// </summary>
/// <param name="word">The word whose text (via <c>ToString()</c>) is normalized.</param>
/// <returns>The concatenation of every <c>AlphaCheck</c> match; empty when nothing matches.</returns>
public static string FormatWord(Word word)
{
    // Invariant lower-casing keeps the result independent of the machine's culture
    // (e.g. under tr-TR, "I".ToLower() yields a dotless 'ı' instead of 'i').
    string normalized = word.ToString().ToLowerInvariant().Trim();

    // StringBuilder avoids re-allocating the whole string for every match.
    // Fully qualified so the block does not depend on a `using System.Text;` directive.
    System.Text.StringBuilder builder = new System.Text.StringBuilder(normalized.Length);
    foreach (Match m in AlphaCheck.Matches(normalized))
    {
        builder.Append(m.Value);
    }

    return builder.ToString();
}
/// <summary>
/// Computes the mean of the <c>LetterFrequency</c> entries for the characters of a word.
/// </summary>
/// <param name="word">The word whose characters are looked up.</param>
/// <returns>The summed frequencies divided by <c>word.Length</c>.</returns>
public static float AverageLetterFrequency(Word word)
{
    float total = 0;

    // accumulate the frequency of every character the word enumerates
    foreach (char letter in word)
    {
        total += LetterFrequency[letter];
    }

    return total / word.Length;
}
/// <summary>
/// Scores a word by summing <c>1 - LetterFrequency[c]</c> over its characters and then
/// subtracting the <c>FirstLetterFrequency</c> entry of its first character.
/// </summary>
/// <param name="word">The word to score.</param>
/// <returns>The accumulated score.</returns>
public static float LetterFrequencyScore(Word word)
{
    float score = 0;

    foreach (char letter in word)
    {
        // each character contributes the complement of its frequency
        score += 1 - LetterFrequency[letter];
    }

    // the first character is looked up only after the whole word has been summed
    char initial = word.ToString()[0];
    score -= FirstLetterFrequency[initial];

    return score;
}
/// <summary>
/// Builds one tab-separated data line describing a word.
/// </summary>
/// <remarks>
/// Columns, in order: text, source, source date string, topic, complexity,
/// log10(complexity), length, uniqueness factor, unique character count, vowel count,
/// vowel ratio, consonant count, consonant ratio, first character, "yes"/"no" for
/// starting with a vowel, and date category. Numbers are formatted with their default
/// <c>ToString()</c>, exactly as plain string concatenation would.
/// </remarks>
/// <param name="word">The word to describe.</param>
/// <returns>The tab-separated line, without a trailing newline.</returns>
public static string MinitabFileLine(Word word)
{
    int length = word.Length;
    float complexity = WordAnalyzer.WordComplexity(word);
    int vowelCount = WordAnalyzer.VowelCount(word);
    int consonantCount = length - vowelCount;

    // A length check replaces catching IndexOutOfRangeException for an empty word.
    // The text is also kept non-null because string.Join(string, object[]) has
    // historically returned "" when the first element is null.
    string text = word.ToString() ?? "";
    string firstChar = text.Length > 0 ? text[0].ToString() : "";

    return string.Join("\t", new object[]
    {
        text,
        word.Source,
        word.SourceDateString,
        word.Topic,
        complexity,
        Math.Log10(complexity),
        length,
        WordAnalyzer.UniquenessFactor(word),
        word.UniqueChars,
        vowelCount,
        vowelCount / (float)length,
        consonantCount,
        consonantCount / (float)length,
        firstChar,
        WordAnalyzer.StartsWithVowel(word) ? "yes" : "no",
        word.DateCategory
    });
}
/// <summary>
/// Click handler: tokenizes the text in the sample box into words tagged with the
/// current source, optional date and optional topic, merges them into <c>Words</c>
/// sorted by text, and raises a change notification for "SessionWords".
/// </summary>
/// <remarks>
/// Only tokenizing, word construction and sorting run on a background thread. Message
/// boxes, the loading film and the assignment to <c>Words</c> stay on the calling
/// thread. When the sample yields no tokens the handler reports it and returns
/// without clearing the sample text.
/// </remarks>
private async void BTN_SaveSample_Click(object sender, RoutedEventArgs e)
{
    EnableLoadingFilm("Processing . . .");
    try
    {
        string sampleText = TB_RandomSample.Text;

        // get source
        string source = SourceText;
        if (string.IsNullOrWhiteSpace(source))
        {
            MessageBox.Show("Please enter a source.", "No Source", MessageBoxButton.OK);
            return;
        }

        // get the date (if it exists)
        // NOTE(review): DisplayDate is the date the calendar is showing; SelectedDate is
        // the date the user picked. Confirm DisplayDate is really what is wanted here.
        DateTime? date = null;
        if (DP_Date.IsEnabled)
        {
            date = DP_Date.DisplayDate;
        }

        // get topic
        string topic = "";
        if (TB_Topic.IsEnabled)
        {
            topic = TB_Topic.Text;
        }

        // Work on a private copy so the shared list is never mutated off this thread.
        List<Word> working = new List<Word>(Words);

        List<Word> merged = await Task.Run<List<Word>>(() =>
        {
            // get the sample of words
            List<string> sample = Sampler.Tokenize(sampleText);
            if (sample.Count <= 0)
            {
                return null; // nothing to save; reported by the caller below
            }

            foreach (string word in sample)
            {
                working.Add(new Word(word, source, date, topic));
            }

            // sort the data
            return working.OrderBy(w => w.ToString()).ToList();
        });

        if (merged == null)
        {
            MessageBox.Show("No Data To Save", "No Data", MessageBoxButton.OK);
            return;
        }

        // save the data
        Words = merged;
        OnPropertyChanged("SessionWords");
        TB_RandomSample.Text = "";
    }
    finally
    {
        // Always take the film down exactly once, even if an exception escapes.
        DisableLoadingFilm();
    }
}
/// <summary>
/// Interactive spell-check filter. Reads the data file named by <c>args[0]</c>, skips
/// its first line, and writes to <c>RESULT_FILE</c> every word that the Hunspell
/// dictionary accepts or that the user chooses to keep or respell.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the input file path.</param>
static void Main(string[] args)
{
    // get the file reader
    StreamReader reader;
    try
    {
        reader = new StreamReader(args[0]);
    }
    catch (Exception e)
    {
        Console.WriteLine("an error occurred while opening the file: " + e.Message);
        return;
    }

    int numWords = 0;
    HashSet<string> okWords = new HashSet<string>();
    HashSet<string> ignoreWords = new HashSet<string>();

    // StreamWriter creates (or truncates) the file itself. A separate File.Create call
    // would leave an open FileStream behind and make this constructor fail with a
    // sharing violation. The using blocks close both files on every exit path.
    using (reader)
    using (StreamWriter writer = new StreamWriter(RESULT_FILE))
    {
        writer.WriteLine(DataFileManager.minitabFileHeader);

        Console.WriteLine("Going through file...\n");

        using (Hunspell checker = new Hunspell("en_us.aff", "en_us.dic"))
        {
            // skip the first line of the input
            reader.ReadLine();

            while (!reader.EndOfStream)
            {
                Word word = DataFileManager.MinitabFileLine(reader.ReadLine());
                string text = word.ToString();

                if (checker.Spell(text) || okWords.Contains(text))
                {
                    writer.WriteLine(DataFileManager.MinitabFileLine(word));
                }
                else if (!ignoreWords.Contains(text))
                {
                    ColorWrite("Word #: " + numWords, ConsoleColor.Green);
                    Console.Write(word.Source + ": '" + text + "'" + " was not recognized as a word. Do you want to keep it? (y/n): ");

                    if (Console.ReadKey().Key == ConsoleKey.Y)
                    {
                        // keep it, and accept it silently from now on
                        writer.WriteLine(DataFileManager.MinitabFileLine(word));
                        okWords.Add(text);
                    }
                    else
                    {
                        Console.Write("\nDo you want to offer an alternate spelling? (y/n): ");
                        if (Console.ReadKey().Key == ConsoleKey.Y)
                        {
                            Console.Write("\nspelling: ");
                            string input = Console.ReadLine();

                            // the replacement may tokenize into several words
                            foreach (string s in Sampler.Tokenize(input))
                            {
                                Word newWord = new Word(s, word.Source, word.SourceDate, word.Topic);
                                writer.WriteLine(DataFileManager.MinitabFileLine(newWord));
                                okWords.Add(newWord.ToString());
                            }
                        }
                        else
                        {
                            ColorWrite("\nThe word will be ignored.", ConsoleColor.Red);
                            ignoreWords.Add(text);
                        }
                    }

                    Console.WriteLine("\n");
                }

                numWords++;
            }
        }
    }

    // End the program
    ColorWrite(numWords + " words processed.", ConsoleColor.Green);
}
/// <summary>
/// Totals, over every <c>SequenceRepeatFinder</c> match in the formatted word, the
/// length of capture group 2 divided (integer division) by the length of capture group 1.
/// </summary>
/// <param name="word">The word to inspect.</param>
/// <returns>The accumulated total, never less than zero.</returns>
public static int SequenceRepeats(Word word)
{
    string formatted = FormatWord(word);
    int total = 0;

    foreach (Match repeat in SequenceRepeatFinder.Matches(formatted))
    {
        int repeatedLength = repeat.Groups[2].Length;
        int unitLength = repeat.Groups[1].Length;
        total += repeatedLength / unitLength;
    }

    // clamp at zero
    return total < 0 ? 0 : total;
}
/// <summary>
/// Sums the <c>LetterValues</c> entry of every character in the formatted word.
/// </summary>
/// <param name="word">The word to score.</param>
/// <returns>The total of the per-letter values.</returns>
public static int ScrabbleScore(Word word)
{
    string letters = FormatWord(word);
    int score = 0;

    // walk the formatted text by index, adding each letter's value
    for (int i = 0; i < letters.Length; i++)
    {
        score += LetterValues[letters[i]];
    }

    return score;
}
/// <summary>
/// Counts consonants as the word's length minus its vowel count.
/// </summary>
/// <param name="word">The word to inspect.</param>
/// <returns><c>word.Length - VowelCount(word)</c>.</returns>
public static int ConsonantCount(Word word)
{
    int letters = word.Length;
    int vowels = VowelCount(word);

    // whatever is not a vowel is treated as a consonant
    return letters - vowels;
}
/// <summary>
/// Computes the complexity score of a word:
/// <c>10 * AverageLetterFrequency * (letterScore - repeatScore)</c>, where
/// <c>letterScore = UniquenessFactor * (Length + LetterFrequencyScore)</c> and
/// <c>repeatScore = (SequenceRepeats + SequentialCharRepeats) / Length</c>.
/// </summary>
/// <param name="word">The word to score.</param>
/// <returns>The complexity score.</returns>
public static float WordComplexity(Word word)
{
    float letterScore = UniquenessFactor(word) * (word.Length + LetterFrequencyScore(word));

    // Divide in floating point. SequenceRepeats returns int, and SequentialCharRepeats
    // presumably does too (confirm), so without the cast this would be integer division
    // and the fractional repeat score would be truncated before reaching the float.
    float repeatScore = (SequenceRepeats(word) + SequentialCharRepeats(word)) / (float)word.Length;

    return 10 * AverageLetterFrequency(word) * (letterScore - repeatScore);
}
/// <summary>
/// Counts the vowels in a word: every character of the formatted word found in
/// <c>Vowels</c>, plus one for each <c>YVowelCheck</c> match.
/// </summary>
/// <param name="word">The word to inspect.</param>
/// <returns>The number of vowels found.</returns>
public static int VowelCount(Word word)
{
    // work on the normalized text only
    string text = FormatWord(word);

    // plain vowels: one per character that appears in Vowels
    int vowels = 0;
    for (int i = 0; i < text.Length; i++)
    {
        if (Vowels.Contains(text[i]))
        {
            vowels++;
        }
    }

    // 'y' vowels are detected separately by pattern
    int yVowels = YVowelCheck.Matches(text).Count;

    return vowels + yVowels;
}
/// <summary>
/// Ratio of a word's unique-character count to its length.
/// </summary>
/// <param name="word">The word to inspect.</param>
/// <returns><c>word.UniqueChars</c> divided by <c>word.Length</c>, as a float.</returns>
public static float UniquenessFactor(Word word)
{
    // convert first so the division is carried out in floating point
    float uniqueChars = (float)word.UniqueChars;
    return uniqueChars / word.Length;
}
//TODO: see if we can make this work
/// <summary>
/// Placeholder for a syllable counter. Not implemented: the argument is ignored.
/// </summary>
/// <param name="word">The word whose syllables would be counted (currently unused).</param>
/// <returns>Always 0.</returns>
public static int SyllableCount(Word word)
{
    return 0;
}
/// <summary>
/// Tests the formatted word against <c>StartsWithVowelCheck</c>.
/// </summary>
/// <param name="word">The word to test.</param>
/// <returns><c>true</c> when the pattern matches the formatted word; otherwise <c>false</c>.</returns>
public static bool StartsWithVowel(Word word)
{
    string formatted = FormatWord(word);
    bool matches = StartsWithVowelCheck.IsMatch(formatted);
    return matches;
}
/// <summary>
/// Entry point: writes a word/complexity report for the file named by <c>args[0]</c>
/// to ./Results.txt, tries to open the report, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the input file path.</param>
static void Main(string[] args)
{
    GenerateResults(args);

    // quit
    Console.Write("Press any key to quit...");
    Console.ReadKey();
}

/// <summary>
/// Reads the input file, writes one "word&lt;TAB&gt;complexity" line per non-blank word,
/// reports timing, and launches the result file. Returns early after printing an
/// error when the input cannot be opened or the output cannot be created.
/// </summary>
private static void GenerateResults(string[] args)
{
    // input
    StreamReader reader;
    try
    {
        reader = new StreamReader(args[0]);
    }
    catch (IndexOutOfRangeException)
    {
        WriteLineColored("No file to read from. Please give the file as a command line argument.", ConsoleColor.Red);
        return;
    }
    catch (FileNotFoundException)
    {
        WriteLineColored("File " + args[0] + " was not found.", ConsoleColor.Red);
        return;
    }

    string resultFilePath = "./Results.txt";
    int wordCount = 0;
    Stopwatch sw = new Stopwatch();

    using (reader)
    {
        // output
        // StreamWriter creates the file itself. A preceding File.Create would leave an
        // open FileStream behind and make this constructor fail on the first run.
        StreamWriter writer;
        try
        {
            writer = new StreamWriter(resultFilePath);
        }
        catch (IOException e)
        {
            WriteLineColored("There was an error creating the Results.txt file: " + e.Message, ConsoleColor.Red);
            return;
        }

        // make output
        Console.WriteLine("Generating output...");
        sw.Start();

        using (writer)
        {
            writer.WriteLine("word\tcomplexity");
            while (!reader.EndOfStream)
            {
                string[] words = reader.ReadLine().Split(' ');
                foreach (string s in words)
                {
                    Word w = new Word(s);
                    string text = w.ToString();

                    // tokens that are blank once wrapped in a Word are skipped
                    if (!string.IsNullOrWhiteSpace(text))
                    {
                        float complexity = WordAnalyzer.WordComplexity(w);
                        writer.WriteLine(text + "\t" + complexity.ToString());
                        wordCount++;
                    }
                }
            }
        }

        // stop timing only after the output has been flushed and closed
        sw.Stop();
    }

    WriteLineColored("Output successfully generated.", ConsoleColor.Green);
    WriteLineColored("Output took: " + sw.Elapsed.ToString() + " to analyze " + wordCount.ToString() + " words.", ConsoleColor.Green);

    try
    {
        Process.Start(new ProcessStartInfo(".\\Results.txt"));
    }
    catch (Exception e)
    {
        WriteLineColored("There was an error starting Results.txt: " + e.Message, ConsoleColor.Red);
    }
}

/// <summary>
/// Writes one line to the console in the given foreground color, then resets the color.
/// </summary>
private static void WriteLineColored(string message, ConsoleColor color)
{
    Console.ForegroundColor = color;
    Console.WriteLine(message);
    Console.ResetColor();
}