/// <summary>
/// Counts how often each word occurs in a text file and, unless the user declines to
/// overwrite an existing copy, writes a stemmed version of every line to
/// "{filename}.stems.besedilo.txt".
/// </summary>
/// <param name="location">Path of the text file to read.</param>
/// <param name="stopwords">Value passed to the <c>Morf</c> constructor (Main passes its second command-line argument here).</param>
/// <param name="filename">Base name used to build the stemmed-text output file name.</param>
/// <returns>Dictionary mapping each lower-cased word to its number of occurrences.</returns>
static Dictionary<string, int> ReadWords(string location, string stopwords, string filename)
{
    // Everything except ASCII letters, spaces and the Slovene letters č š ž Č Š Ž is removed.
    // The Slovene letters are written as \u escapes so the pattern does not depend on the
    // encoding the source file is saved or read in.
    const string nonWordPattern = "[^a-zA-Z \u010D\u0161\u017E\u010C\u0160\u017D]";

    Morf morfer = new Morf(stopwords);
    Dictionary<string, int> words = new Dictionary<string, int>();
    string stemsPath = $"{filename}.stems.besedilo.txt";

    using (var frws = File.OpenRead(location))
    using (var srws = new StreamReader(frws))
    {
        // The stemmed text is written when the file does not exist yet, or when the user
        // confirms overwriting it ("y" or just Enter).
        bool writeStems = true;
        if (File.Exists(stemsPath))
        {
            Console.WriteLine($"Datoteka {filename}.stems.besedilo.txt že obstaja želite prepisati to datoteko (y/n) (privzeto y)");
            // Console.ReadLine returns null when input is exhausted; treat that as the default answer.
            string ans = (Console.ReadLine() ?? "").ToLower().Trim();
            writeStems = ans.Equals("y") || ans.Equals("");
        }

        // A null resource is permitted in a using statement; nothing is disposed in that case.
        using (StreamWriter file = writeStems ? new StreamWriter(stemsPath) : null)
        {
            while (!srws.EndOfStream)
            {
                string line = srws.ReadLine().ToLower().Trim();

                if (file != null)
                {
                    file.WriteLine(morfer.Stemify(line));
                }

                string[] wordstoadd = Regex.Replace(line, nonWordPattern, "")
                    .Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                // After the replacement only letters remain, so no digit-only check is needed.
                foreach (string word in wordstoadd)
                {
                    int count;
                    words.TryGetValue(word, out count);
                    words[word] = count + 1;
                }
            }
        }
    }

    return words;
}
/// <summary>
/// Entry point. With no arguments prints usage; with one argument runs the plain word
/// analysis on that file; with two or more runs the analysis with stop-word filtering
/// and stemming, using the second argument as the stop-word file.
/// </summary>
/// <param name="args">Command-line arguments: [text file] [optional stop-word file].</param>
static void Main(string[] args)
{
    if (args.Length < 1)
    {
        Console.WriteLine("Program je namenjen uporabi znotraj ukazne vrstice.");
        Console.WriteLine("Za normalno analizo uporabite ukaz");
        Console.WriteLine("Wordstat [Ime_datoteke]");
        Console.WriteLine("Za filtrirano analizo pa uporabite ta ukaz");
        Console.WriteLine("Wordstat [Ime_datoteke] [datoteka z besedami za izločitev]");
    }
    else if (args.Length == 1)
    {
        RunPlainAnalysis(args[0]);
    }
    else
    {
        RunFilteredAnalysis(args[0], args[1]);
    }
}

/// <summary>Analyses a text file without filtering and prints the statistics.</summary>
private static void RunPlainAnalysis(string textPath)
{
    // NOTE(review): the single-argument ReadWords overload is not visible in this part of the file.
    Dictionary<string, int> words = ReadWords(textPath);
    string filename = Path.GetFileNameWithoutExtension(textPath);

    int sum = TotalOccurrences(words);
    Dictionary<string, int> top10 = Sort(words, 10);
    Dictionary<string, int> words_sorted = Sort(words, words.Count);
    int avgrLength = AvgrLength(words);
    int numowordless3 = WordsShorter3(words);
    int numowordmore3 = Words3orLonger(words);

    SaveWordCountsWithConfirmation($"{filename}.besede.txt", words_sorted, "Izpis statistike brez shranjevanja seznama besed");

    Console.WriteLine($"Skupno število besed: {sum}");
    Console.WriteLine($"Število unikatnih besed: {words.Count}");
    PrintTopWordsAndLengths(top10, avgrLength, numowordless3, numowordmore3);
}

/// <summary>Analyses a text file with stop-word filtering and stemming and prints the statistics.</summary>
private static void RunFilteredAnalysis(string textPath, string stopwordsPath)
{
    string filename = Path.GetFileNameWithoutExtension(textPath);
    Dictionary<string, int> words = ReadWords(textPath, stopwordsPath, filename);
    string[] stopwords = File.ReadAllLines(stopwordsPath);
    Dictionary<string, int> words_filtered = Filter(words, stopwords);

    // Merge the counts of all words that share the same stem.
    Dictionary<string, int> words_morfed = new Dictionary<string, int>();
    Morf morfer = new Morf(words_filtered);
    foreach (var word in words_filtered)
    {
        KeyValuePair<string, int> morfed = morfer.Stemify(word);
        if (!words_morfed.ContainsKey(morfed.Key))
        {
            words_morfed.Add(morfed.Key, morfed.Value);
        }
        else
        {
            words_morfed[morfed.Key] += morfed.Value;
        }
    }

    int sum = TotalOccurrences(words);
    int sumf = TotalOccurrences(words_filtered);
    Dictionary<string, int> top10 = Sort(words_filtered, 10);
    Dictionary<string, int> words_sorted = Sort(words_filtered, words_filtered.Count);
    Dictionary<string, int> words_morfed_sorted = Sort(words_morfed, words_morfed.Count);
    int avgrLength = AvgrLength(words_filtered);
    int numowordless3 = WordsShorter3(words_filtered);
    int numowordmore3 = Words3orLonger(words_filtered);

    SaveWordCountsWithConfirmation($"{filename}.besede.txt", words_sorted, "Izpis statistike brez shranjevanja seznama besed");
    SaveWordCountsWithConfirmation($"{filename}.stems.txt", words_morfed_sorted, "Izpis statistike brez shranjevanja novega teksta");

    Console.WriteLine($"Skupno število besed: {sum}");
    Console.WriteLine($"Skupno število besed po filtriranju: {sumf}");
    Console.WriteLine($"Skupno število filtriranih besed: {sum-sumf}");
    Console.WriteLine($"Število unikatnih besed: {words.Count}");
    Console.WriteLine($"Število unikatnih besed filtriranih: {words.Count-words_filtered.Count}");
    PrintTopWordsAndLengths(top10, avgrLength, numowordless3, numowordmore3);
}

/// <summary>Returns the sum of all counts in the dictionary.</summary>
private static int TotalOccurrences(Dictionary<string, int> counts)
{
    int total = 0;
    foreach (var entry in counts)
    {
        total += entry.Value;
    }
    return total;
}

/// <summary>
/// Writes one "word count" line per entry to <paramref name="path"/>. If the file already
/// exists the user is asked first; "y" or an empty answer overwrites, "n" prints
/// <paramref name="skippedMessage"/>, anything else prints a generic error. In the last two
/// cases the file is left untouched.
/// </summary>
private static void SaveWordCountsWithConfirmation(string path, Dictionary<string, int> counts, string skippedMessage)
{
    if (File.Exists(path))
    {
        Console.WriteLine($"Datoteka {path} že obstaja želite prepisati to datoteko (y/n) (privzeto y)");
        // Console.ReadLine returns null when input is exhausted; treat that as the default
        // answer. The answer is normalised so that "Y" or " y " are accepted as well.
        string ans = (Console.ReadLine() ?? "").ToLower().Trim();
        if (ans.Equals("n"))
        {
            Console.WriteLine(skippedMessage);
            return;
        }
        if (!ans.Equals("y") && !ans.Equals(""))
        {
            Console.WriteLine("Something went wrong");
            return;
        }
    }

    using (StreamWriter file = new StreamWriter(path))
    {
        foreach (var entry in counts)
        {
            file.WriteLine($"{entry.Key} {entry.Value}");
        }
    }
}

/// <summary>Prints the ranked list of most frequent words followed by the word-length statistics.</summary>
private static void PrintTopWordsAndLengths(Dictionary<string, int> top10, int avgrLength, int numowordless3, int numowordmore3)
{
    Console.WriteLine("10 najpogostejših besed: ");
    int placement = 0;
    foreach (var word in top10)
    {
        placement++;
        Console.WriteLine(String.Format("{0,3}.{1,15}|{2,5}", placement, word.Key, word.Value));
    }
    Console.WriteLine($"Povprečna dolžina besede: {avgrLength}");
    Console.WriteLine($"Število kratkih besed (manj kot 3 znaki): {numowordless3}");
    Console.WriteLine($"Število dolgih besed (več kot 3 znaki): {numowordmore3}");
}