Ejemplo n.º 1
0
        /// <summary>
        /// Counts how often each word occurs in the text file at <paramref name="location"/> and,
        /// unless the user declines to overwrite an existing file, writes the stemmed form of every
        /// input line to "<paramref name="filename"/>.stems.besedilo.txt".
        /// </summary>
        /// <param name="location">Path of the text file to analyse.</param>
        /// <param name="stopwords">Value passed to the <c>Morf</c> constructor.</param>
        /// <param name="filename">Prefix used to build the stem output file name.</param>
        /// <returns>A dictionary mapping each lower-cased word to its number of occurrences.</returns>
        static Dictionary <string, int> ReadWords(string location, string stopwords, string filename)
        {
            Morf   morfer    = new Morf(stopwords);
            string stemsPath = $"{filename}.stems.besedilo.txt";
            Dictionary <string, int> words = new Dictionary <string, int>();

            // Everything except ASCII letters, spaces and the Slovenian letters č š ž Č Š Ž is removed.
            // The letters are written as \u escapes because the previous literal had been corrupted by
            // an encoding round-trip (Windows-1250 bytes decoded as UTF-8), which silently stripped
            // those letters from every word. Built once instead of re-parsing the pattern per line.
            Regex nonLetters = new Regex("[^a-zA-Z \u010D\u0161\u017E\u010C\u0160\u017D]");

            using (var frws = File.OpenRead(location))
            using (var srws = new StreamReader(frws))
            {
                bool writeStems = ConfirmStemFileWrite(stemsPath);

                // A null resource is allowed in a using statement; nothing is disposed in that case.
                using (StreamWriter file = writeStems ? new StreamWriter(stemsPath) : null)
                {
                    string rawLine;
                    while ((rawLine = srws.ReadLine()) != null)
                    {
                        // Trim on every path so a freshly created stem file and an overwritten one
                        // receive identical input.
                        string line = rawLine.ToLower().Trim();

                        if (file != null)
                        {
                            file.WriteLine(morfer.Stemify(line));
                        }

                        // Digits are already removed by the regex, so no separate numeric check is needed.
                        string[] wordstoadd = nonLetters.Replace(line, "").Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
                        foreach (string word in wordstoadd)
                        {
                            int count;
                            words.TryGetValue(word, out count); // count stays 0 when the word is new
                            words[word] = count + 1;
                        }
                    }
                }
            }
            return(words);
        }

        /// <summary>
        /// Decides whether the stem file may be written: always when it does not exist yet,
        /// otherwise only when the user answers "y" or just presses Enter.
        /// </summary>
        /// <param name="stemsPath">Path of the stem output file.</param>
        /// <returns><c>true</c> when the file should be (over)written.</returns>
        private static bool ConfirmStemFileWrite(string stemsPath)
        {
            if (!File.Exists(stemsPath))
            {
                return true;
            }

            Console.WriteLine($"Datoteka {stemsPath} že obstaja želite prepisati to datoteko (y/n) (privzeto y)");

            // Console.ReadLine returns null when standard input is closed; treat that as the default answer.
            string ans = (Console.ReadLine() ?? "").ToLower().Trim();
            return ans.Equals("y") || ans.Equals("");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Command-line entry point. With one argument the word statistics of that file are printed
        /// and the sorted word list is saved to "[name].besede.txt". With two or more arguments the
        /// second one names a stop-word file: the words are filtered and stemmed first, and the stem
        /// counts are additionally saved to "[name].stems.txt". Without arguments a usage text is shown.
        /// </summary>
        /// <param name="args">Command-line arguments: input file and, optionally, stop-word file.</param>
        static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                Console.WriteLine("Program je namenjen uporabi znotraj ukazne vrstice.");
                Console.WriteLine("Za normalno analizo uporabite ukaz");
                Console.WriteLine("Wordstat [Ime_datoteke]");
                Console.WriteLine("Za filtrirano analizo pa uporabite ta ukaz");
                Console.WriteLine("Wordstat [Ime_datoteke] [datoteka z besedami za izločitev]");
                return;
            }

            string filename     = Path.GetFileNameWithoutExtension(args[0]);
            string wordListPath = $"{filename}.besede.txt";
            const string wordListSkipped = "Izpis statistike brez shranjevanja seznama besed";

            if (args.Length == 1)
            {
                Dictionary <string, int> words = ReadWords(args[0]);
                int sum = TotalOccurrences(words);
                Dictionary <string, int> top10        = Sort(words, 10);
                Dictionary <string, int> words_sorted = Sort(words, words.Count);
                int avgrLength    = AvgrLength(words);
                int numowordless3 = WordsShorter3(words);
                int numowordmore3 = Words3orLonger(words);

                SaveWordCounts(wordListPath, words_sorted, wordListSkipped);

                Console.WriteLine($"Skupno število besed: {sum}");
                Console.WriteLine($"Število unikatnih besed: {words.Count}");
                PrintFrequencyReport(top10, avgrLength, numowordless3, numowordmore3);
            }
            else
            {
                Dictionary <string, int> words          = ReadWords(args[0], args[1], filename);
                string[]                 stopwords      = File.ReadAllLines(args[1]);
                Dictionary <string, int> words_filtered = Filter(words, stopwords);

                // Merge the counts of all words that share the same stem.
                Dictionary <string, int> words_morfed = new Dictionary <string, int>();
                Morf morfer = new Morf(words_filtered);
                foreach (var word in words_filtered)
                {
                    KeyValuePair <string, int> morfed = morfer.Stemify(word);
                    int existing;
                    words_morfed.TryGetValue(morfed.Key, out existing); // 0 when the stem is new
                    words_morfed[morfed.Key] = existing + morfed.Value;
                }

                int sum  = TotalOccurrences(words);
                int sumf = TotalOccurrences(words_filtered);
                Dictionary <string, int> top10               = Sort(words_filtered, 10);
                Dictionary <string, int> words_sorted        = Sort(words_filtered, words_filtered.Count);
                Dictionary <string, int> words_morfed_sorted = Sort(words_morfed, words_morfed.Count);
                int avgrLength    = AvgrLength(words_filtered);
                int numowordless3 = WordsShorter3(words_filtered);
                int numowordmore3 = Words3orLonger(words_filtered);

                SaveWordCounts(wordListPath, words_sorted, wordListSkipped);
                SaveWordCounts($"{filename}.stems.txt", words_morfed_sorted, "Izpis statistike brez shranjevanja novega teksta");

                Console.WriteLine($"Skupno število besed: {sum}");
                Console.WriteLine($"Skupno število besed po filtriranju: {sumf}");
                Console.WriteLine($"Skupno število filtriranih besed: {sum-sumf}");
                Console.WriteLine($"Število unikatnih besed: {words.Count}");
                Console.WriteLine($"Število unikatnih besed filtriranih: {words.Count-words_filtered.Count}");
                PrintFrequencyReport(top10, avgrLength, numowordless3, numowordmore3);
            }
        }

        /// <summary>Adds up the occurrence counts of all entries in <paramref name="counts"/>.</summary>
        /// <param name="counts">Word-to-count dictionary.</param>
        /// <returns>The sum of all values.</returns>
        private static int TotalOccurrences(Dictionary <string, int> counts)
        {
            int total = 0;
            foreach (var entry in counts)
            {
                total += entry.Value;
            }
            return total;
        }

        /// <summary>
        /// Writes one "key  value" line per entry to <paramref name="path"/>. When the file already
        /// exists the user is asked first; "y" or an empty answer overwrites, "n" skips and prints
        /// <paramref name="skippedMessage"/>, anything else skips with a generic message.
        /// </summary>
        /// <param name="path">Output file path.</param>
        /// <param name="counts">Entries to write, in enumeration order.</param>
        /// <param name="skippedMessage">Text printed when the user answers "n".</param>
        private static void SaveWordCounts(string path, Dictionary <string, int> counts, string skippedMessage)
        {
            if (File.Exists(path))
            {
                Console.WriteLine($"Datoteka {path} že obstaja želite prepisati to datoteko (y/n) (privzeto y)");

                // Normalise the answer the same way for every prompt ("Y" and "y " count as yes), and
                // treat a closed standard input (null) as the default answer instead of crashing.
                string ans = (Console.ReadLine() ?? "").ToLower().Trim();
                if (ans.Equals("n"))
                {
                    Console.WriteLine(skippedMessage);
                    return;
                }
                if (!ans.Equals("y") && !ans.Equals(""))
                {
                    Console.WriteLine("Something went wrong");
                    return;
                }
            }

            List <string> lines = new List <string>(counts.Count);
            foreach (var entry in counts)
            {
                lines.Add($"{entry.Key}  {entry.Value}");
            }
            File.WriteAllLines(path, lines);
        }

        /// <summary>Prints the ranked top-word table followed by the word-length statistics.</summary>
        /// <param name="top10">Most frequent words, already in ranking order.</param>
        /// <param name="avgrLength">Average word length to report.</param>
        /// <param name="numowordless3">Number of short words to report.</param>
        /// <param name="numowordmore3">Number of long words to report.</param>
        private static void PrintFrequencyReport(Dictionary <string, int> top10, int avgrLength, int numowordless3, int numowordmore3)
        {
            Console.WriteLine($"10 najpogostejših besed: ");
            int placement = 0;
            foreach (var word in top10)
            {
                placement++;
                Console.WriteLine($"{placement,3}.{word.Key,15}|{word.Value,5}");
            }
            Console.WriteLine($"Povprečna dolžina besede: {avgrLength}");
            Console.WriteLine($"Število kratkih besed (manj kot 3 znaki): {numowordless3}");
            // NOTE(review): the label says "more than 3 characters" while the value comes from
            // Words3orLonger — confirm which one is intended before changing the text.
            Console.WriteLine($"Število dolgih besed (več kot 3 znaki): {numowordmore3}");
        }