Beispiel #1
0
        /// <summary>
        /// Looks up rows of <c>wf_frequencies</c> whose <c>word</c> column matches
        /// <paramref name="word"/> via SQL <c>LIKE</c>.
        /// </summary>
        /// <param name="word">
        /// The LIKE pattern to search for. It is bound as a query parameter, so quotes
        /// and other SQL metacharacters in it are treated as data, not as SQL.
        /// </param>
        /// <returns>
        /// The matching rows, or <c>null</c> when the query produces no rows
        /// (kept for compatibility with existing callers).
        /// </returns>
        public static List <xWordFrequencies> FindInFrequencies(string word)
        {
            ResetSQLiteConnection();
            List <xWordFrequencies> xwfList = new List <xWordFrequencies>();

            // The search term is bound as a parameter instead of being formatted into
            // the SQL text: the previous string-built query was open to SQL injection
            // and failed on any word containing an apostrophe.
            sql_cmd.CommandText = "SELECT * FROM `wf_frequencies` WHERE `word` LIKE @word";
            sql_cmd.Parameters.Clear();
            // A null word used to be formatted as an empty string; keep that behavior.
            sql_cmd.Parameters.AddWithValue("@word", word ?? string.Empty);

            try
            {
                // 'using' guarantees the reader is closed on every path, including the
                // early "no rows" return below.
                using (SQLiteDataReader reader = sql_cmd.ExecuteReader())
                {
                    if (!reader.HasRows)
                    {
                        return(null);
                    }

                    while (reader.Read())
                    {
                        xWordFrequencies xwf = new xWordFrequencies()
                        {
                            id         = Convert.ToInt64(GetDBInt64("id", reader)),
                            fileId     = Convert.ToInt64(GetDBInt64("file_id", reader)),
                            word       = GetDBString("word", reader),
                            frequency  = GetDBInt("frequency", reader),
                            percentage = GetDBFloat("percentage", reader),
                        };

                        xwfList.Add(xwf);
                    }
                }
            }
            finally
            {
                // sql_cmd is shared; do not leave this query's parameter attached to it.
                sql_cmd.Parameters.Clear();
            }

            return(xwfList);
        }
Beispiel #2
0
        /// <summary>
        /// Selection-changed handler for <c>olvFrequencies</c>. Updates three labels:
        /// the number of selected rows, the sum of their percentages and the sum of
        /// their frequencies.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void olvFrequencies_SelectionChanged(object sender, EventArgs e)
        {
            const string countCaption      = "Выделено слов: ";
            const string percentageCaption = "Выделено в процентах: ";
            const string frequencyCaption  = "Сумма выделенных частот: ";

            var selection = olvFrequencies.SelectedObjects;

            // The count label is written first, before the selected rows are walked.
            lblSelectedWordsCount.Text = countCaption + selection.Count.ToString();

            // Totals are accumulated in a plain loop (float for the percentage) so the
            // rounding is exactly what a running float sum produces.
            float percentageTotal = 0;
            int   frequencyTotal  = 0;

            foreach (xWordFrequencies selected in selection)
            {
                percentageTotal += selected.percentage;
                frequencyTotal  += selected.frequency;
            }

            lblSelectedFrequency.Text       = frequencyCaption + frequencyTotal.ToString();
            lblSelectedWordsPercentage.Text = percentageCaption + percentageTotal.ToString("F") + "%";
        }
Beispiel #3
0
        /// <summary>
        /// Loads every row of <c>wf_frequencies</c> that belongs to the given file.
        /// </summary>
        /// <param name="fileId">Value matched against the <c>file_id</c> column.</param>
        /// <returns>
        /// The rows read, or <c>null</c> when the query produces no rows. If reading a
        /// row fails, the error is logged, the user is notified, and the rows read so
        /// far are returned.
        /// </returns>
        public static List <xWordFrequencies> GetFrequencies(long fileId)
        {
            List <xWordFrequencies> list = new List <xWordFrequencies>();

            // Command and reader are both disposed by 'using'. Previously an extra
            // SQLiteCommand was allocated and dropped, the command was never disposed,
            // and the reader stayed open on the "no rows" path.
            using (SQLiteCommand cmd = sql_con.CreateCommand())
            {
                // Bound parameter instead of a culture-dependent formatted query string.
                cmd.CommandText = "SELECT * FROM wf_frequencies WHERE file_id=@fileId";
                cmd.Parameters.AddWithValue("@fileId", fileId);

                using (SQLiteDataReader reader = cmd.ExecuteReader())
                {
                    if (!reader.HasRows)
                    {
                        return(null);
                    }

                    try
                    {
                        while (reader.Read())
                        {
                            xWordFrequencies xwf = new xWordFrequencies()
                            {
                                id         = Convert.ToInt64(GetDBInt64("id", reader)),
                                fileId     = Convert.ToInt64(GetDBInt64("file_id", reader)),
                                word       = GetDBString("word", reader),
                                rank       = GetDBInt("rank", reader),
                                frequency  = GetDBInt("frequency", reader),
                                percentage = GetDBFloat("percentage", reader),
                            };

                            list.Add(xwf);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort by design: report the problem and fall through
                        // to return whatever was read before the failure.
                        Utils.ErrLog("Ошибка при чтении xwf из БД", ex.Message);
                        Utils.msgInformation("Ошибка при чтении БД, требуется обновление БД");
                    }
                }
            }

            return(list);
        }
Beispiel #4
0
        /// <summary>
        /// Merges the word frequencies of every file in <c>Utils.history</c> into one
        /// list with a single entry per distinct word (ordinal, case-sensitive match).
        /// </summary>
        /// <returns>
        /// One entry per word, in order of first appearance. <c>frequency</c> is the
        /// sum over all files and <c>percentage</c> is
        /// <c>frequency / WORDS_COUNT * 100</c>.
        /// </returns>
        public static List <xWordFrequencies> GetCombinedFrequencies()
        {
            List <xWordFrequencies> allfrequencies = new List <xWordFrequencies>();

            // Word -> merged entry. Replaces a linear List.Find per word (O(n^2) overall).
            // The default string comparer is ordinal, the same as string.Equals(string).
            Dictionary <string, xWordFrequencies> mergedByWord = new Dictionary <string, xWordFrequencies>();

            foreach (xTextFile xtf in Utils.history)
            {
                if (xtf.frequencies == null)
                {
                    // Nothing to merge for this file.
                    continue;
                }

                foreach (xWordFrequencies xwf in xtf.frequencies)
                {
                    if (xwf == null || xwf.word == null)
                    {
                        // An entry without a word cannot be matched against anything.
                        continue;
                    }

                    xWordFrequencies merged;
                    if (mergedByWord.TryGetValue(xwf.word, out merged))
                    {
                        merged.frequency = merged.frequency + xwf.frequency;
                        continue;
                    }

                    // Accumulate into a copy. The previous code stored and modified the
                    // per-file object itself, which overwrote that file's own
                    // frequency/percentage and made repeated calls keep growing the sums.
                    // NOTE(review): copies the members visible in this file; extend if
                    // xWordFrequencies declares more.
                    merged = new xWordFrequencies()
                    {
                        id        = xwf.id,
                        fileId    = xwf.fileId,
                        word      = xwf.word,
                        rank      = xwf.rank,
                        frequency = xwf.frequency,
                    };

                    mergedByWord.Add(merged.word, merged);
                    allfrequencies.Add(merged);
                }
            }

            // Percentages depend only on the final sums, so compute them once at the end.
            foreach (xWordFrequencies merged in allfrequencies)
            {
                // Go through a float local so the division is not integer division.
                float freq = merged.frequency;
                merged.percentage = (freq / WORDS_COUNT) * 100;
            }

            return(allfrequencies);
        }
Beispiel #5
0
        /// <summary>
        /// Background-worker body: for every file in <c>Utils.fList</c>, reads its text,
        /// counts words matched by the configured regular expression, builds the
        /// ranked frequency list and saves the file info.
        /// </summary>
        /// <remarks>
        /// Progress values reported here: <c>-2</c> before a file is processed,
        /// <c>-1</c> when cancellation is detected, <c>-3</c> after all files, and a
        /// running match counter (1, 2, ...) while words are being counted.
        /// </remarks>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void bgwCounter_DoWork(object sender, DoWorkEventArgs e)
        {
            foreach (xTextFile xFile in Utils.fList)
            {
                if (bgwCounter.CancellationPending)
                {
                    bgwCounter.ReportProgress(-1, xFile);
                    return;
                }
                bgwCounter.ReportProgress(-2, xFile);

                // Treat a null result as empty text so the file is skipped below
                // (zero words) instead of failing with a NullReferenceException.
                string contents = xFile.Processor.GetAllText(xFile.filePath) ?? string.Empty;

                xFile.charactersCount = contents.Length;

                xFile.frequencies = new List <xWordFrequencies>();
                var    words       = new Dictionary <string, int>(StringComparer.CurrentCultureIgnoreCase);
                string stRegExp    = Utils.StgGetString("TxtRegExp");
                var    wordPattern = new Regex(stRegExp.Replace(@"\\", @"\").Trim());

                // Run the regex once and reuse the result for both the total count and
                // the per-word tally (the text used to be scanned twice).
                MatchCollection matches = wordPattern.Matches(contents);
                xFile.wordsCount = matches.Count;
                if (xFile.wordsCount == 0)
                {
                    continue;
                }
                // Check if exists
                if (DbHelper.ifExists(xFile.charactersCount, xFile.wordsCount))
                {
                    continue;
                }

                int progress = 0;
                foreach (Match match in matches)
                {
                    if (bgwCounter.CancellationPending)
                    {
                        bgwCounter.ReportProgress(-1, xFile);
                        return;
                    }
                    progress++;
                    int currentCount = 0;
                    words.TryGetValue(match.Value, out currentCount);
                    bgwCounter.ReportProgress(progress, xFile);
                    currentCount++;
                    words[match.Value] = currentCount;
                }

                // Add words to object's list of words with frequencies, most frequent
                // first. Words with equal frequency share a rank.
                int rank = 0;
                foreach (var row in words.OrderByDescending(pair => pair.Value))
                {
                    xWordFrequencies xwf = new xWordFrequencies();

                    // Lower-case the word, then capitalize its first letter. The length
                    // check protects against a pattern that can match an empty string,
                    // where Substring(0, 1) would throw.
                    string lowered = row.Key.ToLower();
                    xwf.word = lowered.Length > 0
                        ? lowered.Substring(0, 1).ToUpper() + lowered.Substring(1)
                        : lowered;
                    xwf.frequency = row.Value;

                    // First word gets rank 1; afterwards the rank only advances when
                    // the frequency drops below that of the previous word.
                    if (rank == 0 || xFile.frequencies[xFile.frequencies.Count - 1].frequency > xwf.frequency)
                    {
                        rank++;
                    }

                    xwf.rank = rank;

                    // frequency and wordsCount are integers: dividing them directly
                    // would be integer division (0 for every word), so go through a
                    // float local first.
                    float freq = xwf.frequency;
                    xwf.percentage = (freq / xFile.wordsCount) * 100;
                    xFile.frequencies.Add(xwf);
                }
                xFile.uniqueWordsCount = xFile.frequencies.Count;
                xFile.SaveFileInfo();
            }

            bgwCounter.ReportProgress(-3, null);
        }
Beispiel #6
0
        /// <summary>
        /// One-off clean-up of <c>wf_frequencies</c>: replaces Latin look-alike
        /// characters in every stored word with their Cyrillic counterparts and writes
        /// the new word back to the row.
        /// </summary>
        /// <remarks>
        /// Rows whose normalized word still contains characters other than whitespace,
        /// non-word characters and Cyrillic are only counted as "removed" (the actual
        /// delete call is disabled). For every successfully updated row an
        /// <c>INSERT</c> statement is appended to <c>output.sql</c>; a trace is appended
        /// to <c>output.log</c> and the debug output. Errors while processing are
        /// logged and shown to the user, and processing stops.
        /// </remarks>
        public static void Chistka()
        {
            ResetSQLiteConnection();

            int       iteration = -1;      // becomes the 0-based index of the row being processed
            const int count     = 145000;  // total shown in the "i/count" trace only; not read from the DB
            int       removed   = 0;

            // Command and reader are disposed by 'using' on every path, including the
            // early "no rows" return.
            using (SQLiteCommand cmd = sql_con.CreateCommand())
            {
                cmd.CommandText = "SELECT * FROM `wf_frequencies` Where `id` > @minId";
                cmd.Parameters.AddWithValue("@minId", iteration);

                using (SQLiteDataReader reader = cmd.ExecuteReader())
                {
                    if (!reader.HasRows)
                    {
                        return;
                    }

                    try
                    {
                        using (StreamWriter sw_debug = new StreamWriter("output.log", true, Encoding.UTF8))
                        {
                            using (StreamWriter sw_output = new StreamWriter("output.sql", true, Encoding.UTF8))
                            {
                                while (reader.Read())
                                {
                                    iteration++;

                                    xWordFrequencies xwf = new xWordFrequencies()
                                    {
                                        id         = Convert.ToInt64(GetDBInt64("id", reader)),
                                        fileId     = Convert.ToInt64(GetDBInt64("file_id", reader)),
                                        word       = GetDBString("word", reader),
                                        rank       = GetDBInt("rank", reader),
                                        frequency  = GetDBInt("frequency", reader),
                                        percentage = GetDBFloat("percentage", reader),
                                    };

                                    String line = ReplaceLatinLookalikes(xwf.word);
                                    xwf.word = line;

                                    sw_debug.Write(string.Format("{0}/{1} {2} ", iteration, count, line));
                                    Debug.Write(string.Format("{0}/{1} {2} ", iteration, count, line));

                                    // IS IT CYRILLIC ?
                                    if (!Regex.IsMatch(line, @"\A[\s\W\p{IsCyrillic}]*\z"))
                                    {
                                        // Not purely Cyrillic: count it and move on.
                                        // (The row delete that used to happen here is disabled.)
                                        sw_debug.WriteLine("");
                                        Debug.WriteLine("");
                                        removed++;
                                        continue;
                                    }

                                    Dictionary <string, object> nameValueData = new Dictionary <string, object>();
                                    nameValueData.Add("word", line);

                                    if ((UpdateReq("wf_frequencies", nameValueData, xwf.id) > 0))
                                    {
                                        // OK. Emit the row as SQL using the invariant culture so
                                        // the percentage always has a '.' decimal separator, and
                                        // double any apostrophe in the word so the quoted literal
                                        // stays valid.
                                        sw_output.WriteLine(string.Format(
                                            System.Globalization.CultureInfo.InvariantCulture,
                                            "INSERT INTO `wf_frequencies` VALUES({0},{1},{2},'{3}',{4},{5})",
                                            xwf.id, xwf.fileId, xwf.rank, xwf.word.Replace("'", "''"), xwf.frequency, xwf.percentage));
                                        sw_debug.Write("Ok");
                                        Debug.Write("Ok");
                                    }
                                    else
                                    {
                                        // FAIL
                                        // The update did not report a changed row. The original
                                        // author assumed this means the normalized word already
                                        // exists; the row is only counted, not deleted or merged.
                                        removed++;
                                    }

                                    sw_debug.WriteLine("");
                                    Debug.WriteLine("");
                                }
                            }
                            sw_debug.WriteLine("END OF THE UNIVERSE!");
                            Debug.WriteLine("END OF THE UNIVERSE!");
                            sw_debug.WriteLine("REMOVED: " + removed);
                            Debug.WriteLine("REMOVED: " + removed);
                        }
                    }
                    catch (Exception ex)
                    {
                        Utils.ErrLog("Ошибка при чтении xwf из БД", ex.Message);
                        Utils.msgInformation("Ошибка при чтении БД, требуется обновление БД");
                    }
                }
            }
        }

        /// <summary>
        /// Replaces Latin characters (and the digit 1) that look like Cyrillic letters
        /// with the Cyrillic letters themselves. In every pair below the first string
        /// is the look-alike and the second is its Cyrillic replacement.
        /// </summary>
        /// <param name="text">Word to normalize.</param>
        /// <returns>The word with all listed look-alikes replaced.</returns>
        private static string ReplaceLatinLookalikes(string text)
        {
            String line = text;

            // Digit one and Latin I / l / i all become the palochka.
            line = line.Replace("1", "Ӏ");
            line = line.Replace("I", "Ӏ");
            line = line.Replace("l", "Ӏ");
            line = line.Replace("i", "Ӏ");

            // Lower-case letters.
            line = line.Replace("ѐ", "ё");
            line = line.Replace("e", "е");
            line = line.Replace("a", "а");
            line = line.Replace("p", "р");
            line = line.Replace("o", "о");
            line = line.Replace("k", "к");
            line = line.Replace("x", "х");
            line = line.Replace("y", "у");
            line = line.Replace("n", "п");
            line = line.Replace("m", "м");
            line = line.Replace("c", "с");
            line = line.Replace("r", "г");
            line = line.Replace("u", "и");

            // Upper-case letters.
            line = line.Replace("Ѐ", "Ё");
            line = line.Replace("E", "Е");
            line = line.Replace("A", "А");
            line = line.Replace("B", "В");
            line = line.Replace("P", "Р");
            line = line.Replace("O", "О");
            line = line.Replace("K", "К");
            line = line.Replace("X", "Х");
            line = line.Replace("T", "Т");
            line = line.Replace("M", "М");
            line = line.Replace("C", "С");

            return line;
        }