/// <summary>
/// Looks up the rows of <c>wf_frequencies</c> whose <c>word</c> column matches the given value
/// using SQL <c>LIKE</c>.
/// </summary>
/// <param name="word">
/// Word or LIKE pattern to search for (<c>%</c> and <c>_</c> keep their wildcard meaning).
/// The value is bound as a parameter, so quotes inside it cannot alter the statement.
/// A null value is treated as an empty string.
/// </param>
/// <returns>
/// The matching rows (the <c>rank</c> member is not populated by this method),
/// or <c>null</c> when the query yields no rows.
/// </returns>
public static List<xWordFrequencies> FindInFrequencies(string word)
{
    ResetSQLiteConnection();

    // The shared command is reused, so start from a clean parameter list.
    sql_cmd.CommandText = "SELECT * FROM `wf_frequencies` WHERE `word` LIKE @word";
    sql_cmd.Parameters.Clear();
    sql_cmd.Parameters.AddWithValue("@word", word ?? string.Empty);

    try
    {
        // 'using' closes the reader on every exit path, including the early return and exceptions.
        using (SQLiteDataReader reader = sql_cmd.ExecuteReader())
        {
            if (!reader.HasRows)
            {
                return null;
            }

            List<xWordFrequencies> xwfList = new List<xWordFrequencies>();
            while (reader.Read())
            {
                xWordFrequencies xwf = new xWordFrequencies()
                {
                    id = Convert.ToInt64(GetDBInt64("id", reader)),
                    fileId = Convert.ToInt64(GetDBInt64("file_id", reader)),
                    word = GetDBString("word", reader),
                    frequency = GetDBInt("frequency", reader),
                    percentage = GetDBFloat("percentage", reader),
                };
                xwfList.Add(xwf);
            }

            return xwfList;
        }
    }
    finally
    {
        // Do not leave the bound value behind on the shared command for the next query.
        sql_cmd.Parameters.Clear();
    }
}
/// <summary>
/// Refreshes the three summary labels whenever the selection in <c>olvFrequencies</c> changes:
/// number of selected rows, sum of their percentages and sum of their frequencies.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void olvFrequencies_SelectionChanged(object sender, EventArgs e)
{
    const string countCaption = "Выделено слов: ";
    const string percentageCaption = "Выделено в процентах: ";
    const string frequencyCaption = "Сумма выделенных частот: ";

    // The count label is updated first, before the selected rows are walked.
    int selectedCount = olvFrequencies.SelectedObjects.Count;
    lblSelectedWordsCount.Text = countCaption + selectedCount.ToString();

    float percentageTotal = 0;
    int frequencyTotal = 0;

    // Every selected object is cast to xWordFrequencies by the typed foreach.
    foreach (xWordFrequencies selected in olvFrequencies.SelectedObjects)
    {
        percentageTotal = percentageTotal + selected.percentage;
        frequencyTotal = frequencyTotal + selected.frequency;
    }

    string formattedPercentage = percentageTotal.ToString("F");
    lblSelectedWordsPercentage.Text = percentageCaption + formattedPercentage + "%";
    lblSelectedFrequency.Text = frequencyCaption + frequencyTotal.ToString();
}
/// <summary>
/// Loads every row of <c>wf_frequencies</c> that belongs to the given file.
/// </summary>
/// <param name="fileId">Value matched against the <c>file_id</c> column.</param>
/// <returns>
/// The rows read for the file, or <c>null</c> when the query yields no rows.
/// If reading a row fails, the error is logged, the user is notified, and the rows
/// read up to that point are returned.
/// </returns>
public static List<xWordFrequencies> GetFrequencies(long fileId)
{
    List<xWordFrequencies> list = new List<xWordFrequencies>();

    // Both the command and the reader are disposed on every exit path,
    // including the early "no rows" return.
    using (SQLiteCommand cmd = sql_con.CreateCommand())
    {
        cmd.CommandText = "SELECT * FROM wf_frequencies WHERE file_id=@fileId";
        cmd.Parameters.AddWithValue("@fileId", fileId);

        using (SQLiteDataReader reader = cmd.ExecuteReader())
        {
            if (!reader.HasRows)
            {
                return null;
            }

            try
            {
                while (reader.Read())
                {
                    xWordFrequencies xwf = new xWordFrequencies()
                    {
                        id = Convert.ToInt64(GetDBInt64("id", reader)),
                        fileId = Convert.ToInt64(GetDBInt64("file_id", reader)),
                        word = GetDBString("word", reader),
                        rank = GetDBInt("rank", reader),
                        frequency = GetDBInt("frequency", reader),
                        percentage = GetDBFloat("percentage", reader),
                    };
                    list.Add(xwf);
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the problem and hand back whatever was read so far.
                Utils.ErrLog("Ошибка при чтении xwf из БД", ex.Message);
                Utils.msgInformation("Ошибка при чтении БД, требуется обновление БД");
            }
        }
    }

    return list;
}
/// <summary>
/// Merges the per-file word frequencies of every file in <c>Utils.history</c> into one list
/// with a single entry per distinct word.
/// </summary>
/// <remarks>
/// Presumably <c>Utils.history</c> already holds only the files of the wanted date range;
/// this method does no filtering itself — confirm against the caller.
/// Totals are accumulated into fresh objects, so the per-file entries held by
/// <c>Utils.history</c> are left untouched and repeated calls give the same result.
/// </remarks>
/// <returns>
/// One entry per distinct word, in order of first appearance. <c>frequency</c> is the sum over
/// all files and <c>percentage</c> is that sum relative to <c>WORDS_COUNT</c>; the remaining
/// members are copied from the first occurrence of the word.
/// </returns>
public static List<xWordFrequencies> GetCombinedFrequencies()
{
    List<xWordFrequencies> allfrequencies = new List<xWordFrequencies>();

    // Index of the entries already in the result, replacing a linear search per word.
    Dictionary<string, xWordFrequencies> byWord = new Dictionary<string, xWordFrequencies>();

    foreach (xTextFile xtf in Utils.history)
    {
        // A file may have no frequency list loaded (GetFrequencies returns null for "no rows").
        if (xtf.frequencies == null)
        {
            continue;
        }

        foreach (xWordFrequencies xwf in xtf.frequencies)
        {
            // Entries without a word cannot be aggregated by word, so they are left out.
            if (xwf == null || xwf.word == null)
            {
                continue;
            }

            xWordFrequencies total;
            if (byWord.TryGetValue(xwf.word, out total))
            {
                total.frequency = total.frequency + xwf.frequency;
            }
            else
            {
                // Work on a copy so the summed values never leak back into the per-file data.
                total = new xWordFrequencies()
                {
                    id = xwf.id,
                    fileId = xwf.fileId,
                    word = xwf.word,
                    rank = xwf.rank,
                    frequency = xwf.frequency,
                    percentage = xwf.percentage,
                };
                byWord.Add(xwf.word, total);
                allfrequencies.Add(total);
            }

            // Go through a float so the division below is not an integer division.
            float freq = total.frequency;
            total.percentage = (freq / WORDS_COUNT) * 100;
        }
    }

    return allfrequencies;
}
/// <summary>
/// Background-worker body: counts the words of every file in <c>Utils.fList</c>, builds the
/// ranked frequency list of each file and saves the file info.
/// </summary>
/// <remarks>
/// Progress values reported through <c>bgwCounter.ReportProgress</c>:
/// <c>-2</c> before a file is processed, <c>-1</c> when a pending cancellation is detected,
/// <c>-3</c> (with a null state) after the last file, and the running match number
/// (1, 2, 3, ...) while the words of a file are being counted.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void bgwCounter_DoWork(object sender, DoWorkEventArgs e)
{
    foreach (xTextFile xFile in Utils.fList)
    {
        if (bgwCounter.CancellationPending)
        {
            bgwCounter.ReportProgress(-1, xFile);
            return;
        }

        bgwCounter.ReportProgress(-2, xFile);

        string contents = xFile.Processor.GetAllText(xFile.filePath);
        xFile.charactersCount = contents.Length;
        xFile.frequencies = new List<xWordFrequencies>();

        // The pattern comes from the settings with doubled backslashes; undo the doubling.
        string stRegExp = Utils.StgGetString("TxtRegExp");
        Regex wordPattern = new Regex(stRegExp.Replace(@"\\", @"\").Trim());

        // Evaluate the matches once; Count forces the full scan and the loop below reuses it.
        MatchCollection matches = wordPattern.Matches(contents);
        xFile.wordsCount = matches.Count;
        if (xFile.wordsCount == 0)
        {
            continue;
        }

        // Skip the file when DbHelper reports an entry with the same character and word counts.
        if (DbHelper.ifExists(xFile.charactersCount, xFile.wordsCount))
        {
            continue;
        }

        // Case-insensitive tally of how often each matched word occurs.
        Dictionary<string, int> words = new Dictionary<string, int>(StringComparer.CurrentCultureIgnoreCase);
        int progress = 0;
        foreach (Match match in matches)
        {
            if (bgwCounter.CancellationPending)
            {
                bgwCounter.ReportProgress(-1, xFile);
                return;
            }

            progress++;
            int currentCount;
            words.TryGetValue(match.Value, out currentCount);
            bgwCounter.ReportProgress(progress, xFile);
            words[match.Value] = currentCount + 1;
        }

        // Walk the words from most to least frequent. Words with equal frequency share a rank;
        // the rank grows by one each time the frequency drops.
        int rank = 0;
        foreach (KeyValuePair<string, int> row in words.OrderByDescending(pair => pair.Value))
        {
            xWordFrequencies xwf = new xWordFrequencies();

            // Store the word lower-cased with a capital first letter. The configured pattern
            // may produce an empty match, which must not reach Substring(0, 1).
            string word = row.Key.ToLower();
            if (word.Length > 0)
            {
                word = word.Substring(0, 1).ToUpper() + word.Substring(1);
            }

            xwf.word = word;
            xwf.frequency = row.Value;

            if (rank == 0 || xFile.frequencies[xFile.frequencies.Count - 1].frequency > xwf.frequency)
            {
                rank++;
            }

            xwf.rank = rank;

            // A float operand is required here: dividing two ints would truncate to zero.
            float freq = xwf.frequency;
            xwf.percentage = (freq / xFile.wordsCount) * 100;
            xFile.frequencies.Add(xwf);
        }

        xFile.uniqueWordsCount = xFile.frequencies.Count;
        xFile.SaveFileInfo();
    }

    bgwCounter.ReportProgress(-3, null);
}
/// <summary>
/// Replacement table used by <see cref="Chistka"/>: a digit, Latin letters and two accented
/// Cyrillic letters on the left, the Cyrillic character each one is replaced with on the right.
/// No replacement character appears as a key, so the order of application does not matter.
/// </summary>
private static readonly string[,] ChistkaLookalikes =
{
    { "1", "Ӏ" }, { "I", "Ӏ" }, { "l", "Ӏ" }, { "i", "Ӏ" },
    { "ѐ", "ё" }, { "e", "е" }, { "a", "а" }, { "p", "р" }, { "o", "о" },
    { "k", "к" }, { "x", "х" }, { "y", "у" }, { "n", "п" }, { "m", "м" },
    { "c", "с" }, { "r", "г" }, { "u", "и" },
    { "Ѐ", "Ё" }, { "E", "Е" }, { "A", "А" }, { "B", "В" }, { "P", "Р" },
    { "O", "О" }, { "K", "К" }, { "X", "Х" }, { "T", "Т" }, { "M", "М" },
    { "C", "С" },
};

/// <summary>Applies every pair of <see cref="ChistkaLookalikes"/> to the given text.</summary>
private static string ReplaceChistkaLookalikes(string text)
{
    for (int i = 0; i < ChistkaLookalikes.GetLength(0); i++)
    {
        text = text.Replace(ChistkaLookalikes[i, 0], ChistkaLookalikes[i, 1]);
    }

    return text;
}

/// <summary>
/// Clean-up pass over <c>wf_frequencies</c>: replaces look-alike characters in every stored
/// word with Cyrillic ones, writes the new word back with <c>UpdateReq</c> and, for each row
/// updated successfully, appends an INSERT statement to <c>output.sql</c>.
/// </summary>
/// <remarks>
/// Progress is appended to <c>output.log</c> and mirrored to <c>Debug</c>.
/// Rows whose word still contains something other than whitespace, non-word characters or
/// Cyrillic letters, and rows whose update reports no affected row, are only counted as
/// "removed"; the actual delete call is disabled.
/// Any exception raised while processing is logged and shown to the user; it is not rethrown.
/// </remarks>
public static void Chistka()
{
    ResetSQLiteConnection();

    // Serves first as the lower id bound of the query (-1), then as a zero-based row counter.
    int iteration = -1;
    // Hard-coded total shown in the "current/total" progress prefix; not read from the table.
    int count = 145000;
    int removed = 0;

    // Command and reader are disposed on every exit path.
    using (SQLiteCommand cmd = sql_con.CreateCommand())
    {
        cmd.CommandText = "SELECT * FROM `wf_frequencies` Where `id` > " + iteration;

        using (SQLiteDataReader reader = cmd.ExecuteReader())
        {
            if (!reader.HasRows)
            {
                return;
            }

            try
            {
                using (StreamWriter sw_debug = new StreamWriter("output.log", true, Encoding.UTF8))
                {
                    using (StreamWriter sw_output = new StreamWriter("output.sql", true, Encoding.UTF8))
                    {
                        while (reader.Read())
                        {
                            iteration++;

                            xWordFrequencies xwf = new xWordFrequencies()
                            {
                                id = Convert.ToInt64(GetDBInt64("id", reader)),
                                fileId = Convert.ToInt64(GetDBInt64("file_id", reader)),
                                word = GetDBString("word", reader),
                                rank = GetDBInt("rank", reader),
                                frequency = GetDBInt("frequency", reader),
                                percentage = GetDBFloat("percentage", reader),
                            };

                            string line = ReplaceChistkaLookalikes(xwf.word);
                            xwf.word = line;

                            string progress = string.Format("{0}/{1} {2} ", iteration, count, line);
                            sw_debug.Write(progress);
                            Debug.Write(progress);

                            // Accept only words made of whitespace, non-word characters and Cyrillic letters.
                            if (!Regex.IsMatch(line, @"\A[\s\W\p{IsCyrillic}]*\z"))
                            {
                                // Not Cyrillic: the row is only counted, it is not deleted here.
                                sw_debug.WriteLine("");
                                Debug.WriteLine("");
                                removed++;
                                continue;
                            }

                            Dictionary<string, object> nameValueData = new Dictionary<string, object>();
                            nameValueData.Add("word", line);

                            if (UpdateReq("wf_frequencies", nameValueData, xwf.id) > 0)
                            {
                                // Format with the invariant culture so the float keeps a decimal point,
                                // and double any apostrophe so the quoted word stays a valid SQL literal.
                                sw_output.WriteLine(string.Format(
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    "INSERT INTO `wf_frequencies` VALUES({0},{1},{2},'{3}',{4},{5})",
                                    xwf.id,
                                    xwf.fileId,
                                    xwf.rank,
                                    xwf.word.Replace("'", "''"),
                                    xwf.frequency,
                                    xwf.percentage));
                                sw_debug.Write("Ok");
                                Debug.Write("Ok");
                            }
                            else
                            {
                                // No row was updated. The original author took this to mean the corrected
                                // word already exists in the table; the row is only counted as removed
                                // and the frequencies are deliberately not merged.
                                removed++;
                            }

                            sw_debug.WriteLine("");
                            Debug.WriteLine("");
                        }
                    }

                    sw_debug.WriteLine("END OF THE UNIVERSE!");
                    Debug.WriteLine("END OF THE UNIVERSE!");
                    sw_debug.WriteLine("REMOVED: " + removed);
                    Debug.WriteLine("REMOVED: " + removed);
                }
            }
            catch (Exception ex)
            {
                Utils.ErrLog("Ошибка при чтении xwf из БД", ex.Message);
                Utils.msgInformation("Ошибка при чтении БД, требуется обновление БД");
            }
        }
    }
}