/// <summary>
/// Initializes the progress window and immediately starts a worker thread that
/// fills <paramref name="vocabulary"/> from <paramref name="sourceText"/>.
/// When the worker finishes, <c>FinishedSuccesfull</c> is set to <c>true</c> and
/// the window is closed through the dispatcher.
/// </summary>
/// <param name="sourceText">Text passed on to <c>FillVocabulary</c>.</param>
/// <param name="vocabulary">Vocabulary passed on to <c>FillVocabulary</c>.</param>
public ProgressWindow(string sourceText, Vocabulary vocabulary)
{
    InitializeComponent();
    SetLocalization();
    wndProgress.Title = Resources["creatingVocabulary"].ToString();

    // Stays false unless the worker below runs to completion.
    FinishedSuccesfull = false;

    ThreadStart fillAndClose = () =>
    {
        FillVocabulary(sourceText, vocabulary);
        FinishedSuccesfull = true;

        // Close() has to run on the thread that owns the window.
        Dispatcher.Invoke(new Action(Close));
    };

    mProgressThread = new Thread(fillAndClose);
    mProgressThread.Start();
}
/// <summary>
/// Creates a vocabulary by executing the <c>CreateVocabulary</c> stored procedure.
/// </summary>
/// <param name="vocabulary">
/// Vocabulary to create. Its title must be non-blank and at most 200 characters;
/// its note, when present, must be at most 500 characters.
/// </param>
/// <returns>
/// The stored procedure's return value, or 0 when <paramref name="vocabulary"/>
/// is null or fails the validation above (nothing is sent to the database then).
/// </returns>
public static int CreateVocabulary(Vocabulary vocabulary)
{
    if (vocabulary == null)
    {
        return 0;
    }

    string title = vocabulary.Title;
    if (string.IsNullOrWhiteSpace(title) || title.Length > 200)
    {
        return 0;
    }

    // A missing note is allowed here; only an over-long one is rejected.
    string note = vocabulary.Note;
    if (note != null && note.Length > 500)
    {
        return 0;
    }

    // NOTE(review): ExecuteNonQuery needs an open connection - presumably
    // Connection.SqlConnection hands one out already opened; confirm.
    using (SqlConnection connection = Connection.SqlConnection)
    using (SqlCommand command = connection.CreateCommand())
    {
        command.CommandType = CommandType.StoredProcedure;
        command.CommandText = "CreateVocabulary";

        command.Parameters.AddWithValue("@title", title);
        command.Parameters.AddWithValue("@baseDictionaryId", vocabulary.BaseDictionaryId);

        // A CLR null makes ADO.NET omit the parameter altogether, so a null
        // note has to be sent explicitly as SQL NULL.
        command.Parameters.AddWithValue("@note", (object)note ?? DBNull.Value);

        SqlParameter returnedValue = command.Parameters.Add("return", SqlDbType.Int);
        returnedValue.Direction = ParameterDirection.ReturnValue;

        command.ExecuteNonQuery();
        return (int)returnedValue.Value;
    }
}
/// <summary>
/// Handles the "parse text" button: validates the form, creates the vocabulary
/// record and, on success, shows the progress window that fills it from the
/// entered source text.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnParseText_Click(object sender, RoutedEventArgs e)
{
    // --- Form validation: the first failing rule reports and aborts. ---
    string sourceText = txtSourceText.Text;
    if (string.IsNullOrWhiteSpace(sourceText))
    {
        MessageBox.Show(Resources["sourceTextRequired"].ToString(), AppName, MessageBoxButton.OK, MessageBoxImage.Warning);
        return;
    }

    string title = txtVocabularyTitle.Text;
    bool titleIsValid = !string.IsNullOrWhiteSpace(title) && title.Length <= 200;
    if (!titleIsValid)
    {
        MessageBox.Show(Resources["enterVocabularyTitle"].ToString(), AppName, MessageBoxButton.OK, MessageBoxImage.Information);
        return;
    }

    object selectedDictionary = cbBasicDictionary.SelectedItem;
    if (selectedDictionary == null)
    {
        MessageBox.Show(Resources["selectBasicDictionary"].ToString(), AppName, MessageBoxButton.OK, MessageBoxImage.Information);
        return;
    }

    string note = txtVocabularyNote.Text;
    if (note.Length > 500)
    {
        MessageBox.Show(Resources["noteSizeLimited"].ToString(), AppName, MessageBoxButton.OK, MessageBoxImage.Information);
        return;
    }

    // --- Create the vocabulary record; an id of 0 signals failure. ---
    int baseDictionaryId = ((Dictionary)selectedDictionary).Id;
    Vocabulary draft = new Vocabulary(0, title, baseDictionaryId, note);
    int vocabularyId = DB.Vocabulary.CreateVocabulary(draft);
    if (vocabularyId == 0)
    {
        MessageBox.Show(Resources["cantCreateVocabulary"].ToString(), AppName, MessageBoxButton.OK, MessageBoxImage.Error);
        return;
    }

    // --- Fill the new vocabulary while a modal progress window is shown. ---
    Vocabulary created = new Vocabulary(vocabularyId, title, baseDictionaryId, note);
    ProgressWindow progressWindow = new ProgressWindow(sourceText, created);
    progressWindow.ShowDialog();

    // The list is refreshed whether or not the fill ran to completion.
    UpdateVocabulariesList();

    if (progressWindow.FinishedSuccesfull)
    {
        MessageBox.Show(Resources["vocabularyCreated"].ToString(), AppName, MessageBoxButton.OK, MessageBoxImage.Information);
    }
}
/// <summary>
/// Extracts the distinct words and 2-5 word phrases of <paramref name="sourceText"/>,
/// keeps those that are terms of the vocabulary's base dictionary, collects up to
/// three example sentences per term and stores the result through
/// <c>DB.LearnedWords.AddContent</c>. <c>pbProgress</c> is advanced once per
/// processed sentence and once per processed term.
/// </summary>
/// <param name="sourceText">Raw text to analyse.</param>
/// <param name="vocabulary">Supplies the <c>Id</c> and <c>BaseDictionaryId</c> used here.</param>
/// <param name="languageCode">Not used by this method.</param>
private void FillVocabulary(string sourceText, Vocabulary vocabulary, string languageCode = null)
{
    UpdateProgressBarDelegate updatePbDelegate = new UpdateProgressBarDelegate(pbProgress.SetValue);

    // All progress updates go through the dispatcher because this method runs off the UI thread.
    Action<double> reportProgress = value => Dispatcher.Invoke(
        updatePbDelegate,
        System.Windows.Threading.DispatcherPriority.Background,
        new object[] { ProgressBar.ValueProperty, value });

    reportProgress(0.0);

    // ---- 1. Normalise the text -------------------------------------------------
    // Re-join words that were hyphenated across a line break.
    StringBuilder text = new StringBuilder(sourceText);
    text.Replace("-\r\n", "");
    string result = text.ToString();

    // Sentence list: '?' and '!' get a trailing '.' so that a single split on '.' is enough.
    string tempResult = Regex.Replace(result, "-", "");
    tempResult = tempResult.Replace("?", "?.");
    tempResult = tempResult.Replace("!", "!.");
    tempResult = Regex.Replace(tempResult, @"\s+\n+", " \n");
    tempResult = Regex.Replace(tempResult, @"[ ]+", " ");
    HashSet<string> sentences = new HashSet<string>(tempResult.Split('.'));

    // Word list: drop apostrophe suffixes, keep only word characters and hyphens, lower-case.
    result = Regex.Replace(result, @"[’']\w+", " ");
    result = Regex.Replace(result, @"[^\w\-]", " ");
    result = result.Replace("\r\n", " ");
    result = Regex.Replace(result, @"\s+", " ");
    result = result.ToLower();

    List<string> wordsList = new List<string>(result.Split(' '));
    Debug.Print("Total words count: " + wordsList.Count.ToString());
    HashSet<string> distinctWords = new HashSet<string>(wordsList);
    Debug.Print("Distinct words count: " + distinctWords.Count.ToString());

    // ---- 2. Build 2..5 word phrases with a sliding window ---------------------
    Debug.Print("Start creating phrases");
    HashSet<string> distinct2WordsComb = new HashSet<string>();
    HashSet<string> distinct3WordsComb = new HashSet<string>();
    HashSet<string> distinct4WordsComb = new HashSet<string>();
    HashSet<string> distinct5WordsComb = new HashSet<string>();

    // The four words preceding the current one, oldest first.
    string firstWord = " ";
    string secondWord = " ";
    string thirdWord = " ";
    string fourthWord = " ";
    foreach (string word in wordsList)
    {
        string twoWords = fourthWord + " " + word;
        string threeWords = thirdWord + " " + twoWords;
        string fourWords = secondWord + " " + threeWords;
        string fiveWords = firstWord + " " + fourWords;

        distinct2WordsComb.Add(twoWords);
        distinct3WordsComb.Add(threeWords);
        distinct4WordsComb.Add(fourWords);
        distinct5WordsComb.Add(fiveWords);

        firstWord = secondWord;
        secondWord = thirdWord;
        thirdWord = fourthWord;
        fourthWord = word;
    }

    Debug.Print("2 words phrases count: {0}", distinct2WordsComb.Count);
    Debug.Print("3 words phrases count: {0}", distinct3WordsComb.Count);
    Debug.Print("4 words phrases count: {0}", distinct4WordsComb.Count);

    // ---- 3. Keep only dictionary terms that occur in the text -----------------
    List<Term> termsFromDic = DB.Dictionary.GetListOfTerms(vocabulary.BaseDictionaryId);
    distinctWords.UnionWith(distinct2WordsComb);
    distinctWords.UnionWith(distinct3WordsComb);
    distinctWords.UnionWith(distinct4WordsComb);
    distinctWords.UnionWith(distinct5WordsComb);

    Debug.Print("Creating words dictionary");
    List<LearnedWord> learnedWords = new List<LearnedWord>();
    List<TermToAdd> termsToAdd = new List<TermToAdd>();
    var newResult = from term in termsFromDic
                    join word in distinctWords on term.Content equals word
                    select term;
    List<Term> wordsToAdd = newResult.ToList();
    foreach (Term term in wordsToAdd)
    {
        termsToAdd.Add(new TermToAdd(term.Id, term.Content));
    }

    Dispatcher.Invoke((Action)(() => pbProgress.Maximum = wordsToAdd.Count + sentences.Count));

    // Index the terms by text once, preserving list order inside each bucket, so the
    // per-match lookup below is a hash probe instead of a scan over every term.
    // The dictionary is only read once the parallel loop starts.
    Dictionary<string, List<TermToAdd>> termsByText = new Dictionary<string, List<TermToAdd>>();
    foreach (TermToAdd candidate in termsToAdd)
    {
        if (candidate.Term == null)
        {
            continue; // a null term can never equal a regex match
        }

        List<TermToAdd> bucket;
        if (!termsByText.TryGetValue(candidate.Term, out bucket))
        {
            bucket = new List<TermToAdd>();
            termsByText.Add(candidate.Term, bucket);
        }

        bucket.Add(candidate);
    }

    // ---- 4. Strict example search: exact (case-sensitive) word == term --------
    int counter = 0;
    Regex regex = new Regex(@"\w+", RegexOptions.IgnoreCase);
    int sentCount = 0;
    Parallel.ForEach(sentences, sentence =>
    {
        Debug.Print("strict search for example. Sentence {0}/{1}", Interlocked.Increment(ref sentCount) - 1, sentences.Count);

        foreach (Match match in regex.Matches(sentence))
        {
            List<TermToAdd> bucket;
            if (!termsByText.TryGetValue(match.Value, out bucket))
            {
                continue;
            }

            // Several sentences can hit the same term at once; the "fewer than three"
            // check and the Add must happen atomically, and Examples is mutated here.
            lock (bucket)
            {
                TermToAdd target = bucket.FirstOrDefault(t => t.Examples.Count() < 3);
                if (target != null)
                {
                    target.Examples.Add(sentence.Trim());
                }
            }
        }

        reportProgress(Interlocked.Increment(ref counter));
    });

    // ---- 5. Format examples (weak search as fallback) and store ---------------
    int debugCounter = 0;
    Parallel.ForEach(termsToAdd, term =>
    {
        StringBuilder realExamples = new StringBuilder();
        if (term.Examples.Count() != 0)
        {
            foreach (string example in term.Examples)
            {
                realExamples.Append(FormatExample(example));
            }
        }
        else
        {
            // The term is plain text, not a pattern, so it is escaped before being
            // embedded in a regular expression.
            string escapedTerm = Regex.Escape(term.Term ?? string.Empty);

            // First try: term delimited by non-word characters on both sides.
            Regex delimitedRegex = new Regex("\\W" + escapedTerm + "\\W", RegexOptions.IgnoreCase);
            foreach (string example in sentences.Where(s => delimitedRegex.IsMatch(s)).Take(3).ToList())
            {
                realExamples.Append(FormatExample(example));
            }

            // Second try: only the leading delimiter is required.
            if (realExamples.Length == 0)
            {
                Regex prefixRegex = new Regex("\\W" + escapedTerm, RegexOptions.IgnoreCase);
                foreach (string example in sentences.Where(s => prefixRegex.IsMatch(s)).Take(3).ToList())
                {
                    realExamples.Append(FormatExample(example));
                }
            }
        }

        LearnedWord learnedWord = new LearnedWord(vocabulary.Id, term.TermId, realExamples.ToString().Trim());

        // The batch list is shared between workers; it is flushed in chunks.
        lock (learnedWords)
        {
            learnedWords.Add(learnedWord);
            if (learnedWords.Count > 100)
            {
                DB.LearnedWords.AddContent(learnedWords);
                learnedWords.Clear();
            }
        }

        // Reported outside the lock so other workers are not stalled on the UI thread.
        reportProgress(Interlocked.Increment(ref counter));

        Debug.Print("{0}/{1}", Interlocked.Increment(ref debugCounter) - 1, wordsToAdd.Count);
    });

    // Store whatever is left in the last, partially filled batch.
    if (learnedWords.Count > 0)
    {
        DB.LearnedWords.AddContent(learnedWords);
        learnedWords.Clear();
    }

    // Parallel updates may reach the UI out of order; finish on the true total.
    reportProgress(counter);

    Debug.Print("Vocabulary has been created");
}