/// <summary>
/// Handles the "save results" button: writes one record per used window into a new
/// text file in the "Results" directory (named after the current tick count) and
/// then calls <c>Reset()</c>.
/// </summary>
/// <param name="sender">Source of the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Button_saveResults_Click(object sender, EventArgs e)
{
    var resultsFilePath = DirectoryManager.GetSpecifiedDirectory("Results")
        + DirectoryManager.sep
        + $@"{DateTime.Now.Ticks}.txt";

    using (var writer = new StreamWriter(resultsFilePath))
    {
        for (int slot = 0; slot < numberOfUsedWindows; slot++)
        {
            Window current = arrayOfWindows[slot];

            // The first window whose text box is empty ends the export.
            if (current.Txtb.Text == "")
            {
                break;
            }

            writer.WriteLine("Model: " + Path.GetFileName(current.Model));
            writer.WriteLine("Input: " + current.Input);

            string morphoditaLine = current.UsedMorphodita
                ? "MorphoDiTa: pouzita"
                : "MorphoDiTa: nepouzita";

            writer.WriteLine("Output: " + current.Output);
            writer.WriteLine(morphoditaLine);

            // Blank line separates the records of individual windows.
            writer.WriteLine();
        }
    }

    Reset();
}
/// <summary>
/// Fills <c>listBox_modelFiles</c> with the names (without extension) of all files
/// found in the "SourceTXTFiles" directory and stores the directory of the first
/// file in <c>DirectoryName</c>. When the directory contains no files, a message is
/// shown in <c>textBox_Error</c> instead.
/// </summary>
private void LoadFiles()
{
    string sourceDirectory = DirectoryManager.GetSpecifiedDirectory("SourceTXTFiles");
    string[] foundFiles = Directory.GetFiles(sourceDirectory);

    // Nothing to list: tell the user and leave the list box untouched.
    if (foundFiles.Length == 0)
    {
        textBox_Error.Text = "Je třeba nejdříve upravit soubor .txt.";
        return;
    }

    DirectoryName = Path.GetDirectoryName(foundFiles[0]);

    foreach (string filePath in foundFiles)
    {
        listBox_modelFiles.Items.Add(Path.GetFileNameWithoutExtension(filePath));
    }
}
/// <summary>
/// Handles the "change TXT" button: hides the form and keeps showing an open-file
/// dialog until either a .txt file has been chosen and passed to
/// <c>TransformTXTFile.TransformFile</c>, or the user dismisses the dialog.
/// </summary>
/// <param name="sender">Source of the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Button_change_TXT_Click(object sender, EventArgs e)
{
    Hide();

    while (true)
    {
        // A fresh dialog is created on every pass; it is IDisposable, so it is
        // released at the end of each pass instead of being left to the finalizer.
        using (OpenFileDialog openFileDialog1 = new OpenFileDialog
        {
            InitialDirectory = DirectoryManager.GetSpecifiedDirectory("PlainTexts"),
            Filter = "Formát txt (*.txt)|*.txt|Všechny formáty (*.*)|*.*",
            RestoreDirectory = true
        })
        {
            // Dialog dismissed: restore the window and stop asking.
            if (openFileDialog1.ShowDialog() != DialogResult.OK)
            {
                ResetWindow();
                Show();
                break;
            }

            try
            {
                // Ordinal, case-insensitive extension check so that the result does not
                // depend on the current culture and files such as "TEXT.TXT" are accepted.
                if (openFileDialog1.FileName.EndsWith(".txt", StringComparison.OrdinalIgnoreCase))
                {
                    TransformTXTFile.TransformFile(openFileDialog1.FileName);
                    MakeResultsVisible(true, ".txt", openFileDialog1.FileName);
                    break;
                }

                // Wrong file type: report it and let the loop show the dialog again.
                MakeResultsVisible(false, ".txt", openFileDialog1.FileName);
                Show();
            }
            catch (SecurityException)
            {
                // Report the failure; the loop then offers the dialog again.
                MessageBox.Show("Chyba při načítání souboru.");
            }
        }
    }
}
/// <summary>
/// Concatenates the source files listed in <c>DictFilesInModel</c> into one file in
/// the "Temp" directory named after the text of <c>textBox_fileName</c>. Each source
/// file is copied as many times as the first element of its dictionary value says.
/// </summary>
/// <remarks>
/// The target file is opened in append mode for every copy, so content that is
/// already present in that file is kept and the new data is added behind it.
/// </remarks>
/// <returns>Path of the merged file.</returns>
private string MergeAllFiles()
{
    string mergedPath = DirectoryManager.GetSpecifiedDirectory("Temp")
        + DirectoryManager.sep
        + textBox_fileName.Text
        + ".txt";

    foreach (var entry in DictFilesInModel)
    {
        string sourcePath = DirectoryManager.GetSpecifiedDirectory("SourceTXTFiles")
            + DirectoryManager.sep
            + entry.Key.ToString()
            + ".txt";

        // entry.Value[0] is the number of times this file is appended.
        int copiesDone = 0;
        while (copiesDone < entry.Value[0])
        {
            using (Stream reader = File.OpenRead(sourcePath))
            {
                using (Stream appender = new FileStream(mergedPath, FileMode.Append, FileAccess.Write, FileShare.None))
                {
                    reader.CopyTo(appender);
                }
            }

            copiesDone++;
        }
    }

    return mergedPath;
}
/// <summary>
/// Trains a word2vec model from the given text file and saves it into the "Models"
/// directory under the same file name as the training file.
/// </summary>
/// <remarks>
/// This method, together with the libraries in Word2Vec.Net-master, was programmed by
/// GitHub user Eabdullin: https://github.com/eabdullin/Word2Vec.Net
/// </remarks>
/// <param name="trainfile">Path of the text file used as training data.</param>
/// <param name="sizeOfVectors">Size of the word vectors (default 100).</param>
/// <param name="minCount">Words occurring fewer times than this are discarded (default 5).</param>
/// <param name="iterations">Number of training iterations (default 5).</param>
public static void TrainModel(string trainfile, int sizeOfVectors = 100, int minCount = 5, int iterations = 5)
{
    // The model keeps the file name of its training file, but lives in the "Models" directory.
    string modelPath = DirectoryManager.GetSpecifiedDirectory("Models")
        + DirectoryManager.sep
        + Path.GetFileName(trainfile);

    Word2VecBuilder.Create()
        .WithTrainFile(trainfile)   // text data the model is trained on
        .WithOutputFile(modelPath)  // where the resulting word vectors / word clusters are saved
        .WithSize(sizeOfVectors)    // size of word vectors; default is 100
        .WithDebug(2)               // debug mode (default = 2 = more info during training)
        .WithCBow(1)                // continuous bag of words model; default is 1 (0 = skip-gram model)
        .WithAlpha(0.05f)           // starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW
        .WithSample((float)1e-3)    // threshold for occurrence of words
        .WithHs(0)                  // hierarchical softmax; default is 0 (not used)
        .WithNegative(5)            // number of negative examples; default is 5, common values are 3 - 10 (0 = not used)
        .WithThreads(12)            // number of threads (default 12)
        .WithIter(iterations)       // number of training iterations (default 5)
        .WithMinCount(minCount)     // discards words that appear less than minCount times; default is 5
        .Build()
        .TrainModel();
}
/// <summary>
/// Transforms a text file and stores the result in the "SourceTXTFiles" directory.
/// Runs of characters contained in <c>bigCzechChars</c>, <c>smallCzechChars</c> or
/// <c>numbers</c> are treated as words. A word starting with an ASCII capital letter
/// (A-Z) is lower-cased unless <c>dictThisWordIsName</c> marks it as a name; all other
/// words are written unchanged. Every other character is written once, after being
/// passed through <c>ModifyDangerousSymbols</c>.
/// </summary>
/// <remarks>
/// The result file is named after the input file (without its extension) plus ".txt".
/// When that name is already taken, "_1_", "_2_", ... is inserted before the
/// extension until an unused name is found.
/// </remarks>
/// <param name="file">Path of the text file to transform.</param>
public static void TransformFile(string file)
{
    string basenameOfFileWithoutExtensions = Path.GetFileNameWithoutExtension(file);

    // The name dictionary is rebuilt for every transformed file.
    dictThisWordIsName.Clear();
    FillDictionaryOfNames(file);

    string resultPrefix = DirectoryManager.GetSpecifiedDirectory("SourceTXTFiles")
        + DirectoryManager.sep
        + basenameOfFileWithoutExtensions;

    // Never overwrite an existing file: try numbered names until one is free.
    string result = resultPrefix + ".txt";
    for (int suffix = 1; File.Exists(result); suffix++)
    {
        result = resultPrefix + "_" + suffix + "_.txt";
    }

    try
    {
        using (StreamReader reader = new StreamReader(file))
        using (StreamWriter writer = new StreamWriter(result))
        {
            StringBuilder currentWord = new StringBuilder();
            int next;

            while ((next = reader.Read()) >= 0)
            {
                char oneChar = (char)next;

                if (bigCzechChars.Contains(oneChar) || smallCzechChars.Contains(oneChar) || numbers.Contains(oneChar))
                {
                    currentWord.Append(oneChar);
                    continue;
                }

                // A non-word character ends the current word (if any). The character
                // itself is written exactly once, whether or not a word preceded it.
                WriteTransformedWord(writer, currentWord.ToString());
                currentWord.Clear();
                writer.Write(ModifyDangerousSymbols(oneChar));
            }

            // The input may end in the middle of a word; write that word as well
            // so the last word of the file is not lost.
            WriteTransformedWord(writer, currentWord.ToString());
        }
    }
    catch (System.Text.EncoderFallbackException)
    {
        // Encoding failures are ignored; whatever was written before the failure
        // stays in the result file.
    }
}

/// <summary>
/// Writes one word to <paramref name="writer"/>: a word starting with an ASCII capital
/// letter is lower-cased unless <c>dictThisWordIsName</c> marks it as a name; any other
/// word is written unchanged. An empty word writes nothing.
/// </summary>
/// <param name="writer">Writer of the result file.</param>
/// <param name="word">Word collected from the input; may be empty.</param>
private static void WriteTransformedWord(StreamWriter writer, string word)
{
    if (word.Length == 0)
    {
        return;
    }

    // Only words starting with A-Z are candidates for lower-casing.
    if (word[0] < 'A' || word[0] > 'Z')
    {
        writer.Write(word);
        return;
    }

    string lowered = word.ToLower();
    bool isName;
    if (dictThisWordIsName.TryGetValue(lowered, out isName) && isName)
    {
        writer.Write(word);
    }
    else
    {
        writer.Write(lowered);
    }
}