/// <summary>
/// Builds the dictionary and a radial network from the configured learning
/// directory and trains the network on the learning documents.
/// </summary>
/// <remarks>
/// If training fails, both <c>radialNetwork</c> and <c>dictionary</c> are reset to null.
/// </remarks>
private void RadialNeuralLearn()
{
    // Cached learning-document info is only valid for the directory it was built from.
    bool cachedInfoIsStale = learningDocInfo != null
        && learningDocInfo.SourceDir != Settings.Default.pathLearningDir;
    if (cachedInfoIsStale)
    {
        learningDocInfo = null;
    }

    // Load the list of categories.
    DocumentClass.LoadFromFiles(Settings.Default.pathLearningDir, PreprocessingConsts.CategoryFilePattern);

    // Create the dictionary.
    dictionary = DictionaryFactory(Settings.Default.pathSummaryFile);

    // Create the network; the second argument is the number of loaded categories.
    radialNetwork = new RadialNetwork(Settings.Default.numberNeuronsHidden, DocumentClass.CategoriesCount);

    DocumentRepresentationType representationType =
        (DocumentRepresentationType)Settings.Default.documentRepresentationType;
    DocumentList learningDocuments = PreprocessingUtility.CreateLearningDocumentList(
        Settings.Default.pathLearningDir,
        dictionary,
        representationType,
        learningDocInfo);

    if (radialNetwork.Learn(learningDocuments))
    {
        return;
    }

    // Learning failed: discard the network and the dictionary.
    radialNetwork = null;
    dictionary = null;
}
/// <summary>
/// Creates a document from a previously prepared file.
/// </summary>
/// <param name="fileName">File containing the document data.</param>
/// <param name="dictionary">Dictionary on which the document is built.</param>
/// <param name="className">Name of the class the document belongs to, or null if the class is unknown.</param>
/// <param name="learningDocInfo">Object holding information about all learning documents.</param>
public TfIdfDocument(String fileName, Dictionary dictionary, String className, LearningDocInfo learningDocInfo)
    : base(dictionary)
{
    wordCountList = new WordCountList();

    // The class index is only resolved when the class is known.
    if (className != null)
    {
        classNo = DocumentClass.GetClassIndex(className);
    }

    // Information about all words across all learning documents.
    Dictionary<String, WordInfo> corpusWords = learningDocInfo.AllWordsInfo;
    double corpusDocCount = learningDocInfo.AllDocCount;

    // Words found in this document.
    WordCountList documentWords = new WordCountList(fileName);
    int documentWordTotal = documentWords.GetAllWordsCount();

    // One entry is emitted per dictionary word, in dictionary order.
    foreach (String word in dictionary)
    {
        // The indexer yields -1 for a word that is not in the document; such words get weight 0.
        if (documentWords[word] == -1)
        {
            wordCountList.Add(new WordCountPair(word, 0));
            continue;
        }

        double docsContainingWord = corpusWords[word].InclDocCount;
        double weight = PreprocessingUtility.ComputeTfIdf(
            documentWords[word],
            documentWordTotal,
            corpusDocCount,
            docsContainingWord);
        wordCountList.Add(new WordCountPair(word, weight));
    }
}
/// <summary>
/// Preprocesses a single file by stemming it with the current stop-word list.
/// </summary>
/// <param name="sourcePath">Path of the file to preprocess.</param>
/// <param name="destinationFile">Path passed to the stemmer as the destination file.</param>
public void PreprocessingFile(String sourcePath, String destinationFile)
{
    // Stop words are loaded on first use from the path configured in the settings.
    bool stopWordsLoaded = stopWords != null;
    if (!stopWordsLoaded)
    {
        stopWords = PreprocessingUtility.LoadStopWords(Settings.Default.pathStopWords);
    }

    PreprocessingUtility.StemFile(sourcePath, destinationFile, stopWords);
}
/// <summary>
/// Preprocesses a whole directory.
/// Starts the worker that performs the preprocessing and shows the progress form.
/// </summary>
/// <param name="sourcePath">Directory to preprocess.</param>
public void PreprocessingDir(String sourcePath)
{
    // The number of documents is used as the upper bound of the progress form.
    int documentCount = PreprocessingUtility.GetDocumentsNumber(sourcePath);

    preprocessingForm = new PreprocessingForm(this);
    preprocessingForm.MaxProgress = documentCount;

    // The path is stored in a field before the worker is started.
    preprocessingPath = sourcePath;

    preprocessingWorker.RunWorkerAsync();
    preprocessingForm.ShowDialog();
}
/// <summary>
/// Creates a new dictionary.
/// </summary>
/// <remarks>
/// For every word in every file under each category's "stem" sub-directory the
/// TF-IDF value is computed and summed per word; the words with the highest
/// sums form the dictionary. If fewer than <paramref name="size"/> distinct
/// words exist, all of them are taken.
/// </remarks>
/// <param name="sourceDir">Directory containing all learning files.</param>
/// <param name="summaryFile">File with the summary of all files.</param>
/// <param name="size">Number of words in the dictionary.</param>
public CtfIdfDictionary(String sourceDir, String summaryFile, int size)
{
    Dictionary<String, WordCountPair> tmpDictionary = new Dictionary<String, WordCountPair>();

    // Load the information about all words.
    LearningDocInfo learningDocInfo = new LearningDocInfo(sourceDir, summaryFile);
    Dictionary<String, WordInfo> allWords = learningDocInfo.AllWordsInfo;
    int allDocCount = learningDocInfo.AllDocCount;

    // Build the dictionary: walk all sub-directories, all stemmed files and all words.
    DirectoryInfo sourceDirInfo = new DirectoryInfo(sourceDir);
    foreach (DirectoryInfo dirInfo in sourceDirInfo.GetDirectories())
    {
        DirectoryInfo stemDir = new DirectoryInfo(Path.Combine(dirInfo.FullName, "stem"));
        foreach (FileInfo fileInfo in stemDir.GetFiles())
        {
            WordCountList wordsInFile = new WordCountList(fileInfo.FullName);
            int wordsInDocCount = wordsInFile.GetAllWordsCount();
            foreach (WordCountPair wordCountPair in wordsInFile)
            {
                double tfIdf = PreprocessingUtility.ComputeTfIdf(
                    wordCountPair.Count,
                    wordsInDocCount,
                    allDocCount,
                    allWords[wordCountPair.Word].InclDocCount);

                // Single lookup: accumulate into the existing entry or create a new one.
                WordCountPair accumulated;
                if (tmpDictionary.TryGetValue(wordCountPair.Word, out accumulated))
                {
                    accumulated.Count += tfIdf;
                }
                else
                {
                    tmpDictionary.Add(wordCountPair.Word, new WordCountPair(wordCountPair.Word, tfIdf));
                }
            }
        }
    }

    // Select the words: sort and reverse so the largest entries come first.
    WordCountPair[] tmpArray = new WordCountPair[tmpDictionary.Count];
    tmpDictionary.Values.CopyTo(tmpArray, 0);
    Array.Sort(tmpArray);
    Array.Reverse(tmpArray);

    // Copy to the final list. The count is clamped to the number of available
    // words so a requested size larger than the vocabulary no longer throws
    // IndexOutOfRangeException.
    int wordsToTake = Math.Min(size, tmpArray.Length);
    wordList = new List<string>();
    for (int i = 0; i < wordsToTake; i++)
    {
        wordList.Add(tmpArray[i].Word);
    }
}
/// <summary>
/// Preprocesses a whole directory.
/// Run in a separate thread.
/// </summary>
/// <remarks>
/// Every sub-directory of <c>preprocessingPath</c> is stemmed and then summed
/// into its own category file; finally the category files are summed into the
/// summary file. If cancellation is pending after a sub-directory has been
/// stemmed, the method returns immediately and the remaining files are not written.
/// </remarks>
public void PreprocessingDirProcess()
{
    // Stop words are loaded on first use from the path configured in the settings.
    if (stopWords == null)
    {
        stopWords = PreprocessingUtility.LoadStopWords(Settings.Default.pathStopWords);
    }

    DirectoryInfo rootDirInfo = new DirectoryInfo(preprocessingPath);
    DirectoryInfo[] categoryDirs = rootDirInfo.GetDirectories();

    for (int i = 0; i < categoryDirs.Length; i++)
    {
        DirectoryInfo categoryDir = categoryDirs[i];

        PreprocessingUtility.StemDir(categoryDir.FullName, stopWords, preprocessingWorker);
        if (preprocessingWorker.CancellationPending)
        {
            return;
        }

        // Sum the stemmed files of this sub-directory into a file placed in the root directory.
        String stemmedDir = categoryDir.FullName + "\\stem\\";
        String categoryFile = rootDirInfo + "\\" + categoryDir.Name + PreprocessingConsts.CategoryFileExtension;
        PreprocessingUtility.SumWords(stemmedDir, PreprocessingConsts.StemmedFilePattern, categoryFile);
    }

    // Sum all category files into the overall summary file.
    String summaryFile = rootDirInfo + "\\" + PreprocessingConsts.SummaryFileName;
    PreprocessingUtility.SumWords(preprocessingPath, PreprocessingConsts.CategoryFilePattern, summaryFile);
}
/// <summary>
/// Loads a directory chosen by the user.
/// </summary>
/// <remarks>
/// In learning mode the directory must already be preprocessed; its path,
/// summary file and word/category/document counts are then stored in the
/// settings. In classification mode the directory is stored and its items are
/// added to the classification results.
/// </remarks>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void OnDirectoryToolStripMenuItem_Click(object sender, EventArgs e)
{
    String pathTemp = ShowFolderBrowserDialog();
    if (pathTemp == null)
    {
        // No directory was returned by the dialog helper.
        return;
    }

    OperationType operationType = (OperationType)Settings.Default.operationType;
    switch (operationType)
    {
        case OperationType.Learning:
            String pathSummaryTemp = pathTemp + "\\" + PreprocessingConsts.SummaryFileName;
            if (!IsPrepocessingDone(pathTemp))
            {
                // "\u0105" is the Polish letter a-ogonek; the escape keeps the message
                // correct regardless of the encoding the source file is saved in
                // (the literal previously contained a mis-encoded character).
                MessageBox.Show(
                    "Dokonaj preprocessing'u dla danych ucz\u0105cych.",
                    "Preprocessing",
                    MessageBoxButtons.OK,
                    MessageBoxIcon.Information);
                return;
            }

            Settings.Default.pathLearningDir = pathTemp;
            Settings.Default.pathSummaryFile = pathSummaryTemp;
            Settings.Default.numberAllWordsInDictionary = new WordCountList(pathSummaryTemp).GetUniqueWordsCount();
            Settings.Default.numberLearningCategories = PreprocessingUtility.GetCategoryNumber(pathTemp);
            Settings.Default.numberLearningDocuments = PreprocessingUtility.GetDocumentsNumber(pathTemp);
            SetFormStateAfterLoadLearningData();
            break;

        case OperationType.Classification:
            Settings.Default.pathClassificationDir = pathTemp;
            AddItemsToClassificationResultFtomDir(Settings.Default.pathClassificationDir);
            SetFormStateAfterLoadClassificateData();
            break;

        default:
            break;
    }
}