예제 #1
0
        /// <summary>
        /// Builds a dictionary and a radial network from the configured learning
        /// directory and trains the network on the learning documents.
        /// When training reports failure, both the network and the dictionary are reset to null.
        /// </summary>
        private void RadialNeuralLearn()
        {
            // Cached learning-document info is only valid for the directory it was built from.
            if (learningDocInfo != null)
            {
                if (learningDocInfo.SourceDir != Settings.Default.pathLearningDir)
                {
                    learningDocInfo = null;
                }
            }

            // Load the category list (must happen before CategoriesCount is read below).
            DocumentClass.LoadFromFiles(Settings.Default.pathLearningDir, PreprocessingConsts.CategoryFilePattern);

            // Create the dictionary.
            dictionary = DictionaryFactory(Settings.Default.pathSummaryFile);

            // Create the network.
            radialNetwork = new RadialNetwork(Settings.Default.numberNeuronsHidden, DocumentClass.CategoriesCount);

            // Build the list of learning documents in the configured representation.
            DocumentList learningDocuments = PreprocessingUtility.CreateLearningDocumentList(
                Settings.Default.pathLearningDir,
                dictionary,
                (DocumentRepresentationType)Settings.Default.documentRepresentationType,
                learningDocInfo);

            bool learned = radialNetwork.Learn(learningDocuments);
            if (!learned)
            {
                // Training failed: drop the half-built state.
                radialNetwork = null;
                dictionary    = null;
            }
        }
예제 #2
0
        /// <summary>
        /// Creates a document from a previously prepared file.
        /// For every word of the dictionary a weight is stored: the TF-IDF value when
        /// the word occurs in the file, 0 otherwise.
        /// </summary>
        /// <param name="fileName">File containing the data.</param>
        /// <param name="dictionary">Dictionary on the basis of which the document is created.</param>
        /// <param name="className">Name of the class the document belongs to, or null if the class is unknown.</param>
        /// <param name="learningDocInfo">Object holding information about all learning documents.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="learningDocInfo"/> is null.</exception>
        public TfIdfDocument(String fileName, Dictionary dictionary, String className, LearningDocInfo learningDocInfo)
            : base(dictionary)
        {
            // The statistics of the learning set are required below; fail with a clear
            // argument error instead of a NullReferenceException further down.
            if (learningDocInfo == null)
            {
                throw new ArgumentNullException("learningDocInfo");
            }

            wordCountList = new WordCountList();
            if (className != null)
            {
                classNo = DocumentClass.GetClassIndex(className);
            }

            // Information about all words from all learning documents.
            Dictionary <String, WordInfo> allWordsInfo = learningDocInfo.AllWordsInfo;
            double allDocNumber = learningDocInfo.AllDocCount;

            // Words found in this document.
            WordCountList wordsInDoc = new WordCountList(fileName);

            int wordsInDocCount = wordsInDoc.GetAllWordsCount();

            foreach (String word in dictionary)
            {
                // The indexer yields -1 for a word that is absent from the document.
                if (wordsInDoc[word] != -1)
                {
                    double inclDocCount = allWordsInfo[word].InclDocCount;
                    double tfIdf = PreprocessingUtility.ComputeTfIdf(wordsInDoc[word], wordsInDocCount, allDocNumber, inclDocCount);
                    wordCountList.Add(new WordCountPair(word, tfIdf));
                }
                else
                {
                    wordCountList.Add(new WordCountPair(word, 0));
                }
            }
        }
예제 #3
0
 /// <summary>
 /// Preprocesses a single file by passing it to the stemmer together with the stop-word list.
 /// </summary>
 /// <param name="sourcePath">Path passed to the stemmer as the source file.</param>
 /// <param name="destinationFile">Path passed to the stemmer as the destination file.</param>
 public void PreprocessingFile(String sourcePath, String destinationFile)
 {
     // The stop-word list is loaded on first use and kept in the stopWords member afterwards.
     if (stopWords == null)
     {
         String stopWordsPath = Settings.Default.pathStopWords;
         stopWords = PreprocessingUtility.LoadStopWords(stopWordsPath);
     }

     PreprocessingUtility.StemFile(sourcePath, destinationFile, stopWords);
 }
예제 #4
0
        /// <summary>
        /// Preprocesses a whole directory.
        /// Starts the worker that performs the preprocessing and then shows the
        /// preprocessing form as a modal dialog.
        /// </summary>
        /// <param name="sourcePath">Directory to preprocess.</param>
        public void PreprocessingDir(String sourcePath)
        {
            // The number of documents is used as the upper bound of the progress display.
            int documentCount = PreprocessingUtility.GetDocumentsNumber(sourcePath);

            preprocessingForm = new PreprocessingForm(this);
            preprocessingForm.MaxProgress = documentCount;

            // Store the path before the worker is started
            // (presumably the worker reads it from this member - verify against the worker's handler).
            preprocessingPath = sourcePath;
            preprocessingWorker.RunWorkerAsync();

            preprocessingForm.ShowDialog();
        }
예제 #5
0
        /// <summary>
        /// Creates a new dictionary.
        /// The TF-IDF value of every word is summed over all stemmed learning files and
        /// the first <paramref name="size"/> words of the sorted, reversed result are kept.
        /// </summary>
        /// <param name="sourceDir">Directory containing all learning files.</param>
        /// <param name="summaryFile">File with the summary of all files.</param>
        /// <param name="size">
        /// Number of words in the dictionary. When fewer distinct words are available,
        /// all of them are taken.
        /// </param>
        public CtfIdfDictionary(String sourceDir, String summaryFile, int size)
        {
            Dictionary <String, WordCountPair> tmpDictionary = new Dictionary <string, WordCountPair>();

            // Load information about all words.
            LearningDocInfo learningDocInfo        = new LearningDocInfo(sourceDir, summaryFile);
            Dictionary <String, WordInfo> allWords = learningDocInfo.AllWordsInfo;
            int allDocCount = learningDocInfo.AllDocCount;

            // Build the dictionary.
            DirectoryInfo sourceDirInfo = new DirectoryInfo(sourceDir);

            foreach (DirectoryInfo dirInfo in sourceDirInfo.GetDirectories()) // every sub-directory
            {
                DirectoryInfo stemDir = new DirectoryInfo(Path.Combine(dirInfo.FullName, "stem"));
                foreach (FileInfo fileInfo in stemDir.GetFiles()) // every file
                {
                    WordCountList wordsInFile     = new WordCountList(fileInfo.FullName);
                    int           wordsInDocCount = wordsInFile.GetAllWordsCount();
                    foreach (WordCountPair wordCountPair in wordsInFile) // every word
                    {
                        double tfIdf = PreprocessingUtility.ComputeTfIdf(wordCountPair.Count, wordsInDocCount, allDocCount, allWords[wordCountPair.Word].InclDocCount);

                        // Single lookup: add to the running sum of a known word, or start a new entry.
                        WordCountPair accumulated;
                        if (tmpDictionary.TryGetValue(wordCountPair.Word, out accumulated))
                        {
                            accumulated.Count += tfIdf;
                        }
                        else
                        {
                            tmpDictionary.Add(wordCountPair.Word, new WordCountPair(wordCountPair.Word, tfIdf));
                        }
                    }
                }
            }

            // Select the words: sort with WordCountPair's own comparison and reverse the result
            // (presumably this orders by accumulated weight, highest first - verify against WordCountPair).
            WordCountPair[] tmpArray = new WordCountPair[tmpDictionary.Count];
            tmpDictionary.Values.CopyTo(tmpArray, 0);
            Array.Sort(tmpArray);
            Array.Reverse(tmpArray);

            // Copy to the final list. The requested size may exceed the number of distinct
            // words, so clamp it to avoid indexing past the end of the array.
            int wordsToTake = Math.Min(size, tmpArray.Length);
            wordList = new List <string>();
            for (int i = 0; i < wordsToTake; i++)
            {
                wordList.Add(tmpArray[i].Word);
            }
        }
예제 #6
0
        /// <summary>
        /// Preprocesses a whole directory.
        /// Meant to be run on a separate thread.
        /// </summary>
        public void PreprocessingDirProcess()
        {
            // The stop-word list is loaded on first use only.
            if (stopWords == null)
            {
                stopWords = PreprocessingUtility.LoadStopWords(Settings.Default.pathStopWords);
            }

            DirectoryInfo rootDirInfo = new DirectoryInfo(preprocessingPath);
            DirectoryInfo[] subDirectories = rootDirInfo.GetDirectories();

            foreach (DirectoryInfo sourceDirInfo in subDirectories)
            {
                PreprocessingUtility.StemDir(sourceDirInfo.FullName, stopWords, preprocessingWorker);

                // Stop right away when cancellation was requested; no summaries are written then.
                if (preprocessingWorker.CancellationPending)
                {
                    return;
                }

                // Sum the words of the stemmed files of this sub-directory into a file placed in the root directory.
                String stemPath     = sourceDirInfo.FullName + "\\stem\\";
                String categoryFile = rootDirInfo + "\\" + sourceDirInfo.Name + PreprocessingConsts.CategoryFileExtension;
                PreprocessingUtility.SumWords(stemPath, PreprocessingConsts.StemmedFilePattern, categoryFile);
            }

            // Finally sum the per-directory files into the overall summary file.
            String summaryFile = rootDirInfo + "\\" + PreprocessingConsts.SummaryFileName;
            PreprocessingUtility.SumWords(preprocessingPath, PreprocessingConsts.CategoryFilePattern, summaryFile);
        }
예제 #7
0
        /// <summary>
        /// Loads a directory.
        /// Asks for a folder and, depending on the configured operation type, registers it
        /// either as the learning directory or as the classification directory.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void OnDirectoryToolStripMenuItem_Click(object sender, EventArgs e)
        {
            String pathTemp = ShowFolderBrowserDialog();

            // No folder was returned (presumably the dialog was cancelled): nothing to do.
            if (pathTemp == null)
            {
                return;
            }

            OperationType operationType = (OperationType)Settings.Default.operationType;

            switch (operationType)
            {
            case OperationType.Learning:
                // Learning data is only accepted after it has been preprocessed.
                if (!IsPrepocessingDone(pathTemp))
                {
                    MessageBox.Show("Dokonaj preprocessing'u dla danych uczących.", "Preprocessing", MessageBoxButtons.OK, MessageBoxIcon.Information);
                    return;
                }

                String pathSummaryTemp = pathTemp + "\\" + PreprocessingConsts.SummaryFileName;

                Settings.Default.pathLearningDir            = pathTemp;
                Settings.Default.pathSummaryFile            = pathSummaryTemp;
                Settings.Default.numberAllWordsInDictionary = new WordCountList(pathSummaryTemp).GetUniqueWordsCount();
                Settings.Default.numberLearningCategories   = PreprocessingUtility.GetCategoryNumber(pathTemp);
                Settings.Default.numberLearningDocuments    = PreprocessingUtility.GetDocumentsNumber(pathTemp);

                SetFormStateAfterLoadLearningData();
                break;

            case OperationType.Classification:
                Settings.Default.pathClassificationDir = pathTemp;
                AddItemsToClassificationResultFtomDir(Settings.Default.pathClassificationDir);
                SetFormStateAfterLoadClassificateData();
                break;

            default:
                break;
            }
        }