Ejemplo n.º 1
0
        /// <summary>
        /// Generates what should be printed into the .txt file as the predicted book: an ordering of the
        /// given sentences chosen greedily with the help of <paramref name="model"/>.
        /// </summary>
        /// <remarks>
        /// The first sentence is picked at random. After that, each step asks <c>GenerateNextSentences</c> for
        /// candidate successors of the most recently placed sentence, lets the model predict a label for every
        /// candidate, and appends the candidate with the highest label (the first one when several tie).
        /// The chosen sentence is then removed from the pool, and the loop runs until the pool is empty.
        /// </remarks>
        /// <param name="allSentences">Pool of sentences to order. The list itself is not modified.</param>
        /// <param name="model">Fitted model used to score candidate next sentences.</param>
        /// <param name="numAdjacentExamples">Forwarded unchanged to <c>GenerateNextSentences</c>.</param>
        /// <returns>The sentences in predicted order; empty when <paramref name="allSentences"/> is empty.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="allSentences"/> or <paramref name="model"/> is null.
        /// </exception>
        public List<SentenceExample> GeneratePredictedBook(List<SentenceExample> allSentences, DecisionTree model,
                                                           int numAdjacentExamples)
        {
            if (allSentences == null)
            {
                throw new ArgumentNullException("allSentences");
            }

            if (model == null)
            {
                throw new ArgumentNullException("model");
            }

            var predictedBook = new List<SentenceExample>(allSentences.Count);

            // Nothing to order; indexing into an empty pool below would throw.
            if (allSentences.Count == 0)
            {
                return predictedBook;
            }

            // Work on a private copy so the caller's list is left intact and can be reused.
            var remaining = new List<SentenceExample>(allSentences);
            var rand      = new Random();

            // Seed the book with a random sentence.
            var current = remaining[rand.Next(0, remaining.Count)];

            remaining.Remove(current);
            predictedBook.Add(current);

            // NOTE(review): termination relies on GenerateNextSentences only returning sentences that are
            // still in the pool, so that every iteration removes one element - confirm against its definition.
            while (remaining.Count != 0)
            {
                var nextSentences = GenerateNextSentences(remaining, current, numAdjacentExamples, rand);
                var labels        = model.Predict(ClassificationUtil.ConstructMatrixX(bagOfWords, nextSentences));

                // Gets the best sentence according to label; IndexOf yields the first candidate with that label.
                int maxValue = labels.Max();
                int maxIndex = labels.ToList().IndexOf(maxValue);

                var bestNextSentence = nextSentences[maxIndex];

                // Add bestNextSentence as next sentence in new book and continue from it.
                remaining.Remove(bestNextSentence);
                predictedBook.Add(bestNextSentence);
                current = bestNextSentence;
            }

            return predictedBook;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a training set from the books whose keys appear in <paramref name="ids"/> and registers it
        /// in <c>_bookCollections</c> as a new <c>BookCollection</c>.
        /// </summary>
        /// <remarks>
        /// Steps: collect and shuffle every sentence of the selected books; fit a k-means model on the first
        /// half of the shuffled sentences; store the predicted cluster of every sentence in its
        /// <c>classification</c> field; then hand the first <paramref name="numExamplesToClassify"/> sentences
        /// to <c>DisplayAdjacentSentences</c> to obtain the training examples and labels.
        /// </remarks>
        /// <param name="ids">Keys of the books in <c>_books</c> to train on; at least two are required.</param>
        /// <param name="numExamplesToClassify">Upper bound on how many shuffled sentences are used for labelling.</param>
        /// <param name="numAdjacentExamples">Forwarded unchanged to <c>DisplayAdjacentSentences</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="ids"/> holds fewer than two entries.</exception>
        // TODO refactor
        public void TrainModel(List<string> ids, int numExamplesToClassify, int numAdjacentExamples)
        {
            if (ids == null)
            {
                throw new ArgumentNullException("ids");
            }

            // At least 2 ids to train on.
            if (ids.Count < 2)
            {
                throw new ArgumentException("At least 2 ids must be specified.", "ids");
            }

            // TODO -> magic number, should be hyperparam
            const int clusterCount = 4;

            // Hash the ids once so filtering the books does not rescan the id list for every book.
            var wantedIds = new HashSet<string>(ids);

            // Gather all sentences of the selected books.
            var allSentences = new List<SentenceExample>();

            foreach (var book in _books.Where(entry => wantedIds.Contains(entry.Key)))
            {
                allSentences.AddRange(book.Value.sentences);
            }

            // The same Random instance drives both the shuffle and DisplayAdjacentSentences below.
            var rand = new Random();

            RandomUtil.Shuffle(allSentences, rand);

            // Fit the clustering on the first half of the shuffled sentences, then classify all of them.
            var clusteringModel = new KMeans(clusterCount);
            var fittingSample   = allSentences.Take(allSentences.Count / 2).ToList();

            clusteringModel.Fit(ClassificationUtil.ConstructMatrixX(_bagOfWords, fittingSample));

            var clusterList = clusteringModel.Predict(ClassificationUtil.ConstructMatrixX(_bagOfWords, allSentences));

            for (int i = 0; i < clusterList.Count; i++)
            {
                allSentences[i].classification = clusterList[i];
            }

            // The list is already shuffled, so its leading elements are a random assortment to train on.
            var sentencesToClassify = allSentences.Take(numExamplesToClassify).ToList();

            // Get training data using sentences from user input.
            var trainingData = DisplayAdjacentSentences(sentencesToClassify, numAdjacentExamples, rand);

            var bookCol = new BookCollection(ids);

            bookCol.sentences        = allSentences;
            bookCol.trainingExamples = trainingData.Item1;
            bookCol.trainingLabels   = trainingData.Item2;
            bookCol.bagOfWords       = _bagOfWords;

            // NOTE(review): if _bookCollections is a Dictionary, Add throws when this id was already
            // registered (e.g. training twice on the same ids) - confirm whether overwriting is wanted.
            _bookCollections.Add(bookCol.id, bookCol);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Fits a decision tree on the training data stored for the collection <paramref name="id"/>, uses it
        /// to generate a predicted sentence ordering, and writes that ordering to a text file, one sentence
        /// per line.
        /// </summary>
        /// <param name="id">Key of the collection in <c>_bookCollections</c> to generate from.</param>
        /// <param name="maxDepth">Maximum depth passed to the <c>DecisionTree</c> constructor.</param>
        /// <param name="numTrees">
        /// Currently unused: it only applied to a <c>RandomForest</c> model, which is not in use here.
        /// </param>
        /// <param name="numAdjacentExamples">Forwarded unchanged to <c>GeneratePredictedBook</c>.</param>
        /// <returns>
        /// The relative path of the written file: two directories above the current working directory,
        /// named after the collection id with a <c>.txt</c> extension.
        /// </returns>
        public string GenerateBook(string id, int maxDepth, int numTrees, int numAdjacentExamples)
        {
            var bookCol = _bookCollections[id];

            // Train model.
            var model  = new DecisionTree(maxDepth, new DecisionStumpInfoGain());
            var XTrain = ClassificationUtil.ConstructMatrixX(_bagOfWords, bookCol.trainingExamples);

            model.Fit(XTrain, bookCol.trainingLabels);

            // Hand the generator its own copy so the stored collection's sentence list is never consumed
            // and a book can be generated again for the same id.
            var allSentences = new List<SentenceExample>(bookCol.sentences);

            var bookGenerator = new BookGenerator(_bagOfWords);

            string[] stringOutput = bookGenerator.GeneratePredictedBook(allSentences, model, numAdjacentExamples)
                                    .Select(x => x.sentence).ToArray();

            // Path.Combine uses the platform's directory separator instead of hard-coded backslashes.
            var outputLocation = System.IO.Path.Combine("..", "..", bookCol.id + ".txt");

            System.IO.File.WriteAllLines(outputLocation, stringOutput);
            return outputLocation;
        }