// TODO refactor
/// <summary>
/// Builds a <c>BookCollection</c> from the books in <c>_books</c> whose keys appear in
/// <paramref name="ids"/>: pools and shuffles their sentences, assigns every sentence a
/// k-means cluster index, obtains training examples and labels from
/// <c>DisplayAdjacentSentences</c>, and stores the collection in <c>_bookCollections</c>
/// under its id.
/// </summary>
/// <param name="ids">Keys of the books to train on; at least two are required.</param>
/// <param name="numExamplesToClassify">
/// Upper bound on how many of the shuffled sentences are handed to <c>DisplayAdjacentSentences</c>.
/// </param>
/// <param name="numAdjacentExamples">Forwarded unchanged to <c>DisplayAdjacentSentences</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
/// <exception cref="ArgumentException">Fewer than two ids were supplied.</exception>
public void TrainModel(List<string> ids, int numExamplesToClassify, int numAdjacentExamples)
{
    // TODO -> magic number, should be hyperparam
    const int ClusterCount = 4;

    if (ids == null)
    {
        throw new ArgumentNullException(nameof(ids));
    }

    // At least 2 ids to train on.
    if (ids.Count < 2)
    {
        throw new ArgumentException("At least 2 ids must be specified.");
    }

    // Hash the ids once so the membership test below is O(1) per book instead of a list scan.
    var requestedIds = new HashSet<string>(ids);
    var trainingBooks = _books.Where(book => requestedIds.Contains(book.Key)).ToList();

    // Pool the sentences of every selected book.
    var allSentences = new List<SentenceExample>();
    foreach (var book in trainingBooks)
    {
        allSentences.AddRange(book.Value.sentences);
    }

    var rand = new Random();
    RandomUtil.Shuffle(allSentences, rand);

    // Fit the clustering model on the first half of the shuffled sentences only,
    // then predict a cluster for every sentence.
    var clusteringModel = new KMeans(ClusterCount);
    var fitSample = allSentences.Take(allSentences.Count / 2).ToList();
    clusteringModel.Fit(ClassificationUtil.ConstructMatrixX(_bagOfWords, fitSample));

    var clusterList = clusteringModel.Predict(ClassificationUtil.ConstructMatrixX(_bagOfWords, allSentences));
    for (int i = 0; i < clusterList.Count; i++)
    {
        allSentences[i].classification = clusterList[i];
    }

    // Get random assortment of sentences to train on: the list is already shuffled,
    // so the leading numExamplesToClassify entries (or fewer, if the list is shorter) suffice.
    var sentencesToClassify = allSentences.Take(numExamplesToClassify).ToList();

    // Get training data using sentences from user input.
    var trainingData = DisplayAdjacentSentences(sentencesToClassify, numAdjacentExamples, rand);

    var bookCol = new BookCollection(ids);
    bookCol.sentences = allSentences;
    bookCol.trainingExamples = trainingData.Item1;
    bookCol.trainingLabels = trainingData.Item2;
    bookCol.bagOfWords = _bagOfWords;

    // NOTE(review): if _bookCollections is a Dictionary, Add throws when the id is already
    // registered - confirm whether retraining the same id set should be supported.
    _bookCollections.Add(bookCol.id, bookCol);
}
/// <summary>
/// Start-up sequence: generates the random data, creates a <c>KMeans</c> classifier with
/// <c>numOfClusters</c> clusters and fits it to <c>tempData</c>, calls
/// <c>CreateSpheres</c> and <c>CreateClusters</c>, and finally starts the
/// <c>startFit</c> coroutine.
/// </summary>
void Start()
{
    // Generate data.
    // NOTE(review): presumably this fills tempData, which is fitted below - confirm.
    GenerateRandomData();

    // Create the classifier and fit it to the data.
    kMeans = new KMeans(numOfClusters);
    kMeans.Fit(tempData);

    CreateSpheres();
    CreateClusters();

    // Obtain the routine first, then hand it to the coroutine scheduler.
    var fitRoutine = startFit();
    StartCoroutine(fitRoutine);
}