/// <summary>Train the singleton predictor using a logistic regression classifier.</summary>
/// <param name="pDataset">Dataset of features</param>
/// <returns>Singleton predictor</returns>
public static LogisticClassifier<string, string> Train(GeneralDataset<string, string> pDataset)
{
    // Delegate training to the logistic-regression factory and hand back the fitted model.
    LogisticClassifierFactory<string, string> factory = new LogisticClassifierFactory<string, string>();
    return factory.TrainClassifier(pDataset);
}
/// <summary>
/// Trains a logistic classifier over datums sampled from the supplied sentence batches and
/// writes the resulting (positively-oriented) feature weights, sorted, to
/// <paramref name="externalFeatureWeightsFileLabel"/> in UTF-8.
/// NOTE(review): the returned <c>features</c> counter is created but never populated below,
/// so this method returns an empty counter — the useful output is the weights file.
/// Confirm callers expect an empty result before changing this.
/// </summary>
/// <param name="sentsf">Iterator over (sentence-map, file) batches; each batch is fed to <c>Sample</c></param>
/// <param name="perSelectRand">Fraction/probability used for random selection (passed through to <c>Sample</c> — semantics defined there)</param>
/// <param name="perSelectNeg">Fraction/probability used for negative selection (passed through to <c>Sample</c>)</param>
/// <param name="externalFeatureWeightsFileLabel">Path of the file the sorted feature weights are written to</param>
/// <returns>An empty counter (see NOTE above)</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
public virtual ICounter<string> GetTopFeatures(IEnumerator<Pair<IDictionary<string, DataInstance>, File>> sentsf, double perSelectRand, double perSelectNeg, string externalFeatureWeightsFileLabel)
{
    ICounter<string> features = new ClassicCounter<string>();
    RVFDataset<string, string> dataset = new RVFDataset<string, string>();
    // Fixed seeds (10, 10) make the sampling deterministic across runs.
    Random r = new Random(10);
    Random rneg = new Random(10);
    int numrand = 0;
    IList<Pair<string, int>> chosen = new List<Pair<string, int>>();
    // Accumulate sampled datums from every batch into `dataset`; `numrand` threads the
    // running count of randomly chosen items through successive Sample calls.
    while (sentsf.MoveNext())
    {
        Pair<IDictionary<string, DataInstance>, File> sents = sentsf.Current;
        numrand = this.Sample(sents.First(), r, rneg, perSelectNeg, perSelectRand, numrand, chosen, dataset);
    }
    /*if(batchProcessSents){
    * for(File f: sentFiles){
    * Map<String, List<CoreLabel>> sentsf = IOUtils.readObjectFromFile(f);
    * numrand = this.sample(sentsf, r, rneg, perSelectNeg, perSelectRand, numrand, chosen, dataset);
    * }
    * }else
    * numrand = this.sample(sents, r, rneg, perSelectNeg, perSelectRand, numrand, chosen, dataset);
    */
    System.Console.Out.WriteLine("num random chosen: " + numrand);
    System.Console.Out.WriteLine("Number of datums per label: " + dataset.NumDatumsPerLabel());
    LogisticClassifierFactory<string, string> logfactory = new LogisticClassifierFactory<string, string>();
    LogisticClassifier<string, string> classifier = logfactory.TrainClassifier(dataset);
    ICounter<string> weights = classifier.WeightsAsCounter();
    // Orient weights so that positive values correspond to `answerLabel`: if the classifier's
    // internal positive class is the other label, flip every weight's sign.
    if (!classifier.GetLabelForInternalPositiveClass().Equals(answerLabel))
    {
        weights = Counters.Scale(weights, -1);
    }
    // Optional pruning: drop features whose absolute weight is at or below the threshold.
    // (thresholdWeight is presumably a nullable double field on this class — null disables pruning.)
    if (thresholdWeight != null)
    {
        HashSet<string> removeKeys = new HashSet<string>();
        foreach (KeyValuePair<string, double> en in weights.EntrySet())
        {
            if (Math.Abs(en.Value) <= thresholdWeight)
            {
                removeKeys.Add(en.Key);
            }
        }
        Counters.RemoveKeys(weights, removeKeys);
        System.Console.Out.WriteLine("Removing " + removeKeys);
    }
    // Persist all surviving weights, sorted, one "feature:weight" per line.
    IOUtils.WriteStringToFile(Counters.ToSortedString(weights, weights.Size(), "%1$s:%2$f", "\n"), externalFeatureWeightsFileLabel, "utf8");
    // getDecisionTree(sents, chosen, weights, wekaOptions);
    return (features);
}