/// <summary>
/// Command-line entry point: trains a singleton predictor from a CoNLL-2011
/// corpus and serializes the resulting model to disk.
/// Requires the properties <c>dcoref.conll2011</c> (input corpus) and
/// <c>singleton.predictor.output</c> (output model file).
/// </summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    // Parse command-line flags into properties; fall back to an empty set.
    Properties props = args.Length > 0 ? StringUtils.ArgsToProperties(args) : new Properties();
    // Both properties are mandatory — bail out early with a usage hint if missing.
    if (!props.Contains("dcoref.conll2011"))
    {
        log.Info("-dcoref.conll2011 [input_CoNLL_corpus]: was not specified");
        return;
    }
    if (!props.Contains("singleton.predictor.output"))
    {
        log.Info("-singleton.predictor.output [output_model_file]: was not specified");
        return;
    }
    // Extract features, train the logistic classifier, and persist it.
    GeneralDataset<string, string> dataset = Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.GenerateFeatureVectors(props);
    LogisticClassifier<string, string> model = Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.Train(dataset);
    Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.SaveToSerialized(model, GetPathSingletonPredictor(props));
}
/// <summary>Train the singleton predictor using a logistic regression classifier.</summary>
/// <param name="pDataset">Dataset of features</param>
/// <returns>Singleton predictor</returns>
public static LogisticClassifier<string, string> Train(GeneralDataset<string, string> pDataset)
{
    // Delegate the actual optimization to the classifier factory.
    var factory = new LogisticClassifierFactory<string, string>();
    return factory.TrainClassifier(pDataset);
}
/// <summary>
/// Console demo entry point: asks the user for a CSV file path and the space
/// dimensionality, trains a binary logistic-regression classifier with
/// stochastic gradient descent, and prints train/test accuracy.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
{
    Console.WriteLine("Укажите путь к файлу");
    var path = Console.ReadLine();
    Console.WriteLine("Укажите размерность прстранства");
    var dimension = Convert.ToInt32(Console.ReadLine());

    // FIX: use the path the user just typed instead of a hard-coded
    // developer-machine absolute path (the original ignored `path` entirely).
    // `dimension` is already an int, so the redundant Convert.ToInt32 is gone.
    var ff = new FileFramework(path, dimension);

    Console.WriteLine("\nBegin Logistic Regression (binary) Classification demo");
    Console.WriteLine("Goal is to demonstrate training using gradient descent");

    // Last column of the CSV is assumed to be the class label — TODO confirm
    // against FileFramework.ParseFile.
    var numFeatures = dimension - 1;
    var numRows = 100;
    Console.WriteLine("\nGenerating " + numRows + " artificial data items with " + numFeatures + " features");

    var allData = ff.ParseFile();

    Console.WriteLine("Creating train (80%) and test (20%) matrices");
    double[][] trainData;
    double[][] testData;
    MakeTrainTest(allData, 0, out trainData, out testData);
    Console.WriteLine("Done");

    Console.WriteLine("\nTraining data: \n");
    ShowData(trainData, 3, 2, true);
    Console.WriteLine("\nTest data: \n");
    ShowData(testData, 3, 2, true);

    Console.WriteLine("Creating LR binary classifier");
    var lc = new LogisticClassifier(numFeatures);

    var maxEpochs = 1000;
    Console.WriteLine("Setting maxEpochs = " + maxEpochs);
    var alpha = 0.01;  // learning rate
    Console.WriteLine("Setting learning rate = " + alpha.ToString("F2"));

    Console.WriteLine("\nStarting training using (stochastic) gradient descent");
    double[] weights = lc.Train(trainData, maxEpochs, alpha);
    Console.WriteLine("Training complete");

    Console.WriteLine("\nBest weights found:");
    ShowVector(weights, 4, true);

    double trainAcc = lc.Accuracy(trainData, weights);
    Console.WriteLine("Prediction accuracy on training data = " + trainAcc.ToString("F4"));
    double testAcc = lc.Accuracy(testData, weights);
    Console.WriteLine("Prediction accuracy on test data = " + testAcc.ToString("F4"));

    Console.WriteLine("\nEnd LR binary classification demo\n");
    Console.ReadLine();  // keep the console window open
}
/// <summary>
/// Samples labeled data instances from the supplied sentence batches, trains a
/// logistic classifier on them, and writes the (thresholded) feature weights
/// to <paramref name="externalFeatureWeightsFileLabel"/>.
/// </summary>
/// <param name="sentsf">Iterator over (sentence-map, file) pairs to sample from.</param>
/// <param name="perSelectRand">Fraction used when randomly selecting instances — passed through to Sample.</param>
/// <param name="perSelectNeg">Fraction used when selecting negative instances — passed through to Sample.</param>
/// <param name="externalFeatureWeightsFileLabel">Path the sorted weight list is written to (UTF-8).</param>
/// <returns>
/// NOTE(review): returns <c>features</c>, which is created empty and never
/// populated anywhere in this method — callers receive an empty counter while
/// the computed weights only go to the output file. Looks like an upstream
/// quirk or bug; confirm against callers before relying on the return value.
/// </returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
public virtual ICounter <string> GetTopFeatures(IEnumerator <Pair <IDictionary <string, DataInstance>, File> > sentsf, double perSelectRand, double perSelectNeg, string externalFeatureWeightsFileLabel)
{
    ICounter <string> features = new ClassicCounter <string>();
    RVFDataset <string, string> dataset = new RVFDataset <string, string>();
    // Two RNGs with the same fixed seed (10): one for random selection, one
    // for negative selection — fixed seeds keep sampling reproducible.
    Random r = new Random(10);
    Random rneg = new Random(10);
    int numrand = 0;
    IList <Pair <string, int> > chosen = new List <Pair <string, int> >();
    // Accumulate sampled datums from every sentence batch into one dataset.
    while (sentsf.MoveNext())
    {
        Pair <IDictionary <string, DataInstance>, File> sents = sentsf.Current;
        numrand = this.Sample(sents.First(), r, rneg, perSelectNeg, perSelectRand, numrand, chosen, dataset);
    }
    System.Console.Out.WriteLine("num random chosen: " + numrand);
    System.Console.Out.WriteLine("Number of datums per label: " + dataset.NumDatumsPerLabel());
    // Train a logistic classifier on the sampled dataset and read off its
    // per-feature weights.
    LogisticClassifierFactory <string, string> logfactory = new LogisticClassifierFactory <string, string>();
    LogisticClassifier <string, string> classifier = logfactory.TrainClassifier(dataset);
    ICounter <string> weights = classifier.WeightsAsCounter();
    // Flip the sign of every weight if the classifier's internal positive
    // class is not our answer label, so positive weights always favor answerLabel.
    if (!classifier.GetLabelForInternalPositiveClass().Equals(answerLabel))
    {
        weights = Counters.Scale(weights, -1);
    }
    // Optionally prune features whose absolute weight is at or below the threshold.
    if (thresholdWeight != null)
    {
        HashSet <string> removeKeys = new HashSet <string>();
        foreach (KeyValuePair <string, double> en in weights.EntrySet())
        {
            if (Math.Abs(en.Value) <= thresholdWeight)
            {
                removeKeys.Add(en.Key);
            }
        }
        Counters.RemoveKeys(weights, removeKeys);
        System.Console.Out.WriteLine("Removing " + removeKeys);
    }
    // Persist all surviving weights, sorted, one "feature:weight" pair per line.
    IOUtils.WriteStringToFile(Counters.ToSortedString(weights, weights.Size(), "%1$s:%2$f", "\n"), externalFeatureWeightsFileLabel, "utf8");
    // getDecisionTree(sents, chosen, weights, wekaOptions);
    return(features);
}
/// <summary>Saves the singleton predictor model to the given filename.</summary>
/// <remarks>
/// Saves the singleton predictor model to the given filename.
/// If there is an error, a RuntimeIOException is thrown.
/// </remarks>
/// <param name="predictor">Trained classifier to serialize.</param>
/// <param name="filename">Destination file path.</param>
private static void SaveToSerialized(LogisticClassifier <string, string> predictor, string filename)
{
    try
    {
        log.Info("Writing singleton predictor in serialized format to file " + filename + ' ');
        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
        try
        {
            @out.WriteObject(predictor);
        }
        finally
        {
            // FIX: close the stream even when WriteObject throws, so the
            // underlying file handle is not leaked on a serialization failure
            // (the original only closed on the success path).
            @out.Close();
        }
        log.Info("done.");
    }
    catch (IOException ioe)
    {
        // Wrap checked I/O failures in the project's unchecked exception type.
        throw new RuntimeIOException(ioe);
    }
}
/// <summary>
/// Trains a two-feature logistic classifier on the truth table of logical AND
/// and asserts it predicts every row correctly.
/// </summary>
public void AndTest()
{
    var classifier = new LogisticClassifier(2);

    // Truth table for AND: only (1, 1) is labeled positive.
    classifier.AddSample(new double[] { 0, 0 }, 0);
    classifier.AddSample(new double[] { 0, 1 }, 0);
    classifier.AddSample(new double[] { 1, 0 }, 0);
    classifier.AddSample(new double[] { 1, 1 }, 1);

    const int times = 1000;
    classifier.Alpha = 3;       // learning rate
    classifier.Lambda = 0.01;   // regularization strength
    List <BinaryClassificationTrainResult> results = classifier.Train(times);

    // Dump cost and theta every 100 iterations for inspection.
    for (int i = 0; i < times; i += 100)
    {
        Debug.Print(results[i].Cost.ToString());
        foreach (double theta in results[i].Theta)
        {
            Debug.Print("\t" + theta.ToString());
        }
    }

    // Hypothesis values for each input, with the bias term (1) prepended.
    var h = new double[]
    {
        classifier.H(new double[] { 1, 0, 0 }),
        classifier.H(new double[] { 1, 0, 1 }),
        classifier.H(new double[] { 1, 1, 0 }),
        classifier.H(new double[] { 1, 1, 1 }),
    };
    foreach (double value in h)
    {
        Debug.Print(value.ToString());
    }

    Assert.AreEqual(classifier.Predict(new double[] { 0, 0 }), 0);
    Assert.AreEqual(classifier.Predict(new double[] { 0, 1 }), 0);
    Assert.AreEqual(classifier.Predict(new double[] { 1, 0 }), 0);
    Assert.AreEqual(classifier.Predict(new double[] { 1, 1 }), 1);
}
/// <summary>
/// Console demo entry point: asks the user for a CSV file path and the space
/// dimensionality, trains a binary logistic-regression classifier with
/// stochastic gradient descent, and prints train/test accuracy.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
{
    Console.WriteLine("Укажите путь к файлу");
    var path = Console.ReadLine();
    Console.WriteLine("Укажите размерность прстранства");
    var dimension = Convert.ToInt32(Console.ReadLine());

    // FIX: use the path the user just typed instead of a hard-coded
    // developer-machine absolute path (the original ignored `path` entirely).
    // `dimension` is already an int, so the redundant Convert.ToInt32 is gone.
    var ff = new FileFramework(path, dimension);

    Console.WriteLine("\nBegin Logistic Regression (binary) Classification demo");
    Console.WriteLine("Goal is to demonstrate training using gradient descent");

    // Last column of the CSV is assumed to be the class label — TODO confirm
    // against FileFramework.ParseFile.
    var numFeatures = dimension - 1;
    var numRows = 100;
    Console.WriteLine("\nGenerating " + numRows + " artificial data items with " + numFeatures + " features");

    var allData = ff.ParseFile();

    Console.WriteLine("Creating train (80%) and test (20%) matrices");
    double[][] trainData;
    double[][] testData;
    MakeTrainTest(allData, 0, out trainData, out testData);
    Console.WriteLine("Done");

    Console.WriteLine("\nTraining data: \n");
    ShowData(trainData, 3, 2, true);
    Console.WriteLine("\nTest data: \n");
    ShowData(testData, 3, 2, true);

    Console.WriteLine("Creating LR binary classifier");
    var lc = new LogisticClassifier(numFeatures);

    var maxEpochs = 1000;
    Console.WriteLine("Setting maxEpochs = " + maxEpochs);
    var alpha = 0.01;  // learning rate
    Console.WriteLine("Setting learning rate = " + alpha.ToString("F2"));

    Console.WriteLine("\nStarting training using (stochastic) gradient descent");
    double[] weights = lc.Train(trainData, maxEpochs, alpha);
    Console.WriteLine("Training complete");

    Console.WriteLine("\nBest weights found:");
    ShowVector(weights, 4, true);

    double trainAcc = lc.Accuracy(trainData, weights);
    Console.WriteLine("Prediction accuracy on training data = " + trainAcc.ToString("F4"));
    double testAcc = lc.Accuracy(testData, weights);
    Console.WriteLine("Prediction accuracy on test data = " + testAcc.ToString("F4"));

    Console.WriteLine("\nEnd LR binary classification demo\n");
    Console.ReadLine();  // keep the console window open
}
/// <summary>
/// Creates a CoNLL mention extractor that additionally carries a pre-trained
/// singleton predictor model.
/// </summary>
/// <param name="dict">Coreference dictionaries.</param>
/// <param name="props">Configuration properties.</param>
/// <param name="semantics">Semantic resources used during extraction.</param>
/// <param name="singletonModel">Pre-trained singleton predictor to store on this instance.</param>
/// <exception cref="System.Exception"/>
public CoNLLMentionExtractor(Dictionaries dict, Properties props, Semantics semantics, LogisticClassifier <string, string> singletonModel)
    : this(dict, props, semantics)
{
    // Common setup is delegated to the three-argument constructor; this
    // overload only records the supplied singleton predictor.
    singletonPredictor = singletonModel;
}