/// <exception cref="System.Exception"/>
        /// <summary>
        /// Command-line entry point: generates feature vectors from the given properties,
        /// trains the singleton predictor and writes the serialized model to a file.
        /// </summary>
        /// <remarks>
        /// The properties must contain both <c>dcoref.conll2011</c> and
        /// <c>singleton.predictor.output</c>. When either one is missing, a message is
        /// logged and the method returns without training anything.
        /// </remarks>
        /// <param name="args">Command-line arguments; converted to properties when non-empty.</param>
        public static void Main(string[] args)
        {
            // No arguments means an empty property set, which fails the checks below.
            Properties props = args.Length > 0
                ? StringUtils.ArgsToProperties(args)
                : new Properties();

            bool hasCorpus = props.Contains("dcoref.conll2011");
            if (!hasCorpus)
            {
                log.Info("-dcoref.conll2011 [input_CoNLL_corpus]: was not specified");
                return;
            }

            bool hasOutput = props.Contains("singleton.predictor.output");
            if (!hasOutput)
            {
                log.Info("-singleton.predictor.output [output_model_file]: was not specified");
                return;
            }

            // Build the dataset and train on it in one step, then persist the resulting model.
            LogisticClassifier <string, string> model = Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.Train(
                Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.GenerateFeatureVectors(props));

            Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.SaveToSerialized(model, GetPathSingletonPredictor(props));
        }
        /// <summary>Fits the singleton predictor as a logistic regression classifier.</summary>
        /// <param name="pDataset">Feature dataset passed to the classifier factory.</param>
        /// <returns>The classifier returned by <c>LogisticClassifierFactory.TrainClassifier</c>.</returns>
        public static LogisticClassifier <string, string> Train(GeneralDataset <string, string> pDataset)
        {
            // A fresh factory is created for every call.
            return new LogisticClassifierFactory <string, string>().TrainClassifier(pDataset);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Console demo: reads a data file, splits it into training and test matrices,
        /// trains a binary logistic regression classifier and prints the learned weights
        /// together with the accuracy on both sets.
        /// </summary>
        /// <remarks>
        /// The file path entered at the first prompt is used as the data source. When the
        /// input is empty or whitespace, the built-in default path is used instead.
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            // Fallback data file used when nothing is entered at the prompt.
            const string defaultDataPath = @"E:\Users\Nikita\Documents\Visual Studio 2013\Projects\NSUBigData\LinearRegres\iris.csv";

            Console.WriteLine("Укажите путь к файлу");
            var enteredPath = Console.ReadLine();
            // Honour the path the user typed; previously the answer was read and then ignored.
            var path = string.IsNullOrWhiteSpace(enteredPath) ? defaultDataPath : enteredPath.Trim();

            Console.WriteLine("Укажите размерность прстранства");
            var dimension = Convert.ToInt32(Console.ReadLine());
            var ff = new FileFramework(path, dimension);

            Console.WriteLine("\nBegin Logistic Regression (binary) Classification demo");
            Console.WriteLine("Goal is to demonstrate training using gradient descent");

            // One of the entered dimensions is not counted as a feature.
            var numFeatures = dimension - 1;
            // NOTE(review): this count is only echoed in the message below; the data itself
            // comes from the parsed file, so the printed number may not match it.
            var numRows = 100;

            Console.WriteLine("\nGenerating " + numRows +
                              " artificial data items with " + numFeatures + " features");
            var allData = ff.ParseFile();

            Console.WriteLine("Creating train (80%) and test (20%) matrices");
            double[][] trainData;
            double[][] testData;
            // NOTE(review): the second argument is presumably a random seed - confirm against MakeTrainTest.
            MakeTrainTest(allData, 0, out trainData, out testData);
            Console.WriteLine("Done");

            Console.WriteLine("\nTraining data: \n");
            ShowData(trainData, 3, 2, true);

            Console.WriteLine("\nTest data: \n");
            ShowData(testData, 3, 2, true);

            Console.WriteLine("Creating LR binary classifier");
            var lc = new LogisticClassifier(numFeatures);

            var maxEpochs = 1000;
            Console.WriteLine("Setting maxEpochs = " + maxEpochs);
            var alpha = 0.01;
            Console.WriteLine("Setting learning rate = " + alpha.ToString("F2"));

            Console.WriteLine("\nStarting training using (stochastic) gradient descent");
            double[] weights = lc.Train(trainData, maxEpochs, alpha);
            Console.WriteLine("Training complete");

            Console.WriteLine("\nBest weights found:");
            ShowVector(weights, 4, true);

            double trainAcc = lc.Accuracy(trainData, weights);
            Console.WriteLine("Prediction accuracy on training data = " +
                              trainAcc.ToString("F4"));

            double testAcc = lc.Accuracy(testData, weights);
            Console.WriteLine("Prediction accuracy on test data = " +
                              testAcc.ToString("F4"));

            Console.WriteLine("\nEnd LR binary classification demo\n");
            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
        /// <summary>
        /// Samples datums from the supplied sentences, trains a logistic classifier on them and
        /// writes the resulting feature weights to <paramref name="externalFeatureWeightsFileLabel"/>.
        /// </summary>
        /// <remarks>
        /// The weights are negated when the classifier's internal positive class is not
        /// <c>answerLabel</c>. When <c>thresholdWeight</c> is set, every entry whose absolute weight
        /// is less than or equal to it is removed before the weights are written.
        /// NOTE(review): the returned counter is created empty and nothing in this method adds to it,
        /// so the return value carries no features - confirm that this is intended.
        /// </remarks>
        /// <param name="sentsf">Enumerator over (sentences, file) pairs; only the first element of each pair is sampled.</param>
        /// <param name="perSelectRand">Forwarded unchanged to <c>Sample</c>.</param>
        /// <param name="perSelectNeg">Forwarded unchanged to <c>Sample</c>.</param>
        /// <param name="externalFeatureWeightsFileLabel">File that receives the sorted weights, written as utf8.</param>
        /// <returns>The (empty) feature counter created at the start of the method.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        public virtual ICounter <string> GetTopFeatures(IEnumerator <Pair <IDictionary <string, DataInstance>, File> > sentsf, double perSelectRand, double perSelectNeg, string externalFeatureWeightsFileLabel)
        {
            ICounter <string>           result       = new ClassicCounter <string>();
            RVFDataset <string, string> trainingData = new RVFDataset <string, string>();
            // Both generators are created with the same fixed seed.
            Random rand        = new Random(10);
            Random randNeg     = new Random(10);
            int    randomCount = 0;
            IList <Pair <string, int> > picked = new List <Pair <string, int> >();

            // Accumulate sampled datums over every batch of sentences.
            while (sentsf.MoveNext())
            {
                randomCount = this.Sample(sentsf.Current.First(), rand, randNeg, perSelectNeg, perSelectRand, randomCount, picked, trainingData);
            }

            System.Console.Out.WriteLine("num random chosen: " + randomCount);
            System.Console.Out.WriteLine("Number of datums per label: " + trainingData.NumDatumsPerLabel());

            LogisticClassifier <string, string> model =
                new LogisticClassifierFactory <string, string>().TrainClassifier(trainingData);
            ICounter <string> weights = model.WeightsAsCounter();

            // Flip the sign so that the weights are expressed relative to answerLabel.
            if (!model.GetLabelForInternalPositiveClass().Equals(answerLabel))
            {
                weights = Counters.Scale(weights, -1);
            }

            if (thresholdWeight != null)
            {
                // Collect the keys first, then remove them in one call after the enumeration.
                HashSet <string> lowWeightKeys = new HashSet <string>();
                foreach (KeyValuePair <string, double> entry in weights.EntrySet())
                {
                    if (Math.Abs(entry.Value) <= thresholdWeight)
                    {
                        lowWeightKeys.Add(entry.Key);
                    }
                }
                Counters.RemoveKeys(weights, lowWeightKeys);
                System.Console.Out.WriteLine("Removing " + lowWeightKeys);
            }

            string sortedWeights = Counters.ToSortedString(weights, weights.Size(), "%1$s:%2$f", "\n");
            IOUtils.WriteStringToFile(sortedWeights, externalFeatureWeightsFileLabel, "utf8");
            return result;
        }
 /// <summary>Saves the singleton predictor model to the given filename.</summary>
 /// <remarks>
 /// The output stream is closed even when writing the object fails.
 /// If an <c>IOException</c> occurs, it is rethrown wrapped in a <c>RuntimeIOException</c>.
 /// </remarks>
 /// <param name="predictor">Classifier to serialize.</param>
 /// <param name="filename">Name of the file the model is written to.</param>
 private static void SaveToSerialized(LogisticClassifier <string, string> predictor, string filename)
 {
     try
     {
         log.Info("Writing singleton predictor in serialized format to file " + filename + ' ');
         ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
         try
         {
             @out.WriteObject(predictor);
         }
         finally
         {
             // Always release the stream; previously a failed write left it open.
             @out.Close();
         }
         log.Info("done.");
     }
     catch (IOException ioe)
     {
         throw new RuntimeIOException(ioe);
     }
 }
Exemplo n.º 6
0
        /// <summary>
        /// Trains a two-feature logistic classifier on the truth table of logical AND and
        /// asserts that each of the four input combinations is predicted correctly.
        /// </summary>
        /// <remarks>
        /// The cost and theta values of every 100th training iteration, and the hypothesis
        /// value for each input, are written to the debug output to help diagnose failures.
        /// </remarks>
        public void AndTest()
        {
            LogisticClassifier regr = new LogisticClassifier(2);

            // Truth table of AND: only (1, 1) belongs to the positive class.
            regr.AddSample(new double[] { 0, 0 }, 0);
            regr.AddSample(new double[] { 0, 1 }, 0);
            regr.AddSample(new double[] { 1, 0 }, 0);
            regr.AddSample(new double[] { 1, 1 }, 1);
            int times = 1000;

            regr.Alpha  = 3;
            regr.Lambda = 0.01;
            List <BinaryClassificationTrainResult> results = regr.Train(times);

            // Print a snapshot of every 100th iteration (0, 100, 200, ...).
            for (int i = 0; i < times; i += 100)
            {
                Debug.Print(results[i].Cost.ToString());
                for (int t = 0; t < results[i].Theta.Length; t++)
                {
                    Debug.Print("\t" + results[i].Theta[t].ToString());
                }
            }

            // NOTE(review): H receives three values for a two-feature model; the leading 1 is
            // presumably the bias input - confirm against LogisticClassifier.H.
            double[] h =
            {
                regr.H(new double[] { 1, 0, 0 }),
                regr.H(new double[] { 1, 0, 1 }),
                regr.H(new double[] { 1, 1, 0 }),
                regr.H(new double[] { 1, 1, 1 })
            };
            foreach (double hypothesis in h)
            {
                Debug.Print(hypothesis.ToString());
            }

            // Expected value goes first so that failure messages report expected/actual correctly.
            Assert.AreEqual(0, regr.Predict(new double[] { 0, 0 }));
            Assert.AreEqual(0, regr.Predict(new double[] { 0, 1 }));
            Assert.AreEqual(0, regr.Predict(new double[] { 1, 0 }));
            Assert.AreEqual(1, regr.Predict(new double[] { 1, 1 }));
        }
Exemplo n.º 7
0
        /// <summary>
        /// Console demo: reads a data file, splits it into training and test matrices,
        /// trains a binary logistic regression classifier and prints the learned weights
        /// together with the accuracy on both sets.
        /// </summary>
        /// <remarks>
        /// The file path entered at the first prompt is used as the data source. When the
        /// input is empty or whitespace, the built-in default path is used instead.
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            // Fallback data file used when nothing is entered at the prompt.
            const string defaultDataPath = @"E:\Users\Nikita\Documents\Visual Studio 2013\Projects\NSUBigData\LinearRegres\iris.csv";

            Console.WriteLine("Укажите путь к файлу");
            var enteredPath = Console.ReadLine();
            // Honour the path the user typed; previously the answer was read and then ignored.
            var path = string.IsNullOrWhiteSpace(enteredPath) ? defaultDataPath : enteredPath.Trim();

            Console.WriteLine("Укажите размерность прстранства");
            var dimension = Convert.ToInt32(Console.ReadLine());
            var ff        = new FileFramework(path, dimension);

            Console.WriteLine("\nBegin Logistic Regression (binary) Classification demo");
            Console.WriteLine("Goal is to demonstrate training using gradient descent");

            // One of the entered dimensions is not counted as a feature.
            var numFeatures = dimension - 1;
            // NOTE(review): this count is only echoed in the message below; the data itself
            // comes from the parsed file, so the printed number may not match it.
            var numRows     = 100;

            Console.WriteLine("\nGenerating " + numRows +
                              " artificial data items with " + numFeatures + " features");
            var allData = ff.ParseFile();

            Console.WriteLine("Creating train (80%) and test (20%) matrices");
            double[][] trainData;
            double[][] testData;
            // NOTE(review): the second argument is presumably a random seed - confirm against MakeTrainTest.
            MakeTrainTest(allData, 0, out trainData, out testData);
            Console.WriteLine("Done");

            Console.WriteLine("\nTraining data: \n");
            ShowData(trainData, 3, 2, true);

            Console.WriteLine("\nTest data: \n");
            ShowData(testData, 3, 2, true);

            Console.WriteLine("Creating LR binary classifier");
            var lc = new LogisticClassifier(numFeatures);

            var maxEpochs = 1000;

            Console.WriteLine("Setting maxEpochs = " + maxEpochs);
            var alpha = 0.01;

            Console.WriteLine("Setting learning rate = " + alpha.ToString("F2"));

            Console.WriteLine("\nStarting training using (stochastic) gradient descent");
            double[] weights = lc.Train(trainData, maxEpochs, alpha);
            Console.WriteLine("Training complete");

            Console.WriteLine("\nBest weights found:");
            ShowVector(weights, 4, true);

            double trainAcc = lc.Accuracy(trainData, weights);

            Console.WriteLine("Prediction accuracy on training data = " +
                              trainAcc.ToString("F4"));

            double testAcc = lc.Accuracy(testData, weights);

            Console.WriteLine("Prediction accuracy on test data = " +
                              testAcc.ToString("F4"));

            Console.WriteLine("\nEnd LR binary classification demo\n");
            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
 /// <summary>
 /// Creates the extractor through the three-argument constructor and then stores the
 /// supplied singleton model in <c>singletonPredictor</c>.
 /// </summary>
 /// <param name="dict">Forwarded to the three-argument constructor.</param>
 /// <param name="props">Forwarded to the three-argument constructor.</param>
 /// <param name="semantics">Forwarded to the three-argument constructor.</param>
 /// <param name="singletonModel">Classifier assigned to <c>singletonPredictor</c>.</param>
 /// <exception cref="System.Exception"/>
 public CoNLLMentionExtractor(
     Dictionaries dict,
     Properties props,
     Semantics semantics,
     LogisticClassifier <string, string> singletonModel)
     : this(dict, props, semantics)
 {
     // Assigned after the chained constructor has run.
     this.singletonPredictor = singletonModel;
 }