コード例 #1
0
        /// <summary>
        /// Reads the input named by <paramref name="filename"/> line by line, converts each line
        /// with <c>SvmLightLineToRVFDatum</c> and adds the result to a new dataset built on the
        /// supplied feature and label indices.
        /// </summary>
        /// <param name="filename">Name handed to <c>IOUtils.ReaderFromString</c> to open the reader.</param>
        /// <param name="featureIndex">Feature index passed to the new dataset's constructor.</param>
        /// <param name="labelIndex">Label index passed to the new dataset's constructor.</param>
        /// <param name="lines">Optional sink; when it is not null every line read is appended to it.</param>
        /// <returns>The dataset containing one datum per line read.</returns>
        /// <exception cref="RuntimeIOException">Wraps any <c>IOException</c> raised while reading.</exception>
        private static Edu.Stanford.Nlp.Classify.RVFDataset <string, string> ReadSVMLightFormat(string filename, IIndex <string> featureIndex, IIndex <string> labelIndex, IList <string> lines)
        {
            BufferedReader @in = null;

            Edu.Stanford.Nlp.Classify.RVFDataset <string, string> dataset;
            try
            {
                dataset = new Edu.Stanford.Nlp.Classify.RVFDataset <string, string>(10, featureIndex, labelIndex);
                @in     = IOUtils.ReaderFromString(filename);

                // Read until ReadLine() signals end of input instead of polling Ready():
                // Ready() only reports whether a read would block, so it can end the loop
                // early on a slow source, and a null line would otherwise reach the parser.
                // NOTE(review): assumes the ported BufferedReader.ReadLine() returns null at
                // end of stream, as java.io.BufferedReader does -- confirm.
                string line;
                while ((line = @in.ReadLine()) != null)
                {
                    if (lines != null)
                    {
                        lines.Add(line);
                    }
                    dataset.Add(SvmLightLineToRVFDatum(line));
                }
            }
            catch (IOException e)
            {
                throw new RuntimeIOException(e);
            }
            finally
            {
                // Runs on success and on failure; @in may still be null if opening failed.
                IOUtils.CloseIgnoringExceptions(@in);
            }
            return(dataset);
        }
コード例 #2
0
 /// <summary>
 /// Adds every datum of <paramref name="data"/> to this dataset, passing along the values
 /// returned by <c>GetRVFDatumSource</c> and <c>GetRVFDatumId</c> for the same position.
 /// </summary>
 /// <param name="data">Dataset whose entries are added to this one.</param>
 public virtual void AddAllWithSourcesAndIds(Edu.Stanford.Nlp.Classify.RVFDataset <L, F> data)
 {
     int position = 0;

     // data.size is re-read on every pass, exactly as a for-loop condition would.
     while (position < data.size)
     {
         var datum  = data.GetRVFDatumWithId(position);
         var source = data.GetRVFDatumSource(position);
         var id     = data.GetRVFDatumId(position);

         this.Add(datum, source, id);
         position++;
     }
 }
コード例 #3
0
        /// <summary>
        /// Demo entry point: builds a three-datum dataset with the labels "cold" and "flu",
        /// calls <c>SummaryStatistics</c> on it, trains a linear classifier after selecting
        /// <c>UseQuasiNewton</c>, and calls <c>JustificationOf</c> for an unlabeled datum.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            Edu.Stanford.Nlp.Classify.RVFDataset <string, string> trainingSet = new Edu.Stanford.Nlp.Classify.RVFDataset <string, string>();

            // First example, labeled "cold".
            ClassicCounter <string> firstCold = new ClassicCounter <string>();
            firstCold.IncrementCount("fever", 3.5);
            firstCold.IncrementCount("cough", 1.1);
            firstCold.IncrementCount("congestion", 4.2);

            // Second example, labeled "flu".
            ClassicCounter <string> flu = new ClassicCounter <string>();
            flu.IncrementCount("fever", 1.5);
            flu.IncrementCount("cough", 2.1);
            flu.IncrementCount("nausea", 3.2);

            // Third example, labeled "cold".
            ClassicCounter <string> secondCold = new ClassicCounter <string>();
            secondCold.IncrementCount("cough", 2.5);
            secondCold.IncrementCount("congestion", 3.2);

            trainingSet.Add(new RVFDatum <string, string>(firstCold, "cold"));
            trainingSet.Add(new RVFDatum <string, string>(flu, "flu"));
            trainingSet.Add(new RVFDatum <string, string>(secondCold, "cold"));
            trainingSet.SummaryStatistics();

            LinearClassifierFactory <string, string> trainer = new LinearClassifierFactory <string, string>();
            trainer.UseQuasiNewton();
            LinearClassifier <string, string> classifier = trainer.TrainClassifier(trainingSet);

            // Unlabeled query datum handed to the trained classifier.
            ClassicCounter <string> queryFeatures = new ClassicCounter <string>();
            queryFeatures.IncrementCount("cough", 2.3);
            queryFeatures.IncrementCount("fever", 1.3);
            RVFDatum <string, string> query = new RVFDatum <string, string>(queryFeatures);

            classifier.JustificationOf((IDatum <string, string>)query);
        }
コード例 #4
0
 /// <summary>
 /// Builds a new dataset on this instance's feature and label indices and fills it with the
 /// result of <c>ScaleDatumGaussian</c> for each datum of <paramref name="dataset"/>, in order.
 /// </summary>
 /// <param name="dataset">Dataset whose datums are scaled one by one.</param>
 /// <returns>The newly created dataset holding the scaled datums.</returns>
 public virtual Edu.Stanford.Nlp.Classify.RVFDataset <L, F> ScaleDatasetGaussian(Edu.Stanford.Nlp.Classify.RVFDataset <L, F> dataset)
 {
     Edu.Stanford.Nlp.Classify.RVFDataset <L, F> scaled = new Edu.Stanford.Nlp.Classify.RVFDataset <L, F>(this.featureIndex, this.labelIndex);

     int position = 0;
     while (position < dataset.Size())
     {
         RVFDatum <L, F> original = (RVFDatum <L, F>)dataset.GetDatum(position);
         scaled.Add(ScaleDatumGaussian(original));
         position++;
     }

     return(scaled);
 }
コード例 #5
0
        /// <summary>
        /// Splits this dataset into a training part and a dev part. The dev part is the items at
        /// positions <paramref name="start"/> (inclusive) to <paramref name="end"/> (exclusive);
        /// the training part is everything before and after that range, in the original order.
        /// </summary>
        /// <param name="start">Position of the first dev item.</param>
        /// <param name="end">Position one past the last dev item.</param>
        /// <returns>
        /// A pair whose first element is the training dataset and whose second element is the dev
        /// dataset. Both are built on this dataset's label and feature indices. When this instance
        /// is a <c>WeightedRVFDataset</c>, both parts are weighted datasets that carry the
        /// matching slices of the weights.
        /// </returns>
        public override Pair <GeneralDataset <L, F>, GeneralDataset <L, F> > Split(int start, int end)
        {
            int devSize   = end - start;
            int trainSize = Size() - devSize;
            // Number of items after the dev range; they follow the first `start` training items.
            int tailSize  = Size() - end;

            int[][]    devData     = new int[devSize][];
            double[][] devValues   = new double[devSize][];
            int[]      devLabels   = new int[devSize];
            int[][]    trainData   = new int[trainSize][];
            double[][] trainValues = new double[trainSize][];
            int[]      trainLabels = new int[trainSize];
            // Lock object kept unchanged from the original code; other code may synchronize on it.
            lock (typeof(Runtime))
            {
                System.Array.Copy(data, start, devData, 0, devSize);
                System.Array.Copy(values, start, devValues, 0, devSize);
                System.Array.Copy(labels, start, devLabels, 0, devSize);
                System.Array.Copy(data, 0, trainData, 0, start);
                System.Array.Copy(data, end, trainData, start, tailSize);
                System.Array.Copy(values, 0, trainValues, 0, start);
                System.Array.Copy(values, end, trainValues, start, tailSize);
                System.Array.Copy(labels, 0, trainLabels, 0, start);
                System.Array.Copy(labels, end, trainLabels, start, tailSize);
            }

            // Test against the closed type WeightedRVFDataset<L, F>. Generic classes are
            // invariant in C#, so a test against WeightedRVFDataset<object, object> only
            // matches when L and F are both object, and the weights would be dropped.
            WeightedRVFDataset <L, F> w = this as WeightedRVFDataset <L, F>;
            if (w != null)
            {
                float[] trainWeights = new float[trainSize];
                float[] devWeights   = new float[devSize];
                lock (typeof(Runtime))
                {
                    System.Array.Copy(w.weights, start, devWeights, 0, devSize);
                    System.Array.Copy(w.weights, 0, trainWeights, 0, start);
                    System.Array.Copy(w.weights, end, trainWeights, start, tailSize);
                }
                WeightedRVFDataset <L, F> dev   = new WeightedRVFDataset <L, F>(labelIndex, devLabels, featureIndex, devData, devValues, devWeights);
                WeightedRVFDataset <L, F> train = new WeightedRVFDataset <L, F>(labelIndex, trainLabels, featureIndex, trainData, trainValues, trainWeights);
                return(new Pair <GeneralDataset <L, F>, GeneralDataset <L, F> >(train, dev));
            }
            else
            {
                GeneralDataset <L, F> dev   = new Edu.Stanford.Nlp.Classify.RVFDataset <L, F>(labelIndex, devLabels, featureIndex, devData, devValues);
                GeneralDataset <L, F> train = new Edu.Stanford.Nlp.Classify.RVFDataset <L, F>(labelIndex, trainLabels, featureIndex, trainData, trainValues);
                return(new Pair <GeneralDataset <L, F>, GeneralDataset <L, F> >(train, dev));
            }
        }
コード例 #6
0
        /// <summary>
        /// Splits this dataset into a dev part made of the leading
        /// <c>(int)(percentDev * Size())</c> items and a training part made of the remaining items.
        /// </summary>
        /// <param name="percentDev">Multiplied by <c>Size()</c> and truncated to get the dev item count.</param>
        /// <returns>
        /// A pair whose first element is the training dataset and whose second element is the dev
        /// dataset, both built on this dataset's label and feature indices.
        /// </returns>
        public override Pair <GeneralDataset <L, F>, GeneralDataset <L, F> > Split(double percentDev)
        {
            int devCount   = (int)(percentDev * Size());
            int trainCount = Size() - devCount;

            // Destination buffers for the leading (dev) slice.
            int[][]    devFeatures      = new int[devCount][];
            double[][] devFeatureValues = new double[devCount][];
            int[]      devLabelIds      = new int[devCount];

            // Destination buffers for the trailing (train) slice.
            int[][]    trainFeatures      = new int[trainCount][];
            double[][] trainFeatureValues = new double[trainCount][];
            int[]      trainLabelIds      = new int[trainCount];

            lock (typeof(Runtime))
            {
                // Dev takes positions [0, devCount).
                System.Array.Copy(data, 0, devFeatures, 0, devCount);
                System.Array.Copy(values, 0, devFeatureValues, 0, devCount);
                System.Array.Copy(labels, 0, devLabelIds, 0, devCount);

                // Train takes positions [devCount, devCount + trainCount).
                System.Array.Copy(data, devCount, trainFeatures, 0, trainCount);
                System.Array.Copy(values, devCount, trainFeatureValues, 0, trainCount);
                System.Array.Copy(labels, devCount, trainLabelIds, 0, trainCount);
            }

            Edu.Stanford.Nlp.Classify.RVFDataset <L, F> devPart = new Edu.Stanford.Nlp.Classify.RVFDataset <L, F>(labelIndex, devLabelIds, featureIndex, devFeatures, devFeatureValues);
            Edu.Stanford.Nlp.Classify.RVFDataset <L, F> trainPart = new Edu.Stanford.Nlp.Classify.RVFDataset <L, F>(labelIndex, trainLabelIds, featureIndex, trainFeatures, trainFeatureValues);

            return(new Pair <GeneralDataset <L, F>, GeneralDataset <L, F> >(trainPart, devPart));
        }