/// <summary>
/// Builds an <c>RVFDataset</c> by reading <paramref name="filename"/> line by line and
/// converting each line with <c>SvmLightLineToRVFDatum</c>.
/// </summary>
/// <param name="filename">Passed to <c>IOUtils.ReaderFromString</c> to obtain the reader.</param>
/// <param name="featureIndex">Feature index handed to the new dataset's constructor.</param>
/// <param name="labelIndex">Label index handed to the new dataset's constructor.</param>
/// <param name="lines">
/// Optional sink; when non-null, every raw line read is appended to it before it is parsed.
/// </param>
/// <returns>The dataset holding one datum per line read.</returns>
/// <exception cref="RuntimeIOException">Wraps any <c>IOException</c> raised while reading.</exception>
private static Edu.Stanford.Nlp.Classify.RVFDataset<string, string> ReadSVMLightFormat(string filename, IIndex<string> featureIndex, IIndex<string> labelIndex, IList<string> lines)
{
    BufferedReader @in = null;
    Edu.Stanford.Nlp.Classify.RVFDataset<string, string> dataset;
    try
    {
        // The leading 10 is presumably an initial capacity hint -- TODO confirm against the constructor.
        dataset = new Edu.Stanford.Nlp.Classify.RVFDataset<string, string>(10, featureIndex, labelIndex);
        @in = IOUtils.ReaderFromString(filename);

        // Loop on the ReadLine() result rather than Ready(): Ready() only reports whether a
        // read would block, so it can be false before end-of-stream (truncating the input),
        // and it does not protect the parser from a null line.
        string line;
        while ((line = @in.ReadLine()) != null)
        {
            if (lines != null)
            {
                lines.Add(line);
            }
            dataset.Add(SvmLightLineToRVFDatum(line));
        }
    }
    catch (IOException e)
    {
        throw new RuntimeIOException(e);
    }
    finally
    {
        // @in is still null if the reader could not be opened; the helper is called regardless.
        IOUtils.CloseIgnoringExceptions(@in);
    }
    return dataset;
}
/// <summary>
/// Appends every entry of <paramref name="data"/> to this dataset, passing along the
/// values returned by <c>GetRVFDatumWithId</c>, <c>GetRVFDatumSource</c> and
/// <c>GetRVFDatumId</c> for each position.
/// </summary>
/// <param name="data">Dataset whose entries are added to this one.</param>
public virtual void AddAllWithSourcesAndIds(Edu.Stanford.Nlp.Classify.RVFDataset<L, F> data)
{
    int position = 0;
    // data.size is deliberately re-read on every pass rather than cached.
    while (position < data.size)
    {
        var datum = data.GetRVFDatumWithId(position);
        var source = data.GetRVFDatumSource(position);
        var id = data.GetRVFDatumId(position);
        this.Add(datum, source, id);
        position++;
    }
}
/// <summary>
/// Small demonstration: builds a three-example dataset, prints its summary statistics,
/// trains a linear classifier with the quasi-Newton setting, and asks the classifier to
/// justify its decision on a fourth, unlabeled example.
/// </summary>
/// <param name="args">Unused.</param>
public static void Main(string[] args)
{
    Edu.Stanford.Nlp.Classify.RVFDataset<string, string> trainingSet = new Edu.Stanford.Nlp.Classify.RVFDataset<string, string>();

    // First "cold" example.
    ClassicCounter<string> coldFeaturesA = new ClassicCounter<string>();
    coldFeaturesA.IncrementCount("fever", 3.5);
    coldFeaturesA.IncrementCount("cough", 1.1);
    coldFeaturesA.IncrementCount("congestion", 4.2);
    trainingSet.Add(new RVFDatum<string, string>(coldFeaturesA, "cold"));

    // The single "flu" example.
    ClassicCounter<string> fluFeatures = new ClassicCounter<string>();
    fluFeatures.IncrementCount("fever", 1.5);
    fluFeatures.IncrementCount("cough", 2.1);
    fluFeatures.IncrementCount("nausea", 3.2);
    trainingSet.Add(new RVFDatum<string, string>(fluFeatures, "flu"));

    // Second "cold" example.
    ClassicCounter<string> coldFeaturesB = new ClassicCounter<string>();
    coldFeaturesB.IncrementCount("cough", 2.5);
    coldFeaturesB.IncrementCount("congestion", 3.2);
    trainingSet.Add(new RVFDatum<string, string>(coldFeaturesB, "cold"));

    trainingSet.SummaryStatistics();

    LinearClassifierFactory<string, string> trainer = new LinearClassifierFactory<string, string>();
    trainer.UseQuasiNewton();
    LinearClassifier<string, string> classifier = trainer.TrainClassifier(trainingSet);

    // Unlabeled query example.
    ClassicCounter<string> queryFeatures = new ClassicCounter<string>();
    queryFeatures.IncrementCount("cough", 2.3);
    queryFeatures.IncrementCount("fever", 1.3);
    RVFDatum<string, string> query = new RVFDatum<string, string>(queryFeatures);

    // The explicit IDatum cast is kept so the same JustificationOf overload is selected.
    classifier.JustificationOf((IDatum<string, string>)query);
}
/// <summary>
/// Produces a new dataset in which every datum of <paramref name="dataset"/> has been
/// passed through <c>ScaleDatumGaussian</c>.
/// </summary>
/// <param name="dataset">Dataset whose datums are scaled; it is only read here.</param>
/// <returns>
/// A new dataset constructed with this instance's <c>featureIndex</c> and
/// <c>labelIndex</c> and filled with the scaled datums, in the original order.
/// </returns>
public virtual Edu.Stanford.Nlp.Classify.RVFDataset<L, F> ScaleDatasetGaussian(Edu.Stanford.Nlp.Classify.RVFDataset<L, F> dataset)
{
    Edu.Stanford.Nlp.Classify.RVFDataset<L, F> scaled = new Edu.Stanford.Nlp.Classify.RVFDataset<L, F>(this.featureIndex, this.labelIndex);
    int position = 0;
    while (position < dataset.Size())
    {
        RVFDatum<L, F> unscaled = (RVFDatum<L, F>)dataset.GetDatum(position);
        scaled.Add(ScaleDatumGaussian(unscaled));
        position++;
    }
    return scaled;
}
/// <summary>
/// Splits this dataset into a held-out (dev) part made of rows
/// [<paramref name="start"/>, <paramref name="end"/>) and a training part made of all
/// remaining rows, in their original order.
/// </summary>
/// <param name="start">Index of the first row that goes into the dev part.</param>
/// <param name="end">Index one past the last row that goes into the dev part.</param>
/// <returns>
/// A pair whose first element is the training dataset and whose second element is the
/// dev dataset. Both reuse this dataset's <c>labelIndex</c> and <c>featureIndex</c>, and
/// the per-row arrays are shared with this dataset (only the outer arrays are copied).
/// When this instance is a <c>WeightedRVFDataset&lt;L, F&gt;</c>, both parts are weighted
/// datasets carrying the matching slices of the weights.
/// </returns>
public override Pair<GeneralDataset<L, F>, GeneralDataset<L, F>> Split(int start, int end)
{
    int devSize = end - start;
    int trainSize = Size() - devSize;
    // Number of rows after the dev window; they are appended to the training part.
    int tailSize = Size() - end;

    int[][] devData = new int[devSize][];
    double[][] devValues = new double[devSize][];
    int[] devLabels = new int[devSize];
    int[][] trainData = new int[trainSize][];
    double[][] trainValues = new double[trainSize][];
    int[] trainLabels = new int[trainSize];

    // NOTE(review): locking on typeof(Runtime) mirrors the converted original; locking on a
    // Type object is discouraged, but replacing it needs a field outside this method.
    lock (typeof(Runtime))
    {
        System.Array.Copy(data, start, devData, 0, devSize);
        System.Array.Copy(values, start, devValues, 0, devSize);
        System.Array.Copy(labels, start, devLabels, 0, devSize);

        System.Array.Copy(data, 0, trainData, 0, start);
        System.Array.Copy(data, end, trainData, start, tailSize);
        System.Array.Copy(values, 0, trainValues, 0, start);
        System.Array.Copy(values, end, trainValues, start, tailSize);
        System.Array.Copy(labels, 0, trainLabels, 0, start);
        System.Array.Copy(labels, end, trainLabels, start, tailSize);
    }

    // Test against the closed type WeightedRVFDataset<L, F>. Generic classes are invariant,
    // so a check against WeightedRVFDataset<object, object> fails for any other L/F and
    // would silently drop the weights.
    WeightedRVFDataset<L, F> w = this as WeightedRVFDataset<L, F>;
    if (w != null)
    {
        float[] trainWeights = new float[trainSize];
        float[] devWeights = new float[devSize];
        lock (typeof(Runtime))
        {
            System.Array.Copy(w.weights, start, devWeights, 0, devSize);
            System.Array.Copy(w.weights, 0, trainWeights, 0, start);
            System.Array.Copy(w.weights, end, trainWeights, start, tailSize);
        }
        WeightedRVFDataset<L, F> dev = new WeightedRVFDataset<L, F>(labelIndex, devLabels, featureIndex, devData, devValues, devWeights);
        WeightedRVFDataset<L, F> train = new WeightedRVFDataset<L, F>(labelIndex, trainLabels, featureIndex, trainData, trainValues, trainWeights);
        return new Pair<GeneralDataset<L, F>, GeneralDataset<L, F>>(train, dev);
    }
    else
    {
        GeneralDataset<L, F> dev = new Edu.Stanford.Nlp.Classify.RVFDataset<L, F>(labelIndex, devLabels, featureIndex, devData, devValues);
        GeneralDataset<L, F> train = new Edu.Stanford.Nlp.Classify.RVFDataset<L, F>(labelIndex, trainLabels, featureIndex, trainData, trainValues);
        return new Pair<GeneralDataset<L, F>, GeneralDataset<L, F>>(train, dev);
    }
}
/// <summary>
/// Splits this dataset by fraction: the leading <c>(int)(percentDev * Size())</c> rows
/// become the dev part and the rest become the training part.
/// </summary>
/// <param name="percentDev">Fraction of the rows assigned to the dev part.</param>
/// <returns>
/// A pair whose first element is the training dataset and whose second element is the
/// dev dataset; both are plain <c>RVFDataset</c> instances built on this dataset's
/// <c>labelIndex</c> and <c>featureIndex</c>.
/// </returns>
public override Pair<GeneralDataset<L, F>, GeneralDataset<L, F>> Split(double percentDev)
{
    int heldOutCount = (int)(percentDev * Size());
    int remainingCount = Size() - heldOutCount;

    // Dev-side buffers.
    int[][] heldOutData = new int[heldOutCount][];
    double[][] heldOutValues = new double[heldOutCount][];
    int[] heldOutLabels = new int[heldOutCount];

    // Train-side buffers.
    int[][] remainingData = new int[remainingCount][];
    double[][] remainingValues = new double[remainingCount][];
    int[] remainingLabels = new int[remainingCount];

    lock (typeof(Runtime))
    {
        // Rows [0, heldOutCount) go to dev; rows from heldOutCount onward go to train.
        System.Array.Copy(data, 0, heldOutData, 0, heldOutCount);
        System.Array.Copy(values, 0, heldOutValues, 0, heldOutCount);
        System.Array.Copy(labels, 0, heldOutLabels, 0, heldOutCount);
        System.Array.Copy(data, heldOutCount, remainingData, 0, remainingCount);
        System.Array.Copy(values, heldOutCount, remainingValues, 0, remainingCount);
        System.Array.Copy(labels, heldOutCount, remainingLabels, 0, remainingCount);
    }

    Edu.Stanford.Nlp.Classify.RVFDataset<L, F> devPart =
        new Edu.Stanford.Nlp.Classify.RVFDataset<L, F>(labelIndex, heldOutLabels, featureIndex, heldOutData, heldOutValues);
    Edu.Stanford.Nlp.Classify.RVFDataset<L, F> trainPart =
        new Edu.Stanford.Nlp.Classify.RVFDataset<L, F>(labelIndex, remainingLabels, featureIndex, remainingData, remainingValues);

    return new Pair<GeneralDataset<L, F>, GeneralDataset<L, F>>(trainPart, devPart);
}