/// <summary>Constructs a Dataset by reading in a file in SVM light format.</summary>
/// <remarks>
/// The created dataset is constructed with the feature and label indices that are
/// passed in. Every line obtained from the file is converted with
/// <c>SvmLightLineToDatum</c> and added to the dataset. When
/// <paramref name="lines"/> is not null, each raw line is also appended to it in
/// the order it was read.
/// </remarks>
/// <param name="filename">Path of the SVM light file to read.</param>
/// <param name="featureIndex">Feature index handed to the new dataset.</param>
/// <param name="labelIndex">Label index handed to the new dataset.</param>
/// <param name="lines">Optional sink that receives every raw line; may be null.</param>
/// <returns>The dataset populated from the file.</returns>
/// <exception cref="System.Exception">
/// Thrown when reading or converting a line fails; the original failure is
/// available through <c>InnerException</c>.
/// </exception>
public static Edu.Stanford.Nlp.Classify.Dataset<string, string> ReadSVMLightFormat(string filename, IIndex<string> featureIndex, IIndex<string> labelIndex, IList<string> lines)
{
    Edu.Stanford.Nlp.Classify.Dataset<string, string> dataset;
    try
    {
        dataset = new Edu.Stanford.Nlp.Classify.Dataset<string, string>(10, featureIndex, labelIndex);
        foreach (string line in ObjectBank.GetLineIterator(new File(filename)))
        {
            // Record the raw line before converting it, so the caller still
            // sees the offending line if the conversion throws.
            if (lines != null)
            {
                lines.Add(line);
            }
            dataset.Add(SvmLightLineToDatum(line));
        }
    }
    catch (Exception e)
    {
        // System.Exception has no constructor taking only an exception; pass the
        // message and keep the cause as InnerException so the stack trace survives.
        throw new Exception(e.Message, e);
    }
    return dataset;
}
/// <summary>
/// Splits this dataset into a training part and a development part.
/// </summary>
/// <remarks>
/// The development part receives the items at positions
/// [<paramref name="start"/>, <paramref name="end"/>); the training part receives
/// all remaining items, in their original order. Both resulting datasets are built
/// with this dataset's label and feature indices. When this instance is a
/// <c>WeightedDataset</c>, the per-item weights are split the same way and
/// weighted datasets are returned.
/// </remarks>
/// <param name="start">Index of the first item of the development part (inclusive).</param>
/// <param name="end">Index just past the last item of the development part (exclusive).</param>
/// <returns>A pair whose first element is the training set and whose second element is the development set.</returns>
public override Pair<GeneralDataset<L, F>, GeneralDataset<L, F>> Split(int start, int end)
{
    int devSize = end - start;
    int trainSize = Size() - devSize;
    int[][] devData = new int[devSize][];
    int[] devLabels = new int[devSize];
    int[][] trainData = new int[trainSize][];
    int[] trainLabels = new int[trainSize];

    // NOTE(review): the copies are serialized on the Runtime type object, as in the
    // existing code; kept unchanged so current synchronization behavior is preserved.
    lock (typeof(Runtime))
    {
        System.Array.Copy(data, start, devData, 0, devSize);
        System.Array.Copy(labels, start, devLabels, 0, devSize);
        // Training items are everything before 'start' followed by everything from 'end' on.
        System.Array.Copy(data, 0, trainData, 0, start);
        System.Array.Copy(data, end, trainData, start, Size() - end);
        System.Array.Copy(labels, 0, trainLabels, 0, start);
        System.Array.Copy(labels, end, trainLabels, start, Size() - end);
    }

    // Generic classes are invariant in C#, so a check against
    // WeightedDataset<object, object> fails for any other type arguments.
    // Test against the actual type arguments of this dataset instead.
    WeightedDataset<L, F> w = this as WeightedDataset<L, F>;
    if (w != null)
    {
        float[] trainWeights = new float[trainSize];
        float[] devWeights = new float[devSize];
        lock (typeof(Runtime))
        {
            System.Array.Copy(w.weights, start, devWeights, 0, devSize);
            System.Array.Copy(w.weights, 0, trainWeights, 0, start);
            System.Array.Copy(w.weights, end, trainWeights, start, Size() - end);
        }
        WeightedDataset<L, F> weightedDev = new WeightedDataset<L, F>(labelIndex, devLabels, featureIndex, devData, devSize, devWeights);
        WeightedDataset<L, F> weightedTrain = new WeightedDataset<L, F>(labelIndex, trainLabels, featureIndex, trainData, trainSize, trainWeights);
        return new Pair<GeneralDataset<L, F>, GeneralDataset<L, F>>(weightedTrain, weightedDev);
    }

    Edu.Stanford.Nlp.Classify.Dataset<L, F> plainDev = new Edu.Stanford.Nlp.Classify.Dataset<L, F>(labelIndex, devLabels, featureIndex, devData, devSize);
    Edu.Stanford.Nlp.Classify.Dataset<L, F> plainTrain = new Edu.Stanford.Nlp.Classify.Dataset<L, F>(labelIndex, trainLabels, featureIndex, trainData, trainSize);
    return new Pair<GeneralDataset<L, F>, GeneralDataset<L, F>>(plainTrain, plainDev);
}