/// <summary>
/// Copies every datum of <paramref name="dataset"/> into a new dataset that is built on
/// this dataset's feature index and label index.
/// </summary>
/// <remarks>
/// Useful when two datasets are created independently and one wants to train a model on one
/// dataset and test on the other. -Ramesh.
/// Both indices of this dataset are locked while the data is copied and unlocked afterwards,
/// including when the copy fails with an exception.
/// </remarks>
/// <param name="dataset">The dataset whose data are re-added against this dataset's indices.</param>
/// <returns>
/// A new GeneralDataset whose features and ids map exactly to those of this GeneralDataset.
/// It is an RVFDataset when <paramref name="dataset"/> is an RVFDataset, otherwise a Dataset.
/// </returns>
public virtual Edu.Stanford.Nlp.Classify.GeneralDataset<L, F> MapDataset(Edu.Stanford.Nlp.Classify.GeneralDataset<L, F> dataset)
{
    Edu.Stanford.Nlp.Classify.GeneralDataset<L, F> newDataset;
    if (dataset is RVFDataset)
    {
        newDataset = new RVFDataset<L, F>(this.featureIndex, this.labelIndex);
    }
    else
    {
        newDataset = new Dataset<L, F>(this.featureIndex, this.labelIndex);
    }

    // Lock the shared indices for the duration of the copy.
    // NOTE(review): presumably a locked index refuses new entries, so features/labels
    // unknown to this dataset are not added to it -- confirm against the index implementation.
    this.featureIndex.Lock();
    this.labelIndex.Lock();
    try
    {
        for (int i = 0; i < dataset.Size(); i++)
        {
            newDataset.Add(dataset.GetDatum(i));
        }
    }
    finally
    {
        // Always release the locks, otherwise an exception while copying would leave
        // this dataset's own indices locked.
        this.featureIndex.Unlock();
        this.labelIndex.Unlock();
    }

    return newDataset;
}
/// <summary>
/// Copies every datum of <paramref name="dataset"/> into a new dataset that is built on
/// this dataset's feature index and on <paramref name="newLabelIndex"/>, converting each
/// datum with <c>MapDatum</c> before it is added.
/// </summary>
/// <remarks>
/// This dataset's feature index and label index are locked while the data is copied and
/// unlocked afterwards, including when the copy fails with an exception.
/// </remarks>
/// <typeparam name="L2">The label type of the returned dataset.</typeparam>
/// <param name="dataset">The dataset whose data are converted and re-added.</param>
/// <param name="newLabelIndex">The label index handed to the newly created dataset.</param>
/// <param name="labelMapping">Mapping from old labels to new labels; passed through to <c>MapDatum</c>.</param>
/// <param name="defaultLabel">
/// Passed through to <c>MapDatum</c>; presumably the label used when the mapping has no entry
/// for a datum's label (verify against <c>MapDatum</c>).
/// </param>
/// <returns>
/// A new GeneralDataset whose features and ids map exactly to those of this GeneralDataset,
/// but whose labels are converted to another set of labels. It is an RVFDataset when
/// <paramref name="dataset"/> is an RVFDataset, otherwise a Dataset.
/// </returns>
public virtual Edu.Stanford.Nlp.Classify.GeneralDataset<L2, F> MapDataset<L2>(Edu.Stanford.Nlp.Classify.GeneralDataset<L, F> dataset, IIndex<L2> newLabelIndex, IDictionary<L, L2> labelMapping, L2 defaultLabel)
{
    Edu.Stanford.Nlp.Classify.GeneralDataset<L2, F> newDataset;
    if (dataset is RVFDataset)
    {
        newDataset = new RVFDataset<L2, F>(this.featureIndex, newLabelIndex);
    }
    else
    {
        newDataset = new Dataset<L2, F>(this.featureIndex, newLabelIndex);
    }

    // NOTE(review): this locks this.labelIndex although the new dataset is built on
    // newLabelIndex, which stays unlocked. Kept as in the original; confirm it is intended.
    this.featureIndex.Lock();
    this.labelIndex.Lock();
    try
    {
        for (int i = 0; i < dataset.Size(); i++)
        {
            IDatum<L, F> d = dataset.GetDatum(i);
            IDatum<L2, F> d2 = MapDatum(d, labelMapping, defaultLabel);
            newDataset.Add(d2);
        }
    }
    finally
    {
        // Always release the locks, otherwise an exception while mapping or adding a datum
        // would leave this dataset's own indices locked.
        this.featureIndex.Unlock();
        this.labelIndex.Unlock();
    }

    return newDataset;
}