/// <summary>
/// Trains the content classifier and, when junk handling is enabled,
/// a separate junk/non-junk classifier on the same dataset.
/// </summary>
protected override void Train(IDataset ds)
{
    // Junk training is requested via the "junk" parameter and may be globally disabled.
    bool trainJunk = PGetb("junk") && !DisableJunk;

    // Running total of samples seen across repeated training calls.
    int totalSamples = ds.nSamples();
    if (PExists("%nsamples"))
        totalSamples += PGeti("%nsamples");

    Global.Debugf("info", "Training content classifier");
    if (CharClass.IsEmpty)
        Initialize(CreateClassesFromDataset(ds));

    if (trainJunk /*&& !JunkClass.IsEmpty*/)
    {
        // The content classifier is trained only on the non-junk subset.
        Intarray keep = new Intarray();
        for (int i = 0; i < ds.nSamples(); i++)
        {
            if (ds.Cls(i) != jc())
                keep.Push(i);
        }
        Datasubset contentOnly = new Datasubset(ds, keep);
        CharClass.TrainDense(contentOnly, PGeti("epochs"));
    }
    else
    {
        CharClass.TrainDense(ds, PGeti("epochs"));
    }

    if (trainJunk /*&& !JunkClass.IsEmpty*/)
    {
        Global.Debugf("info", "Training junk classifier");

        // Relabel every sample into the junk classifier's two-class space.
        Intarray junkLabels = new Intarray();
        int junkCount = 0;
        for (int i = 0; i < ds.nSamples(); i++)
        {
            bool isJunkSample = (ds.Cls(i) == jc());
            junkLabels.Push(JunkClass.Classes[Convert.ToInt32(isJunkSample)]);
            if (isJunkSample)
                junkCount++;
        }

        if (junkCount > 0)
        {
            MappedDataset relabeled = new MappedDataset(ds, junkLabels);
            JunkClass.TrainDense(relabeled, PGeti("epochs"));
        }
        else
        {
            // No junk samples present: drop the junk network instead of training it.
            Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
            JunkClass.DeleteLenet();
        }
    }

    PSet("%nsamples", totalSamples);
}
/// <summary>
/// Trains the content classifier (and optionally a junk classifier),
/// creating the classifier components from parameters on first use.
/// </summary>
protected override void Train(IDataset ds)
{
    // Junk training is requested via the "junk" parameter and may be globally disabled.
    bool trainJunk = PGetb("junk") && !DisableJunk;

    // Lazily instantiate the classifier components from their configured names.
    if (charclass.IsEmpty)
    {
        charclass.SetComponent(ComponentCreator.MakeComponent(PGet("charclass")));
        TryAttachCharClassifierEvent(charclass.Object);
    }
    if (junkclass.IsEmpty)
    {
        junkclass.SetComponent(ComponentCreator.MakeComponent(PGet("junkclass")));
        TryAttachJunkClassifierEvent(junkclass.Object);
    }
    if (ulclass.IsEmpty)
        ulclass.SetComponent(ComponentCreator.MakeComponent(PGet("ulclass")));

    Global.Debugf("info", "Training content classifier");
    if (trainJunk && !junkclass.IsEmpty)
    {
        // The content classifier is trained only on the non-junk subset.
        Intarray keep = new Intarray();
        for (int i = 0; i < ds.nSamples(); i++)
        {
            if (ds.Cls(i) != jc())
                keep.Push(i);
        }
        Datasubset contentOnly = new Datasubset(ds, keep);
        charclass.Object.XTrain(contentOnly);
    }
    else
    {
        charclass.Object.XTrain(ds);
    }

    if (trainJunk && !junkclass.IsEmpty)
    {
        Global.Debugf("info", "Training junk classifier");

        // Relabel every sample as junk (1) / non-junk (0) for the binary junk model.
        Intarray junkLabels = new Intarray();
        int junkCount = 0;
        for (int i = 0; i < ds.nSamples(); i++)
        {
            bool isJunkSample = (ds.Cls(i) == jc());
            junkLabels.Push(Convert.ToInt32(isJunkSample));
            if (isJunkSample)
                junkCount++;
        }

        if (junkCount > 0)
        {
            MappedDataset relabeled = new MappedDataset(ds, junkLabels);
            junkclass.Object.XTrain(relabeled);
        }
        else
        {
            // No junk samples present: discard the junk component instead of training it.
            Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
            junkclass.SetComponent(null);
        }

        // Upper/lower-case classification is configured but not yet supported.
        if (PGeti("ul") > 0 && !ulclass.IsEmpty)
        {
            throw new Exception("ulclass not implemented");
        }
    }
}
/// <summary>
/// Trains on the dense dataset, optionally holding out a cross-validation
/// split selected by unique sample id so that resampled duplicates of the
/// same sample never appear in both the training and the test set.
/// </summary>
public override void TrainDense(IDataset ds)
{
    //PSet("%nsamples", ds.nSamples());
    float holdoutFraction = PGetf("cv_split");
    int maxHoldout = PGeti("cv_max");

    if (!crossvalidate)
    {
        // No cross-validation: train and evaluate on the full dataset.
        TrainBatch(ds, ds);
        return;
    }

    // Collect the distinct sample ids; splitting by id (not by index)
    // keeps resampled copies of a sample on one side of the split.
    Intarray uniqueIds = new Intarray();
    for (int i = 0; i < ds.nSamples(); i++)
        uniqueIds.Push(ds.Id(i));
    NarrayUtil.Uniq(uniqueIds);
    Global.Debugf("cvdetail", "reduced {0} ids to {1} ids", ds.nSamples(), uniqueIds.Length());

    // Randomly pick the held-out ids, capped at cv_max.
    NarrayUtil.Shuffle(uniqueIds);
    int holdoutCount = (int)((1.0 - holdoutFraction) * uniqueIds.Length());
    holdoutCount = Math.Min(holdoutCount, maxHoldout);

    Intarray heldOutIds = new Intarray();
    for (int i = 0; i < holdoutCount; i++)
        heldOutIds.Push(uniqueIds[i]);
    // Sorted so membership can be tested with a binary search below.
    NarrayUtil.Quicksort(heldOutIds);

    // Partition sample indices by whether their id was held out.
    Intarray trainIndices = new Intarray();
    Intarray testIndices = new Intarray();
    for (int i = 0; i < ds.nSamples(); i++)
    {
        if (ClassifierUtil.Bincontains(heldOutIds, ds.Id(i)))
            testIndices.Push(i);
        else
            trainIndices.Push(i);
    }
    Global.Debugf("cvdetail", "#training {0} #testing {1}", trainIndices.Length(), testIndices.Length());
    PSet("%ntraining", trainIndices.Length());
    PSet("%ntesting", testIndices.Length());

    Datasubset trainSet = new Datasubset(ds, trainIndices);
    Datasubset testSet = new Datasubset(ds, testIndices);
    TrainBatch(trainSet, testSet);
}