/**
 * <summary>Creates a DataSetGenerator from the tree files found in a directory. A TreeBankDrawable is built from
 * the given directory and file pattern, the supplied instance generator is stored for later use, and punctuation
 * is stripped from the loaded treeBank unless the caller asks to keep it.</summary>
 * <param name="directory">Directory where the treeBank files reside.</param>
 * <param name="pattern">Pattern of the tree files to be included in the treeBank. Use "." for all files.</param>
 * <param name="includePunctuation">If true, punctuation symbols are kept in the dataset; if false, they are
 * stripped from the treeBank right after it is loaded.</param>
 * <param name="instanceGenerator">The instance generator used to generate the dataSet.</param>
 */
public DataSetGenerator(string directory, string pattern, bool includePunctuation,
    InstanceGenerator.InstanceGenerator instanceGenerator)
{
    this.instanceGenerator = instanceGenerator;
    _treeBank = new TreeBankDrawable(directory, pattern);

    // Nothing more to do when punctuation is to be kept as loaded.
    if (includePunctuation)
    {
        return;
    }

    _treeBank.StripPunctuation();
}