Exemplo n.º 1
0
 /**
  * <summary> Creates a DataSetGenerator backed by a <c>TreeBankDrawable</c> that is built from the given
  * directory and file pattern, and stores the supplied instance generator. When
  * <paramref name="includePunctuation"/> is false, <c>StripPunctuation</c> is invoked on the freshly built
  * treeBank so that punctuation is removed from the data.</summary>
  * <param name="directory">Directory passed to the treeBank constructor; where the treeBank files reside.</param>
  * <param name="pattern">File pattern passed to the treeBank constructor. Use "." for all files.</param>
  * <param name="includePunctuation">True to keep punctuation symbols in the dataset; false to strip them.</param>
  * <param name="instanceGenerator">Instance generator stored for generating the dataSet.</param>
  */
 public DataSetGenerator(string directory, string pattern, bool includePunctuation,
                         InstanceGenerator.InstanceGenerator instanceGenerator)
 {
     _treeBank = new TreeBankDrawable(directory, pattern);
     this.instanceGenerator = instanceGenerator;

     // Nothing further to do when punctuation is to be kept.
     if (includePunctuation)
     {
         return;
     }

     _treeBank.StripPunctuation();
 }
Exemplo n.º 2
0
 /**
  * <summary> Replaces the instance generator currently held by this object.</summary>
  * <param name="instanceGenerator">The instance generator to store in the instanceGenerator attribute.</param>
  */
 public void SetInstanceGenerator(InstanceGenerator.InstanceGenerator instanceGenerator)
     => this.instanceGenerator = instanceGenerator;
Exemplo n.º 3
0
 /**
  * <summary> Creates an AnnotatedDataSetGenerator: stores the supplied instance generator and builds an
  * <c>AnnotatedCorpus</c> from the given directory and file pattern.</summary>
  * <param name="directory">Directory passed to the corpus constructor; where the corpus files reside.</param>
  * <param name="pattern">File pattern passed to the corpus constructor. Use "." for all files.</param>
  * <param name="instanceGenerator">Instance generator stored for generating the dataset.</param>
  */
 public AnnotatedDataSetGenerator(string directory, string pattern,
                                  InstanceGenerator.InstanceGenerator instanceGenerator)
 {
     this.instanceGenerator = instanceGenerator;
     _corpus = new AnnotatedCorpus(directory, pattern);
 }