/// <summary>
/// Initializes a record by storing every argument in the member of the same name.
/// No validation or conversion is performed here.
/// </summary>
/// <remarks>
/// NOTE(review): the original carried three comments at the top of this body
/// ("represents a tokenized file separated by text", "represents a tsv file such as a
/// conll file", "represents a file in PTB format"). They presumably describe the
/// possible <c>Format</c> values rather than this constructor - confirm and move them
/// to the enum declaration.
/// </remarks>
/// <param name="file">Stored in <c>file</c>.</param>
/// <param name="format">Stored in <c>format</c>.</param>
/// <param name="encoding">Stored in <c>encoding</c>.</param>
/// <param name="tagSeparator">Stored in <c>tagSeparator</c>.</param>
/// <param name="treeTransformer">Stored in <c>treeTransformer</c>.</param>
/// <param name="treeNormalizer">Stored in <c>treeNormalizer</c>.</param>
/// <param name="trf">Stored in <c>trf</c>.</param>
/// <param name="treeRange">Stored in <c>treeRange</c>.</param>
/// <param name="treeFilter">Stored in <c>treeFilter</c>.</param>
/// <param name="wordColumn">Stored in <c>wordColumn</c>.</param>
/// <param name="tagColumn">Stored in <c>tagColumn</c>.</param>
private TaggedFileRecord(
    string file,
    TaggedFileRecord.Format format,
    string encoding,
    string tagSeparator,
    ITreeTransformer treeTransformer,
    TreeNormalizer treeNormalizer,
    ITreeReaderFactory trf,
    NumberRangesFileFilter treeRange,
    IPredicate<Tree> treeFilter,
    int wordColumn,
    int tagColumn)
{
    // Source location and how to read it.
    this.file = file;
    this.format = format;
    this.encoding = encoding;
    this.tagSeparator = tagSeparator;

    // Tree-related collaborators.
    this.treeTransformer = treeTransformer;
    this.treeNormalizer = treeNormalizer;
    this.trf = trf;
    this.treeRange = treeRange;
    this.treeFilter = treeFilter;

    // Column indices.
    this.wordColumn = wordColumn;
    this.tagColumn = tagColumn;
}
/// <summary>
/// Builds a <see cref="TaggedFileRecord"/> from a comma-separated description string.
/// </summary>
/// <remarks>
/// When <paramref name="description"/> contains no comma, the whole string is used as the
/// file name with the <c>Text</c> format and the encoding and tag separator obtained from
/// <c>GetEncoding(config)</c> and <c>GetTagSeparator(config)</c>.
/// Otherwise the last comma-separated piece is the file name and every earlier piece must
/// be a <c>key=value</c> option. Keys are compared case-insensitively against the
/// <c>Format</c>, <c>Encoding</c>, <c>TagSeparator</c>, <c>TreeTransformer</c>,
/// <c>TreeNormalizer</c>, <c>TreeReader</c>, <c>TreeRange</c>, <c>TreeFilter</c>,
/// <c>WordColumn</c> and <c>TagColumn</c> names; options not supplied keep the defaults
/// initialised below.
/// </remarks>
/// <param name="config">Passed to <c>GetEncoding</c> and <c>GetTagSeparator</c> to obtain the default encoding and tag separator.</param>
/// <param name="description">Either a bare file name or <c>key=value,...,file</c>.</param>
/// <returns>A new record populated from the description.</returns>
/// <exception cref="ArgumentException">
/// An option piece does not split into exactly two parts on <c>=</c>, or its key is not recognised.
/// </exception>
public static Edu.Stanford.Nlp.Tagger.IO.TaggedFileRecord CreateRecord(Properties config, string description)
{
    string[] pieces = description.Split(",");
    if (pieces.Length == 1)
    {
        // No options: the description is just the file name.
        return new Edu.Stanford.Nlp.Tagger.IO.TaggedFileRecord(description, TaggedFileRecord.Format.Text, GetEncoding(config), GetTagSeparator(config), null, null, null, null, null, null, null);
    }

    // The last piece is the file; everything before it is an option.
    int optionCount = pieces.Length - 1;
    string file = pieces[optionCount];

    // Defaults, overridden by whichever options are present.
    TaggedFileRecord.Format format = TaggedFileRecord.Format.Text;
    string encoding = GetEncoding(config);
    string tagSeparator = GetTagSeparator(config);
    ITreeTransformer treeTransformer = null;
    TreeNormalizer treeNormalizer = null;
    ITreeReaderFactory trf = null;
    NumberRangesFileFilter treeRange = null;
    IPredicate<Tree> treeFilter = null;
    // NOTE(review): `int` cannot hold null in C#, so these two initialisers (and the
    // null column arguments in the early return above) do not compile as written.
    // They look like leftovers from a nullable integer type; fixing them needs the
    // declared type of the wordColumn/tagColumn members, which is outside this block.
    int wordColumn = null;
    int tagColumn = null;

    for (int i = 0; i < optionCount; i++)
    {
        string arg = pieces[i];

        // Split on the first '=' only, so the value itself may contain '='.
        string[] argPieces = arg.Split("=", 2);
        if (argPieces.Length != 2)
        {
            throw new ArgumentException("TaggedFileRecord argument " + arg + " has an unexpected number of =s");
        }

        string key = argPieces[0];
        string optionValue = argPieces[1];

        if (Sharpen.Runtime.EqualsIgnoreCase(key, Format))
        {
            format = TaggedFileRecord.Format.ValueOf(optionValue);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(key, Encoding))
        {
            encoding = optionValue;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(key, TagSeparator))
        {
            tagSeparator = optionValue;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(key, TreeTransformer))
        {
            treeTransformer = ReflectionLoading.LoadByReflection(optionValue);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(key, TreeNormalizer))
        {
            treeNormalizer = ReflectionLoading.LoadByReflection(optionValue);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(key, TreeReader))
        {
            trf = ReflectionLoading.LoadByReflection(optionValue);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(key, TreeRange))
        {
            // Ranges are written with ':' inside the description because ',' already
            // separates the options; turn them back into commas for the filter.
            string range = optionValue.ReplaceAll(":", ",");
            treeRange = new NumberRangesFileFilter(range, true);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(key, TreeFilter))
        {
            treeFilter = ReflectionLoading.LoadByReflection(optionValue);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(key, WordColumn))
        {
            // Column indices are machine-readable, so parse them culture-independently.
            wordColumn = int.Parse(optionValue, System.Globalization.CultureInfo.InvariantCulture);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(key, TagColumn))
        {
            tagColumn = int.Parse(optionValue, System.Globalization.CultureInfo.InvariantCulture);
        }
        else
        {
            throw new ArgumentException("TaggedFileRecord argument " + key + " is unknown");
        }
    }

    return new Edu.Stanford.Nlp.Tagger.IO.TaggedFileRecord(file, format, encoding, tagSeparator, treeTransformer, treeNormalizer, trf, treeRange, treeFilter, wordColumn, tagColumn);
}