/// <summary>
/// Command-line entry point. Parses the arguments, loads the parser, parses the
/// input sentences into trees, relabels the trees from the label map, and writes
/// the trees to the output file.
/// </summary>
/// <remarks>
/// Flags are matched case-insensitively. Flags that take a value:
/// <c>-output</c>, <c>-sentences</c>, <c>-labels</c>, <c>-parser</c>, <c>-tagger</c>,
/// <c>-missing</c>, <c>-separator</c>, <c>-default</c>, <c>-saveUnknowns</c>,
/// <c>-remapLabels</c>. Boolean switches: <c>-binarize</c> / <c>-nobinarize</c>
/// (binarization is on unless turned off) and <c>-useLabelKeys</c> /
/// <c>-nouseLabelKeys</c> (off unless turned on). When <c>-useLabelKeys</c> is set,
/// the keys of the label map are used as the sentences to parse.
/// </remarks>
/// <param name="args">The raw command-line arguments.</param>
/// <exception cref="ArgumentException">
/// Thrown for an unknown flag, for a value-taking flag with no value after it, when
/// <c>-output</c> or <c>-labels</c> is missing, or when not exactly one of
/// <c>-sentences</c> and <c>-useLabelKeys</c> is given.
/// </exception>
public static void Main(string[] args)
{
    // TODO: rather than always rolling our own arg parser, we should
    // find a library which does it for us nicely
    string outputFile = null;
    string sentencesFile = null;
    string labelsFile = null;
    string parserFile = LexicalizedParser.DefaultParserLoc;
    string taggerFile = null;
    ParseAndSetLabels.MissingLabels missing = ParseAndSetLabels.MissingLabels.Default;
    string defaultLabel = "-1";
    string separator = "\\t+";
    string saveUnknownsFile = null;
    string remapLabels = null;
    int argIndex = 0;
    bool binarize = true;
    bool useLabelKeys = false;

    while (argIndex < args.Length)
    {
        string flag = args[argIndex];
        if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-output"))
        {
            outputFile = RequireFlagValue(args, argIndex);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-sentences"))
        {
            sentencesFile = RequireFlagValue(args, argIndex);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-labels"))
        {
            labelsFile = RequireFlagValue(args, argIndex);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-parser"))
        {
            parserFile = RequireFlagValue(args, argIndex);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-tagger"))
        {
            taggerFile = RequireFlagValue(args, argIndex);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-missing"))
        {
            missing = ParseAndSetLabels.MissingLabels.ValueOf(RequireFlagValue(args, argIndex));
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-separator"))
        {
            separator = RequireFlagValue(args, argIndex);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-default"))
        {
            defaultLabel = RequireFlagValue(args, argIndex);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-saveUnknowns"))
        {
            saveUnknownsFile = RequireFlagValue(args, argIndex);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-remapLabels"))
        {
            remapLabels = RequireFlagValue(args, argIndex);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-binarize"))
        {
            binarize = true;
            argIndex += 1;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-nobinarize"))
        {
            binarize = false;
            argIndex += 1;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-useLabelKeys"))
        {
            useLabelKeys = true;
            argIndex += 1;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-nouseLabelKeys"))
        {
            useLabelKeys = false;
            argIndex += 1;
        }
        else
        {
            throw new ArgumentException("Unknown argument " + flag);
        }
    }

    // Validate the combination of arguments before doing any expensive work.
    if (outputFile == null)
    {
        throw new ArgumentException("-output is required");
    }
    if (sentencesFile == null && !useLabelKeys)
    {
        throw new ArgumentException("-sentences or -useLabelKeys is required");
    }
    if (sentencesFile != null && useLabelKeys)
    {
        throw new ArgumentException("Use only one of -sentences or -useLabelKeys");
    }
    if (labelsFile == null)
    {
        throw new ArgumentException("-labels is required");
    }

    ParserGrammar parser = LoadParser(parserFile, taggerFile);

    // A null binarizer is handed to ParseSentences when -nobinarize was given.
    TreeBinarizer binarizer = null;
    if (binarize)
    {
        binarizer = TreeBinarizer.SimpleTreeBinarizer(parser.GetTLPParams().HeadFinder(), parser.TreebankLanguagePack());
    }

    IDictionary<string, string> labelMap = ReadLabelMap(labelsFile, separator, remapLabels);

    // Either read the sentences from their own file or reuse the label map's keys.
    IList<string> sentences;
    if (sentencesFile != null)
    {
        sentences = ReadSentences(sentencesFile);
    }
    else
    {
        sentences = new List<string>(labelMap.Keys);
    }

    IList<Tree> trees = ParseSentences(sentences, parser, binarizer);

    // NOTE(review): -saveUnknowns is parsed into saveUnknownsFile and the unknown
    // texts are collected here, but nothing in this method writes them out.
    // Confirm whether persisting them was intended.
    ICollection<string> unknowns = SetLabels(trees, labelMap, missing, defaultLabel);

    WriteTrees(trees, outputFile);
}

/// <summary>
/// Returns the value that follows the flag at <paramref name="flagIndex"/>.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <param name="flagIndex">Index of the flag whose value is wanted.</param>
/// <returns>The argument immediately after the flag.</returns>
/// <exception cref="ArgumentException">
/// Thrown when the flag is the last argument and therefore has no value.
/// </exception>
private static string RequireFlagValue(string[] args, int flagIndex)
{
    if (flagIndex + 1 >= args.Length)
    {
        throw new ArgumentException("Missing value for argument " + args[flagIndex]);
    }
    return args[flagIndex + 1];
}
// static methods

/// <summary>
/// Recursively relabels <paramref name="tree"/> and all of its non-leaf descendants
/// using <paramref name="labelMap"/>.
/// </summary>
/// <remarks>
/// For each non-leaf node, the lookup key is the string that
/// <c>SentenceUtils.ListToString</c> produces from the node's yield. If the map has
/// a non-null label for that key, the node's label value is set to it. Otherwise
/// <paramref name="missing"/> decides what happens: <c>Fail</c> throws,
/// <c>Default</c> assigns <paramref name="defaultLabel"/> and records the key in
/// <paramref name="unknowns"/>, and <c>KeepOriginal</c> leaves the node untouched.
/// Leaves are returned immediately without being relabeled.
/// </remarks>
/// <param name="tree">The tree (or subtree) to relabel in place.</param>
/// <param name="labelMap">Map from yield text to the label to assign.</param>
/// <param name="missing">Policy for nodes whose text has no label in the map.</param>
/// <param name="defaultLabel">Label assigned under the <c>Default</c> policy.</param>
/// <param name="unknowns">Receives each text that fell back to the default label.</param>
/// <exception cref="InvalidOperationException">
/// Thrown under the <c>Fail</c> policy when a node's text has no label.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="missing"/> is not a recognized policy.
/// </exception>
public static void SetLabels(Tree tree, IDictionary<string, string> labelMap, ParseAndSetLabels.MissingLabels missing, string defaultLabel, ICollection<string> unknowns)
{
    if (tree.IsLeaf())
    {
        return;
    }

    string text = SentenceUtils.ListToString(tree.Yield());

    // The IDictionary indexer throws KeyNotFoundException for an absent key, which
    // would bypass the missing-label policy below; TryGetValue reports absence
    // without throwing. A key mapped to null is treated the same as an absent key.
    string label;
    if (labelMap.TryGetValue(text, out label) && label != null)
    {
        tree.Label().SetValue(label);
    }
    else
    {
        switch (missing)
        {
            case ParseAndSetLabels.MissingLabels.Fail:
            {
                throw new InvalidOperationException("No label for '" + text + "'");
            }

            case ParseAndSetLabels.MissingLabels.Default:
            {
                tree.Label().SetValue(defaultLabel);
                unknowns.Add(text);
                break;
            }

            case ParseAndSetLabels.MissingLabels.KeepOriginal:
            {
                // do nothing
                break;
            }

            default:
            {
                throw new ArgumentException("Unknown MissingLabels mode " + missing);
            }
        }
    }

    foreach (Tree child in tree.Children())
    {
        SetLabels(child, labelMap, missing, defaultLabel, unknowns);
    }
}
/// <summary>
/// Relabels every tree in <paramref name="trees"/> by delegating to the
/// single-tree <c>SetLabels</c> overload, sharing one set of unknown texts
/// across all trees.
/// </summary>
/// <param name="trees">The trees to relabel in place.</param>
/// <param name="labelMap">Map from text to label, passed through unchanged.</param>
/// <param name="missing">Policy for texts without a label, passed through unchanged.</param>
/// <param name="defaultLabel">Fallback label, passed through unchanged.</param>
/// <returns>The set of texts collected by the per-tree calls.</returns>
public static ICollection<string> SetLabels(IList<Tree> trees, IDictionary<string, string> labelMap, ParseAndSetLabels.MissingLabels missing, string defaultLabel)
{
    logger.Info("Setting labels");

    // A set, so a text seen in several trees is reported only once.
    ICollection<string> unlabeledTexts = new HashSet<string>();
    for (int i = 0; i < trees.Count; i++)
    {
        SetLabels(trees[i], labelMap, missing, defaultLabel, unlabeledTexts);
    }

    return unlabeledTexts;
}