/// <summary>
/// Repairs tree structure, phrasal categories and part-of-speech labels in
/// multi-word tokens that have just been expanded.
/// </summary>
/// <remarks>
/// The input is partitioned into folds, each fold is handed to a
/// <c>MulticoreWrapper</c> running an <c>AnCoraProcessor.MultiWordProcessor</c>,
/// and the processed folds are collected into a single list.
/// </remarks>
/// <param name="trees">Trees to process.</param>
/// <returns>All trees returned by the wrapper, in the order they were polled.</returns>
/// <exception cref="System.Exception"/>
/// <exception cref="Java.Util.Concurrent.ExecutionException"/>
private IList<Tree> FixMultiWordTokens(IList<Tree> trees)
{
    bool keepNer = PropertiesUtils.GetBool(options, "ner", false);

    // Resources shared by every worker.
    IFactory<TreeNormalizer> normalizerFactory = new _IFactory_389();
    ITreeFactory treeFactory = new LabeledScoredTreeFactory();
    IThreadsafeProcessor<ICollection<Tree>, ICollection<Tree>> worker =
        new AnCoraProcessor.MultiWordProcessor(this, normalizerFactory, treeFactory, keepNer);

    int cpuCount = Runtime.GetRuntime().AvailableProcessors();
    MulticoreWrapper<ICollection<Tree>, ICollection<Tree>> pool =
        new MulticoreWrapper<ICollection<Tree>, ICollection<Tree>>(cpuCount, worker, false);

    // Hand out work in folds (20 per processor) so that parallelization pays off.
    IList<IList<Tree>> folds = CollectionUtils.PartitionIntoFolds(trees, cpuCount * 20);

    IList<Tree> fixedTrees = new List<Tree>();

    // Moves every result that is currently available from the pool into fixedTrees.
    System.Action drain = delegate
    {
        while (pool.Peek())
        {
            Sharpen.Collections.AddAll(fixedTrees, pool.Poll());
        }
    };

    foreach (ICollection<Tree> fold in folds)
    {
        pool.Put(fold);
        drain();
    }

    // Wait for outstanding work, then pick up whatever is left.
    pool.Join();
    drain();

    return fixedTrees;
}
/// <summary>
/// Command-line entry point: loads the treebank paths given as free arguments and
/// prints every tree, either stripping or adding the language's start-symbol bracket.
/// </summary>
/// <remarks>
/// Options read here: <c>l</c> selects the language (default English); <c>b</c>, when
/// true, removes a top-level start-symbol node instead of adding a missing one. The
/// usage text is emitted and the process exits with -1 when fewer than
/// <c>minArgs</c> arguments are given or when no input path was supplied.
/// </remarks>
/// <param name="args">Raw command-line arguments.</param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(Usage());
        System.Environment.Exit(-1);
    }

    Properties options = StringUtils.ArgsToProperties(args, ArgDefs());
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;

    // NOTE(review): the encoding is looked up under "l", the same key used for the
    // language above. This looks like a copy/paste slip, but the intended key is not
    // visible here -- confirm against ArgDefs()/Usage() before changing it.
    string encoding = options.GetProperty("l", "UTF-8");
    bool removeBracket = PropertiesUtils.GetBool(options, "b", false);
    tlpp.SetInputEncoding(encoding);
    tlpp.SetOutputEncoding(encoding);
    DiskTreebank tb = tlpp.DiskTreebank();

    // "\\s+" is a regular expression, so split with Regex rather than string.Split
    // (which would treat it as a literal separator). Empty pieces are skipped so
    // that a missing file list is detected instead of loading the path "".
    string fileArgs = options.GetProperty(string.Empty, string.Empty);
    string[] files = System.Text.RegularExpressions.Regex.Split(fileArgs, "\\s+");
    bool loadedAny = false;
    foreach (string filename in files)
    {
        if (string.IsNullOrEmpty(filename))
        {
            continue;
        }
        tb.LoadPath(filename);
        loadedAny = true;
    }
    if (!loadedAny)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }

    int nTrees = 0;
    PrintWriter pwo = tlpp.Pw();
    try
    {
        string startSymbol = tlpp.TreebankLanguagePack().StartSymbol();
        ITreeFactory tf = new LabeledScoredTreeFactory();
        foreach (Tree t in tb)
        {
            // A foreach iteration variable cannot be reassigned in C#, so the
            // tree that will be printed is tracked in a separate local.
            Tree output = t;
            if (removeBracket)
            {
                if (t.Value().Equals(startSymbol))
                {
                    output = t.FirstChild();
                }
            }
            else if (!t.Value().Equals(startSymbol))
            {
                // Add a bracket if it isn't already there
                output = tf.NewTreeNode(startSymbol, Java.Util.Collections.SingletonList(t));
            }
            pwo.Println(output.ToString());
            nTrees++;
        }
    }
    finally
    {
        // Close the writer even when reading or printing a tree fails.
        pwo.Close();
    }
    System.Console.Error.Printf("Processed %d trees.%n", nTrees);
}
/// <summary>
/// Reads every tree from <paramref name="treeFile"/> (decoded as UTF-8), fixes and
/// expands it, and writes the result, one tree per line, to a sibling file whose
/// name is <paramref name="treeFile"/> followed by <c>.fixed</c> (UTF-8).
/// </summary>
/// <remarks>
/// Each tree is passed through <c>TraverseAndFix</c>, then
/// <c>MultiWordTreeExpander.ExpandPhrases</c>, then, when
/// <paramref name="tn"/> is not null, <c>TreeNormalizer.NormalizeWholeTree</c>.
/// Encoding, file-not-found and other I/O exceptions are not propagated: their
/// stack trace is printed and the method returns.
/// </remarks>
/// <param name="treeFile">Input tree file.</param>
/// <param name="unigramTagger">Counter forwarded unchanged to <c>TraverseAndFix</c>.</param>
/// <param name="retainNER">Flag forwarded unchanged to <c>TraverseAndFix</c>.</param>
/// <param name="tn">Optional normalizer; may be null, in which case whole-tree normalization is skipped.</param>
private static void ResolveDummyTags(File treeFile, TwoDimensionalCounter<string, string> unigramTagger, bool retainNER, TreeNormalizer tn)
{
    ITreeFactory tf = new LabeledScoredTreeFactory();
    MultiWordTreeExpander expander = new MultiWordTreeExpander();
    try
    {
        ITreeReader tr = null;
        PrintWriter pw = null;
        int nTrees = 0;
        try
        {
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
            ITreeReaderFactory trf = new SpanishTreeReaderFactory();
            tr = trf.NewTreeReader(br);
            pw = new PrintWriter(new TextWriter(new FileOutputStream(new File(treeFile + ".fixed")), false, "UTF-8"));
            for (Tree t; (t = tr.ReadTree()) != null; nTrees++)
            {
                // Return value is not used; presumably the tree is fixed in place.
                TraverseAndFix(t, null, unigramTagger, retainNER);
                // Now "decompress" further the expanded trees formed by
                // multiword token splitting
                t = expander.ExpandPhrases(t, tn, tf);
                if (tn != null)
                {
                    t = tn.NormalizeWholeTree(t, tf);
                }
                pw.Println(t.ToString());
            }
        }
        finally
        {
            // Release both handles even when reading or processing a tree fails;
            // the nested finally keeps the reader from leaking if closing the
            // writer throws.
            try
            {
                if (pw != null)
                {
                    pw.Close();
                }
            }
            finally
            {
                if (tr != null)
                {
                    tr.Close();
                }
            }
        }
        System.Console.Out.WriteLine("Processed " + nTrees + " trees");
    }
    catch (UnsupportedEncodingException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (FileNotFoundException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (IOException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
}
// static methods
/// <summary>
/// Builds a flat fallback tree for a sentence that could not be parsed.
/// </summary>
/// <param name="words">Words of the sentence that didn't parse.</param>
/// <returns>
/// A tree whose root is labelled "X" and which has one preterminal per word.
/// A preterminal carries the word's tag when the word implements
/// <c>IHasTag</c> and its tag is not null, and "XX" otherwise.
/// </returns>
public static Tree XTree<_T0>(IList<_T0> words)
    where _T0 : IHasWord
{
    ITreeFactory factory = new LabeledScoredTreeFactory();
    IList<Tree> preterminals = new List<Tree>();
    foreach (IHasWord token in words)
    {
        Tree leaf = factory.NewLeaf(token.Word());

        // Fall back to the placeholder tag when the word is untagged.
        IHasTag tagged = token as IHasTag;
        string label = (tagged != null && tagged.Tag() != null) ? tagged.Tag() : "XX";

        preterminals.Add(factory.NewTreeNode(label, Java.Util.Collections.SingletonList(leaf)));
    }
    return factory.NewTreeNode("X", preterminals);
}