/// <summary>
/// Reads one serialized dependency graph, which occupies exactly two lines of
/// <paramref name="reader"/>: a tab-separated node line followed by a
/// tab-separated edge line. Either line may be blank, in which case it
/// contributes no nodes (respectively no edges).
/// </summary>
/// <remarks>
/// Node line: <c>docId \t sentenceIndex \t node [\t node ...]</c>. A docId of
/// <c>-</c> is read as the empty string. Each node is <c>index</c>,
/// <c>index-copy</c> or <c>index-copy-flag</c>; the node is a root when the
/// flag equals <c>R</c>, and the copy value defaults to -1 when absent.
/// Edge line: each edge is
/// <c>dep source target [isExtra [sourceCopy [targetCopy]]]</c>, fields
/// separated by a single space; missing copy values default to 0 and a missing
/// isExtra defaults to false.
/// </remarks>
/// <param name="reader">Reader positioned at the node line of the graph.</param>
/// <returns>The intermediate graph holding the parsed nodes and edges.</returns>
/// <exception cref="System.IO.IOException">
/// The input ends before both lines were read (thrown as
/// <see cref="System.IO.EndOfStreamException"/>).
/// </exception>
/// <exception cref="System.Exception">
/// A node line has fewer than three fields, a node has more than three parts,
/// or an edge does not have between three and six fields.
/// </exception>
private static AnnotationSerializer.IntermediateSemanticGraph LoadDependencyGraph(BufferedReader reader)
{
    // The serialized form is machine-readable, so numbers are parsed independently
    // of the culture of the current thread.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    AnnotationSerializer.IntermediateSemanticGraph graph = new AnnotationSerializer.IntermediateSemanticGraph();

    // first line: list of nodes
    string line = reader.ReadLine();
    if (line == null)
    {
        throw new System.IO.EndOfStreamException("ERROR: Unexpected end of input while reading dependency graph nodes");
    }
    line = line.Trim();
    if (line.Length > 0)
    {
        string[] bits = line.Split("\t");
        if (bits.Length < 3)
        {
            throw new Exception("ERROR: Invalid dependency node line: " + line);
        }
        string docId = bits[0].Equals("-") ? string.Empty : bits[0];
        int sentIndex = System.Convert.ToInt32(bits[1], invariant);
        for (int i = 2; i < bits.Length; i++)
        {
            string[] bbits = bits[i].Split("-");
            if (bbits.Length > 3)
            {
                throw new Exception("ERROR: Invalid format for dependency graph: " + line);
            }
            // The copy value is present for both the two-part and three-part forms.
            int copyAnnotation = (bbits.Length >= 2) ? System.Convert.ToInt32(bbits[1], invariant) : -1;
            bool isRoot = (bbits.Length == 3) && bbits[2].Equals("R");
            int index = System.Convert.ToInt32(bbits[0], invariant);
            graph.nodes.Add(new AnnotationSerializer.IntermediateNode(docId, sentIndex, index, copyAnnotation, isRoot));
        }
    }

    // second line: list of deps
    line = reader.ReadLine();
    if (line == null)
    {
        throw new System.IO.EndOfStreamException("ERROR: Unexpected end of input while reading dependency graph edges");
    }
    line = line.Trim();
    if (line.Length > 0)
    {
        foreach (string bit in line.Split("\t"))
        {
            string[] bbits = bit.Split(" ");
            if (bbits.Length < 3 || bbits.Length > 6)
            {
                throw new Exception("ERROR: Invalid format for dependency graph: " + line);
            }
            string dep = bbits[0];
            int source = System.Convert.ToInt32(bbits[1], invariant);
            int target = System.Convert.ToInt32(bbits[2], invariant);
            // Fields are positional: the isExtra flag is the fourth field, so it has to be
            // honoured even when copy counts follow it (five- and six-field edges).
            bool isExtra = (bbits.Length > 3) && bool.Parse(bbits[3]);
            int sourceCopy = (bbits.Length > 4) ? System.Convert.ToInt32(bbits[4], invariant) : 0;
            int targetCopy = (bbits.Length > 5) ? System.Convert.ToInt32(bbits[5], invariant) : 0;
            graph.edges.Add(new AnnotationSerializer.IntermediateEdge(dep, source, sourceCopy, target, targetCopy, isExtra));
        }
    }

    return graph;
}
/// <summary>
/// Deserializes one document from <paramref name="is"/>: the coref chains
/// (new format), one line holding the coref graph (old format, may be blank),
/// and then zero or more sentences until the end of the input.
/// </summary>
/// <remarks>
/// Each sentence consists of one parse-tree line, three dependency graphs
/// (collapsed, basic, CC-processed, in that order; each read by
/// <c>LoadDependencyGraph</c>), and then one token per line up to an empty line
/// or the end of the input.
/// The reader created here is not closed; the stream it wraps is handed back
/// to the caller in the returned pair.
/// </remarks>
/// <param name="is">
/// Stream to read from. When compression is enabled and the stream is not
/// already a <c>GZIPInputStream</c>, it is wrapped in one.
/// </param>
/// <returns>The reconstructed document paired with the (possibly wrapped) stream.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="RuntimeIOException">
/// The old-format coref graph line is missing, or its number of
/// space-separated fields is not a multiple of four.
/// </exception>
public override Pair<Annotation, InputStream> Read(InputStream @is)
{
    if (compress && !(@is is GZIPInputStream))
    {
        @is = new GZIPInputStream(@is);
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(@is));
    Annotation doc = new Annotation(string.Empty);
    // The serialized form is machine-readable, so numbers are parsed independently
    // of the culture of the current thread.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // read the coref graph (new format)
    IDictionary<int, CorefChain> chains = LoadCorefChains(reader);
    if (chains != null)
    {
        doc.Set(typeof(CorefCoreAnnotations.CorefChainAnnotation), chains);
    }

    // read the coref graph (old format)
    string line = reader.ReadLine();
    if (line == null)
    {
        // Fail with a descriptive error instead of dereferencing null on truncated input.
        throw new RuntimeIOException("ERROR: Unexpected end of input while reading the serialized coref graph");
    }
    line = line.Trim();
    if (line.Length > 0)
    {
        string[] bits = line.Split(" ");
        if (bits.Length % 4 != 0)
        {
            throw new RuntimeIOException("ERROR: Incorrect format for the serialized coref graph: " + line);
        }
        // Every group of four integers is one (source, destination) pair of 2-tuples.
        IList<Pair<IntTuple, IntTuple>> corefGraph = new List<Pair<IntTuple, IntTuple>>();
        for (int i = 0; i < bits.Length; i += 4)
        {
            IntTuple src = new IntTuple(2);
            IntTuple dst = new IntTuple(2);
            src.Set(0, System.Convert.ToInt32(bits[i], invariant));
            src.Set(1, System.Convert.ToInt32(bits[i + 1], invariant));
            dst.Set(0, System.Convert.ToInt32(bits[i + 2], invariant));
            dst.Set(1, System.Convert.ToInt32(bits[i + 3], invariant));
            corefGraph.Add(new Pair<IntTuple, IntTuple>(src, dst));
        }
        doc.Set(typeof(CorefCoreAnnotations.CorefGraphAnnotation), corefGraph);
    }

    // read individual sentences
    IList<ICoreMap> sentences = new List<ICoreMap>();
    while ((line = reader.ReadLine()) != null)
    {
        ICoreMap sentence = new Annotation(string.Empty);

        // first line is the parse tree. construct it with CoreLabels in Tree nodes
        Tree tree = new PennTreeReader(new StringReader(line), new LabeledScoredTreeFactory(CoreLabel.Factory())).ReadTree();
        sentence.Set(typeof(TreeCoreAnnotations.TreeAnnotation), tree);

        // read the dependency graphs; the order of these three calls is the order in the input
        AnnotationSerializer.IntermediateSemanticGraph intermCollapsedDeps = LoadDependencyGraph(reader);
        AnnotationSerializer.IntermediateSemanticGraph intermUncollapsedDeps = LoadDependencyGraph(reader);
        AnnotationSerializer.IntermediateSemanticGraph intermCcDeps = LoadDependencyGraph(reader);

        // the remaining lines until empty line (or end of input) are tokens
        IList<CoreLabel> tokens = new List<CoreLabel>();
        while ((line = reader.ReadLine()) != null && line.Length > 0)
        {
            tokens.Add(LoadToken(line, haveExplicitAntecedent));
        }
        sentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);

        // convert the intermediate graphs to actual SemanticGraphs, now that the tokens are known
        SemanticGraph collapsedDeps = intermCollapsedDeps.ConvertIntermediateGraph(tokens);
        sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), collapsedDeps);
        SemanticGraph uncollapsedDeps = intermUncollapsedDeps.ConvertIntermediateGraph(tokens);
        sentence.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), uncollapsedDeps);
        SemanticGraph ccDeps = intermCcDeps.ConvertIntermediateGraph(tokens);
        sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), ccDeps);

        sentences.Add(sentence);
    }
    doc.Set(typeof(CoreAnnotations.SentencesAnnotation), sentences);

    return Pair.MakePair(doc, @is);
}