/// <summary>
/// Converts a single line of a CoNLL-U file into an <see cref="IndexedWord"/>.
/// </summary>
/// <remarks>
/// <para>
/// A line starting with <c>#</c> is returned as a comment word: the whole
/// line becomes the word text and the tag is set to <c>CommentPos</c>.
/// </para>
/// <para>
/// Any other line is split on runs of whitespace. Column 1 is stored as the
/// text annotation, and column 0 (the id) selects how the rest is read:
/// an id containing <c>-</c> is a multiword token span (<c>start-end</c>),
/// an id containing <c>.</c> is read as <c>index.copyCount</c>, and any
/// other id is read as a plain integer index with the remaining columns
/// (lemma, coarse tag, POS tag, head index, dependency type, misc) stored
/// as annotations.
/// </para>
/// </remarks>
/// <param name="line">One line of CoNLL-U input; must not be null.</param>
/// <returns>The word populated from <paramref name="line"/>.</returns>
/// <exception cref="System.IndexOutOfRangeException">
/// The line has fewer columns than the selected branch reads.
/// </exception>
/// <exception cref="System.FormatException">
/// A numeric column (id, span bound, copy count, head index) is not an integer.
/// </exception>
public virtual IndexedWord Apply(string line)
{
    IndexedWord word = new IndexedWord();

    // Ordinal comparison: '#' is a format marker, not linguistic text.
    if (line.StartsWith("#", System.StringComparison.Ordinal))
    {
        word.SetWord(line);
        word.SetTag(CommentPos);
        return word;
    }

    // string.Split treats its argument as literal text, so the pattern
    // "\s+" has to go through Regex.Split to actually split on whitespace.
    string[] bits = System.Text.RegularExpressions.Regex.Split(line, "\\s+");

    // The file format is machine-readable, so numbers are parsed culture-independently.
    System.IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

    string id = bits[0];
    word.Set(typeof(CoreAnnotations.TextAnnotation), bits[1]);

    /* Check if it is a multiword token. */
    if (id.Contains("-"))
    {
        string[] span = id.Split('-');
        int start = System.Convert.ToInt32(span[0], invariant);
        int end = System.Convert.ToInt32(span[1], invariant);
        word.Set(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation), new IntPair(start, end));
        word.Set(typeof(CoreAnnotations.IndexAnnotation), start);
        // Multiword token lines carry no features or dependencies to parse.
        return word;
    }

    if (id.Contains("."))
    {
        // Id of the form "index.copyCount"; split on the literal dot.
        string[] indexParts = id.Split('.');
        int index = System.Convert.ToInt32(indexParts[0], invariant);
        int copyCount = System.Convert.ToInt32(indexParts[1], invariant);
        word.Set(typeof(CoreAnnotations.IndexAnnotation), index);
        word.SetIndex(index);
        word.SetCopyCount(copyCount);
    }
    else
    {
        int index = System.Convert.ToInt32(id, invariant);
        word.Set(typeof(CoreAnnotations.IndexAnnotation), index);
        word.Set(typeof(CoreAnnotations.LemmaAnnotation), bits[2]);
        word.Set(typeof(CoreAnnotations.CoarseTagAnnotation), bits[3]);
        word.Set(typeof(CoreAnnotations.PartOfSpeechAnnotation), bits[4]);
        word.Set(typeof(CoreAnnotations.CoNLLDepParentIndexAnnotation), System.Convert.ToInt32(bits[6], invariant));
        word.Set(typeof(CoreAnnotations.CoNLLDepTypeAnnotation), bits[7]);
        word.Set(typeof(CoreAnnotations.CoNLLUMisc), bits[9]);
        word.SetIndex(index);
    }

    // Shared tail for both non-multiword forms.
    word.SetValue(bits[1]);

    /* Parse features. */
    Dictionary<string, string> features = CoNLLUUtils.ParseFeatures(bits[5]);
    word.Set(typeof(CoreAnnotations.CoNLLUFeats), features);

    /* Parse extra dependencies. */
    Dictionary<string, string> extraDeps = CoNLLUUtils.ParseExtraDeps(bits[8]);
    word.Set(typeof(CoreAnnotations.CoNLLUSecondaryDepsAnnotation), extraDeps);

    return word;
}
/// <summary>
/// Reads <c>FeatureMapFile</c> and rebuilds <c>posFeatureMap</c> and
/// <c>wordPosFeatureMap</c> from its contents.
/// </summary>
/// <remarks>
/// Each line is split on runs of whitespace and needs at least three fields;
/// shorter lines are skipped. When the first field is <c>*</c>, the parsed
/// features (third field) are stored in <c>posFeatureMap</c> under the second
/// field. Otherwise they are stored in <c>wordPosFeatureMap</c> under the key
/// <c>first_second</c>. A later line with the same key overwrites an earlier one.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
private void LoadFeatureMap()
{
    using (Reader r = IOUtils.ReaderFromString(FeatureMapFile))
    {
        BufferedReader br = new BufferedReader(r);
        posFeatureMap = new Dictionary<string, Dictionary<string, string>>();
        wordPosFeatureMap = new Dictionary<string, Dictionary<string, string>>();

        string line;
        while ((line = br.ReadLine()) != null)
        {
            // string.Split treats its argument as literal text, so the pattern
            // "\s+" has to go through Regex.Split to actually split on whitespace.
            string[] parts = System.Text.RegularExpressions.Regex.Split(line, "\\s+");
            if (parts.Length < 3)
            {
                continue;
            }

            Dictionary<string, string> features = CoNLLUUtils.ParseFeatures(parts[2]);
            if (parts[0].Equals("*"))
            {
                // Wildcard word: the entry is keyed by the second field alone.
                posFeatureMap[parts[1]] = features;
            }
            else
            {
                wordPosFeatureMap[parts[0] + '_' + parts[1]] = features;
            }
        }
    }
}