Example #1
0
            /// <summary>
            /// Converts one line of a CoNLL-U style file into an <c>IndexedWord</c>.
            /// </summary>
            /// <remarks>
            /// Lines starting with "#" are returned as comment words: the whole line is
            /// stored as the word text and the tag is set to <c>CommentPos</c>.
            /// Every other line is split on runs of whitespace and the columns are read
            /// positionally: 0 = id, 1 = form, 2 = lemma, 3 = coarse tag, 4 = POS tag,
            /// 5 = features, 6 = parent index, 7 = dependency type,
            /// 8 = extra dependencies, 9 = misc.
            /// The id column selects one of three shapes:
            /// "start-end" (multiword token; only the text, the span and the start index
            /// are stored), "index.copy" (index, copy count, value, features and extra
            /// dependencies are stored) or a plain integer (all columns are stored).
            /// </remarks>
            /// <param name="line">A single line of input; must not be null.</param>
            /// <returns>A newly created word populated from <paramref name="line"/>.</returns>
            public virtual IndexedWord Apply(string line)
            {
                IndexedWord word = new IndexedWord();

                if (line.StartsWith("#", System.StringComparison.Ordinal))
                {
                    word.SetWord(line);
                    word.SetTag(CommentPos);
                    return word;
                }

                // string.Split(string) treats its argument as a literal separator, so the
                // regex pattern inherited from the Java original has to go through Regex.
                // RegexOptions.ECMAScript restricts \s to ASCII whitespace, as in Java.
                string[] bits = System.Text.RegularExpressions.Regex.Split(
                    line, "\\s+", System.Text.RegularExpressions.RegexOptions.ECMAScript);
                word.Set(typeof(CoreAnnotations.TextAnnotation), bits[1]);

                /* Check if it is a multiword token. */
                if (bits[0].Contains("-"))
                {
                    string[] span  = bits[0].Split('-');
                    int      start = System.Convert.ToInt32(span[0]);
                    int      end   = System.Convert.ToInt32(span[1]);
                    word.Set(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation), new IntPair(start, end));
                    word.Set(typeof(CoreAnnotations.IndexAnnotation), start);
                }
                else
                {
                    if (bits[0].Contains("."))
                    {
                        /* Id of the form "index.copy": split on the literal dot. */
                        string[] indexParts = bits[0].Split('.');
                        int      index      = System.Convert.ToInt32(indexParts[0]);
                        int      copyCount  = System.Convert.ToInt32(indexParts[1]);
                        word.Set(typeof(CoreAnnotations.IndexAnnotation), index);
                        word.SetIndex(index);
                        word.SetCopyCount(copyCount);
                    }
                    else
                    {
                        int index = System.Convert.ToInt32(bits[0]);
                        word.Set(typeof(CoreAnnotations.IndexAnnotation), index);
                        word.Set(typeof(CoreAnnotations.LemmaAnnotation), bits[2]);
                        word.Set(typeof(CoreAnnotations.CoarseTagAnnotation), bits[3]);
                        word.Set(typeof(CoreAnnotations.PartOfSpeechAnnotation), bits[4]);
                        word.Set(typeof(CoreAnnotations.CoNLLDepParentIndexAnnotation), System.Convert.ToInt32(bits[6]));
                        word.Set(typeof(CoreAnnotations.CoNLLDepTypeAnnotation), bits[7]);
                        word.Set(typeof(CoreAnnotations.CoNLLUMisc), bits[9]);
                        word.SetIndex(index);
                    }

                    /* Shared by both id shapes above: value, features, extra dependencies. */
                    word.SetValue(bits[1]);
                    /* Parse features. */
                    Dictionary <string, string> features = CoNLLUUtils.ParseFeatures(bits[5]);
                    word.Set(typeof(CoreAnnotations.CoNLLUFeats), features);
                    /* Parse extra dependencies. */
                    Dictionary <string, string> extraDeps = CoNLLUUtils.ParseExtraDeps(bits[8]);
                    word.Set(typeof(CoreAnnotations.CoNLLUSecondaryDepsAnnotation), extraDeps);
                }
                return word;
            }
Example #2
0
 /// <summary>
 /// Reads <c>FeatureMapFile</c> line by line and rebuilds <c>posFeatureMap</c>
 /// and <c>wordPosFeatureMap</c> from scratch.
 /// </summary>
 /// <remarks>
 /// Each line is split on runs of whitespace; lines with fewer than three fields
 /// are skipped. When the first field is "*", the parsed features (third field)
 /// are stored in <c>posFeatureMap</c> under the second field. Otherwise they are
 /// stored in <c>wordPosFeatureMap</c> under the key "first_second".
 /// A later line with the same key overwrites an earlier one.
 /// </remarks>
 /// <exception cref="System.IO.IOException"/>
 private void LoadFeatureMap()
 {
     using (Reader r = IOUtils.ReaderFromString(FeatureMapFile))
     {
         BufferedReader br = new BufferedReader(r);
         posFeatureMap     = new Dictionary <string, Dictionary <string, string> >();
         wordPosFeatureMap = new Dictionary <string, Dictionary <string, string> >();
         string line;
         while ((line = br.ReadLine()) != null)
         {
             // string.Split(string) treats its argument as a literal separator, so the
             // regex pattern inherited from the Java original has to go through Regex.
             // RegexOptions.ECMAScript restricts \s to ASCII whitespace, as in Java.
             // Trailing whitespace is trimmed first so it cannot produce an empty last
             // field that would be counted by the length check below.
             string[] parts = System.Text.RegularExpressions.Regex.Split(
                 line.TrimEnd(), "\\s+", System.Text.RegularExpressions.RegexOptions.ECMAScript);
             if (parts.Length < 3)
             {
                 continue;
             }
             if (parts[0].Equals("*"))
             {
                 posFeatureMap[parts[1]] = CoNLLUUtils.ParseFeatures(parts[2]);
             }
             else
             {
                 wordPosFeatureMap[parts[0] + '_' + parts[1]] = CoNLLUUtils.ParseFeatures(parts[2]);
             }
         }
     }
 }