예제 #1
0
 /// <summary>
 /// Checks how <c>TSVUtils.ParseArray</c> handles backslash escapes and doubled quotes
 /// inside a brace-delimited array literal.
 /// </summary>
 public virtual void TestParseArrayEscape()
 {
     // Backslash-escaped quotes inside quoted elements.
     var expectedQuoted = Arrays.AsList("foo", "\"", "a\"b", "bar");
     var actualQuoted = TSVUtils.ParseArray("{foo,\"\\\"\",\"a\\\"b\",bar}");
     NUnit.Framework.Assert.AreEqual(expectedQuoted, actualQuoted);

     // A backslash-escaped quote in an unquoted element.
     var expectedBare = Arrays.AsList("foo", "\"", "bar");
     var actualBare = TSVUtils.ParseArray("{foo,\\\",bar}");
     NUnit.Framework.Assert.AreEqual(expectedBare, actualBare);

     // Four backslashes in the input come back as a single backslash. This pins the
     // current behavior: the answer should really contain 2 backslashes but doesn't.
     var expectedBackslash = Collections.SingletonList("aa\\bb");
     var actualBackslash = TSVUtils.ParseArray("{\"aa\\\\\\\\bb\"}");
     NUnit.Framework.Assert.AreEqual(expectedBackslash, actualBackslash);

     // A doubled quote inside a quoted element yields one literal quote.
     var expectedDoubled = Collections.SingletonList("a\"b");
     var actualDoubled = TSVUtils.ParseArray("{\"a\"\"b\"}");
     NUnit.Framework.Assert.AreEqual(expectedDoubled, actualDoubled);
 }
예제 #2
0
        /// <summary>
        /// Parses a real sentence whose first element contains double-escaped quotes and
        /// embedded commas, and checks both the token count and the exact token list.
        /// </summary>
        public virtual void TestRealSentenceDoubleEscaped()
        {
            string rawSentence = "{\"<ref name=\\\\\"Dr. Mohmmad Riaz Suddle, Director of the Paksat-IR programme and current executive member of the Suparco's plan and research division \\\\\"/>\",On,August,11th,\",\",Paksat-1R,|,'',Paksat-IR,'',was,launched,from,Xichang,Satellite,Launch,Center,by,Suparco,\",\",making,it,first,satellite,to,be,launched,under,this,programme,.}";

            // The whole <ref .../> tag must survive as one element even though it contains commas.
            var expectedTokens = Arrays.AsList(
                "<ref name=\"Dr. Mohmmad Riaz Suddle, Director of the Paksat-IR programme and current executive member of the Suparco's plan and research division \"/>",
                "On", "August", "11th", ",", "Paksat-1R",
                "|", "''", "Paksat-IR", "''", "was", "launched", "from", "Xichang", "Satellite", "Launch", "Center", "by", "Suparco", ",",
                "making", "it", "first", "satellite", "to", "be", "launched", "under", "this", "programme", ".");

            NUnit.Framework.Assert.AreEqual(31, TSVUtils.ParseArray(rawSentence).Count);
            NUnit.Framework.Assert.AreEqual(expectedTokens, TSVUtils.ParseArray(rawSentence));
        }
예제 #3
0
        /// <summary>
        /// Builds a sentence-level <c>ICoreMap</c> from one row of TSV data.
        /// </summary>
        /// <remarks>
        /// <paramref name="fields"/> and <paramref name="entries"/> are walked in lockstep through
        /// <c>Iterables.Zip</c>, and every entry is passed through <c>TSVUtils.UnescapeSQL</c>
        /// before it is interpreted. The row is scanned several times because later passes read
        /// state built by earlier ones:
        /// <list type="number">
        /// <item><description>Token-level columns (words, lemmas, POS tags, NER tags). The first such
        /// column encountered creates the token list.</description></item>
        /// <item><description>Sentence/document-level columns (id, doc id, sentence index, gloss).</description></item>
        /// <item><description>Doc-char begin/end columns, which overwrite the synthetic character
        /// offsets assigned while reading the words column.</description></item>
        /// <item><description>Per-token bookkeeping annotations, then the dependency-tree columns.</description></item>
        /// </list>
        /// Steps 3 and 4 are skipped entirely when no token-level column was present.
        /// Missing values default to sentence id <c>"-1"</c> and doc id <c>"???"</c>.
        /// </remarks>
        /// <param name="fields">The column kinds, one per entry.</param>
        /// <param name="entries">The raw (still SQL-escaped) column values of the row.</param>
        /// <returns>The populated map; it carries a tokens annotation only if a token-level column was seen.</returns>
        public static ICoreMap ToCoreMap(IList <TSVSentenceIterator.SentenceField> fields, IList <string> entries)
        {
            ICoreMap map = new ArrayCoreMap(fields.Count);
            Optional <IList <CoreLabel> > tokens = Optional.Empty();

            // Pass 1 - token-level columns.
            foreach (Pair <TSVSentenceIterator.SentenceField, string> entry in Iterables.Zip(fields, entries))
            {
                TSVSentenceIterator.SentenceField field = entry.first;
                string value = TSVUtils.UnescapeSQL(entry.second);
                switch (field)
                {
                case TSVSentenceIterator.SentenceField.Words:
                {
                    IList <string> values = TSVUtils.ParseArray(value);
                    tokens = EnsureTokenList(tokens, values.Count);
                    // Synthetic offsets: each word starts one character after the previous
                    // word ends. A doc-char column, if present, replaces these later.
                    int beginChar = 0;
                    for (int i = 0; i < values.Count; i++)
                    {
                        CoreLabel token = tokens.Get()[i];
                        token.SetValue(values[i]);
                        token.SetWord(values[i]);
                        token.SetBeginPosition(beginChar);
                        token.SetEndPosition(beginChar + values[i].Length);
                        beginChar += values[i].Length + 1;
                    }
                    break;
                }

                case TSVSentenceIterator.SentenceField.Lemmas:
                {
                    IList <string> values = TSVUtils.ParseArray(value);
                    tokens = EnsureTokenList(tokens, values.Count);
                    for (int i = 0; i < values.Count; i++)
                    {
                        tokens.Get()[i].SetLemma(values[i]);
                    }
                    break;
                }

                case TSVSentenceIterator.SentenceField.PosTags:
                {
                    IList <string> values = TSVUtils.ParseArray(value);
                    tokens = EnsureTokenList(tokens, values.Count);
                    for (int i = 0; i < values.Count; i++)
                    {
                        tokens.Get()[i].SetTag(values[i]);
                    }
                    break;
                }

                case TSVSentenceIterator.SentenceField.NerTags:
                {
                    IList <string> values = TSVUtils.ParseArray(value);
                    tokens = EnsureTokenList(tokens, values.Count);
                    for (int i = 0; i < values.Count; i++)
                    {
                        tokens.Get()[i].SetNER(values[i]);
                    }
                    break;
                }

                default:
                {
                    // Not a token-level column; handled (or ignored) by a later pass.
                    break;
                }
                }
            }

            // Pass 2 - sentence/document-level columns.
            Optional <string> docId         = Optional.Empty();
            Optional <string> sentenceId    = Optional.Empty();
            Optional <int>    sentenceIndex = Optional.Empty();

            foreach (Pair <TSVSentenceIterator.SentenceField, string> entry in Iterables.Zip(fields, entries))
            {
                TSVSentenceIterator.SentenceField field = entry.first;
                string value = TSVUtils.UnescapeSQL(entry.second);
                switch (field)
                {
                case TSVSentenceIterator.SentenceField.Id:
                {
                    sentenceId = Optional.Of(value);
                    break;
                }

                case TSVSentenceIterator.SentenceField.DocId:
                {
                    docId = Optional.Of(value);
                    break;
                }

                case TSVSentenceIterator.SentenceField.SentenceIndex:
                {
                    // Machine-readable data: parse independently of the thread's culture.
                    sentenceIndex = Optional.Of(System.Convert.ToInt32(value, System.Globalization.CultureInfo.InvariantCulture));
                    break;
                }

                case TSVSentenceIterator.SentenceField.Gloss:
                {
                    // The gloss stores newlines and tabs as the two-character sequences \n and \t.
                    value = value.Replace("\\n", "\n").Replace("\\t", "\t");
                    map.Set(typeof(CoreAnnotations.TextAnnotation), value);
                    break;
                }

                default:
                {
                    // ignore.
                    break;
                }
                }
            }

            // High level document stuff.
            // NOTE(review): the sentence index defaults to 0 here but to -1 on the individual
            // tokens below; kept as-is because callers may rely on either value - confirm intent.
            map.Set(typeof(CoreAnnotations.SentenceIDAnnotation), sentenceId.OrElse("-1"));
            map.Set(typeof(CoreAnnotations.DocIDAnnotation), docId.OrElse("???"));
            map.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex.OrElse(0));

            // Everything below decorates or consumes the tokens.
            if (!tokens.IsPresent())
            {
                return(map);
            }
            IList <CoreLabel> labels = tokens.Get();

            // Pass 3 - document character offsets. Each array is indexed by token position,
            // so it is expected to hold at least one entry per token.
            foreach (Pair <TSVSentenceIterator.SentenceField, string> entry in Iterables.Zip(fields, entries))
            {
                TSVSentenceIterator.SentenceField field = entry.first;
                string value = TSVUtils.UnescapeSQL(entry.second);
                switch (field)
                {
                case TSVSentenceIterator.SentenceField.DocCharBegin:
                {
                    IList <string> values = TSVUtils.ParseArray(value);
                    for (int i = 0; i < labels.Count; i++)
                    {
                        labels[i].SetBeginPosition(System.Convert.ToInt32(values[i], System.Globalization.CultureInfo.InvariantCulture));
                    }
                    break;
                }

                case TSVSentenceIterator.SentenceField.DocCharEnd:
                {
                    IList <string> values = TSVUtils.ParseArray(value);
                    for (int i = 0; i < labels.Count; i++)
                    {
                        labels[i].SetEndPosition(System.Convert.ToInt32(values[i], System.Globalization.CultureInfo.InvariantCulture));
                    }
                    break;
                }

                default:
                {
                    // ignore.
                    break;
                }
                }
            }

            // Final token level stuff: the index annotation is 1-based, the token span
            // is the half-open range [i, i + 1).
            for (int i = 0; i < labels.Count; i++)
            {
                CoreLabel token = labels[i];
                token.Set(typeof(CoreAnnotations.DocIDAnnotation), docId.OrElse("???"));
                token.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex.OrElse(-1));
                token.Set(typeof(CoreAnnotations.IndexAnnotation), i + 1);
                token.Set(typeof(CoreAnnotations.TokenBeginAnnotation), i);
                token.Set(typeof(CoreAnnotations.TokenEndAnnotation), i + 1);
            }

            // Pass 4 - dependency trees. The tokens are attached to the map first.
            map.Set(typeof(CoreAnnotations.TokensAnnotation), labels);
            map.Set(typeof(CoreAnnotations.TokenBeginAnnotation), 0);
            map.Set(typeof(CoreAnnotations.TokenEndAnnotation), labels.Count);
            foreach (Pair <TSVSentenceIterator.SentenceField, string> entry in Iterables.Zip(fields, entries))
            {
                TSVSentenceIterator.SentenceField field = entry.first;
                string value = TSVUtils.UnescapeSQL(entry.second);
                switch (field)
                {
                case TSVSentenceIterator.SentenceField.DependenciesBasic:
                {
                    // Only the basic annotation is set; the graph is not reused as a
                    // fallback for the collapsed annotations.
                    SemanticGraph graph = TSVUtils.ParseJsonTree(value, labels);
                    map.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), graph);
                    break;
                }

                case TSVSentenceIterator.SentenceField.DependenciesCollapsed:
                {
                    SemanticGraph graph = TSVUtils.ParseJsonTree(value, labels);
                    map.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), graph);
                    break;
                }

                case TSVSentenceIterator.SentenceField.DependenciesCollapsedCc:
                {
                    // Only the CC-processed annotation is set; the graph is not reused as a
                    // fallback for the basic or collapsed annotations.
                    SemanticGraph graph = TSVUtils.ParseJsonTree(value, labels);
                    map.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), graph);
                    break;
                }

                case TSVSentenceIterator.SentenceField.DependenciesAlternate:
                {
                    SemanticGraph graph = TSVUtils.ParseJsonTree(value, labels);
                    map.Set(typeof(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation), graph);
                    break;
                }

                default:
                {
                    // ignore.
                    break;
                }
                }
            }
            return(map);
        }

        /// <summary>
        /// Returns <paramref name="tokens"/> unchanged when it already holds a list; otherwise
        /// returns a new optional wrapping a list of <paramref name="count"/> fresh labels.
        /// </summary>
        /// <param name="tokens">The token list built so far, possibly empty.</param>
        /// <param name="count">Number of empty labels to create when no list exists yet.</param>
        /// <returns>An optional that is guaranteed to hold a token list.</returns>
        private static Optional <IList <CoreLabel> > EnsureTokenList(Optional <IList <CoreLabel> > tokens, int count)
        {
            if (tokens.IsPresent())
            {
                return(tokens);
            }
            IList <CoreLabel> created = new List <CoreLabel>(count);
            for (int i = 0; i < count; i++)
            {
                created.Add(new CoreLabel());
            }
            return(Optional.Of(created));
        }
예제 #4
0
 /// <summary>
 /// Checks that commas inside double-quoted elements are kept as element text
 /// rather than treated as separators by <c>TSVUtils.ParseArray</c>.
 /// </summary>
 public virtual void TestParseArrayQuote()
 {
     var expected = Arrays.AsList("foo", ",", "a,b", "bar");
     var actual = TSVUtils.ParseArray("{foo,\",\",\"a,b\",bar}");

     NUnit.Framework.Assert.AreEqual(expected, actual);
 }
예제 #5
0
 /// <summary>
 /// Checks the simplest case: two unquoted elements separated by a comma.
 /// </summary>
 public virtual void TestParseArrayTrivial()
 {
     var expected = Arrays.AsList("foo", "bar");
     var actual = TSVUtils.ParseArray("{foo,bar}");

     NUnit.Framework.Assert.AreEqual(expected, actual);
 }