/// <summary>
/// Checks how <c>TSVUtils.ParseArray</c> treats escaped quotes and backslashes
/// inside a brace-delimited, comma-separated array literal.
/// </summary>
public virtual void TestParseArrayEscape()
{
    // Backslash-escaped quotes inside quoted elements.
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("foo", "\"", "a\"b", "bar"),
        TSVUtils.ParseArray("{foo,\"\\\"\",\"a\\\"b\",bar}"));

    // A backslash-escaped quote in an unquoted element.
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("foo", "\"", "bar"),
        TSVUtils.ParseArray("{foo,\\\",bar}"));

    // Should really give 2 backslashes in the answer, but doesn't.
    NUnit.Framework.Assert.AreEqual(
        Collections.SingletonList("aa\\bb"),
        TSVUtils.ParseArray("{\"aa\\\\\\\\bb\"}"));

    // A doubled quote inside a quoted element is expected to yield one quote.
    // This assertion must sit on its own line: when it trailed the comment above
    // on the same line, it (and the closing brace) were commented out.
    NUnit.Framework.Assert.AreEqual(
        Collections.SingletonList("a\"b"),
        TSVUtils.ParseArray("{\"a\"\"b\"}"));
}
/// <summary>
/// Parses a real tokenized sentence whose first element contains double-escaped
/// quotes and verifies both the token count and the exact token values.
/// </summary>
public virtual void TestRealSentenceDoubleEscaped()
{
    string sentence = "{\"<ref name=\\\\\"Dr. Mohmmad Riaz Suddle, Director of the Paksat-IR programme and current executive member of the Suparco's plan and research division \\\\\"/>\",On,August,11th,\",\",Paksat-1R,|,'',Paksat-IR,'',was,launched,from,Xichang,Satellite,Launch,Center,by,Suparco,\",\",making,it,first,satellite,to,be,launched,under,this,programme,.}";

    // The array must split into exactly 31 elements.
    int parsedCount = TSVUtils.ParseArray(sentence).Count;
    NUnit.Framework.Assert.AreEqual(31, parsedCount);

    // ...and each element must come back unescaped.
    var expectedTokens = Arrays.AsList(
        "<ref name=\"Dr. Mohmmad Riaz Suddle, Director of the Paksat-IR programme and current executive member of the Suparco's plan and research division \"/>",
        "On", "August", "11th", ",", "Paksat-1R", "|", "''", "Paksat-IR", "''",
        "was", "launched", "from", "Xichang", "Satellite", "Launch", "Center", "by", "Suparco", ",",
        "making", "it", "first", "satellite", "to", "be", "launched", "under", "this", "programme", ".");
    NUnit.Framework.Assert.AreEqual(expectedTokens, TSVUtils.ParseArray(sentence));
}
/// <summary>
/// Builds a sentence-level core map from one row of TSV data.
/// </summary>
/// <remarks>
/// The row is walked several times, each time over the pairs produced by
/// <c>Iterables.Zip(fields, entries)</c>, with every entry first passed through
/// <c>TSVUtils.UnescapeSQL</c>:
/// <list type="number">
/// <item>Token-level columns (words, lemmas, POS tags, NER tags) create and fill the token list.</item>
/// <item>Document-level columns (sentence id, document id, sentence index, gloss) are read.</item>
/// <item>If tokens exist, explicit character offsets overwrite the offsets derived from the words.</item>
/// <item>If tokens exist, per-token bookkeeping annotations are set and dependency trees are parsed.</item>
/// </list>
/// Missing columns fall back to: sentence id <c>"-1"</c>, document id <c>"???"</c>, and a sentence
/// index of <c>0</c> on the map but <c>-1</c> on each token.
/// NOTE(review): the two sentence-index defaults differ; kept as-is because callers may rely on
/// either value - confirm whether this is intentional.
/// NOTE(review): the token-level and offset columns are indexed positionally without length
/// checks, so columns of differing lengths will presumably fail with an index error.
/// </remarks>
/// <param name="fields">The kind of each column, in column order.</param>
/// <param name="entries">The raw (still SQL-escaped) column values, parallel to <paramref name="fields"/>.</param>
/// <returns>The populated map; token and dependency annotations are only set when a token-level column was present.</returns>
public static ICoreMap ToCoreMap(IList<TSVSentenceIterator.SentenceField> fields, IList<string> entries)
{
    ICoreMap map = new ArrayCoreMap(fields.Count);
    Optional<IList<CoreLabel>> tokens = Optional.Empty();

    // First pass - process all token level stuff.
    foreach (Pair<TSVSentenceIterator.SentenceField, string> entry in Iterables.Zip(fields, entries))
    {
        TSVSentenceIterator.SentenceField field = entry.first;
        string value = TSVUtils.UnescapeSQL(entry.second);
        switch (field)
        {
            case TSVSentenceIterator.SentenceField.Words:
            {
                IList<string> words = TSVUtils.ParseArray(value);
                tokens = EnsureTokens(tokens, words.Count);
                // Synthetic character offsets: tokens laid end to end with one
                // separator character between them. The doc-char columns, when
                // present, overwrite these further down.
                int beginChar = 0;
                for (int i = 0; i < words.Count; i++)
                {
                    CoreLabel token = tokens.Get()[i];
                    token.SetValue(words[i]);
                    token.SetWord(words[i]);
                    token.SetBeginPosition(beginChar);
                    token.SetEndPosition(beginChar + words[i].Length);
                    beginChar += words[i].Length + 1;
                }
                break;
            }

            case TSVSentenceIterator.SentenceField.Lemmas:
            {
                IList<string> lemmas = TSVUtils.ParseArray(value);
                tokens = EnsureTokens(tokens, lemmas.Count);
                for (int i = 0; i < lemmas.Count; i++)
                {
                    tokens.Get()[i].SetLemma(lemmas[i]);
                }
                break;
            }

            case TSVSentenceIterator.SentenceField.PosTags:
            {
                IList<string> posTags = TSVUtils.ParseArray(value);
                tokens = EnsureTokens(tokens, posTags.Count);
                for (int i = 0; i < posTags.Count; i++)
                {
                    tokens.Get()[i].SetTag(posTags[i]);
                }
                break;
            }

            case TSVSentenceIterator.SentenceField.NerTags:
            {
                IList<string> nerTags = TSVUtils.ParseArray(value);
                tokens = EnsureTokens(tokens, nerTags.Count);
                for (int i = 0; i < nerTags.Count; i++)
                {
                    tokens.Get()[i].SetNER(nerTags[i]);
                }
                break;
            }

            default:
            {
                // ignore.
                break;
            }
        }
    }

    // Document specific stuff.
    Optional<string> docId = Optional.Empty();
    Optional<string> sentenceId = Optional.Empty();
    Optional<int> sentenceIndex = Optional.Empty();
    foreach (Pair<TSVSentenceIterator.SentenceField, string> entry in Iterables.Zip(fields, entries))
    {
        TSVSentenceIterator.SentenceField field = entry.first;
        string value = TSVUtils.UnescapeSQL(entry.second);
        switch (field)
        {
            case TSVSentenceIterator.SentenceField.Id:
            {
                sentenceId = Optional.Of(value);
                break;
            }

            case TSVSentenceIterator.SentenceField.DocId:
            {
                docId = Optional.Of(value);
                break;
            }

            case TSVSentenceIterator.SentenceField.SentenceIndex:
            {
                // TSV is machine-readable data: parse culture-independently.
                sentenceIndex = Optional.Of(System.Convert.ToInt32(value, System.Globalization.CultureInfo.InvariantCulture));
                break;
            }

            case TSVSentenceIterator.SentenceField.Gloss:
            {
                // Turn the two-character sequences \n and \t back into real control characters.
                value = value.Replace("\\n", "\n").Replace("\\t", "\t");
                map.Set(typeof(CoreAnnotations.TextAnnotation), value);
                break;
            }

            default:
            {
                // ignore.
                break;
            }
        }
    }

    // High level document stuff
    map.Set(typeof(CoreAnnotations.SentenceIDAnnotation), sentenceId.OrElse("-1"));
    map.Set(typeof(CoreAnnotations.DocIDAnnotation), docId.OrElse("???"));
    map.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex.OrElse(0));

    // Everything below needs tokens; without them the map is complete.
    if (!tokens.IsPresent())
    {
        return map;
    }

    IList<CoreLabel> tokenList = tokens.Get();

    // Doc-char: explicit offsets replace the ones synthesised from the words.
    foreach (Pair<TSVSentenceIterator.SentenceField, string> entry in Iterables.Zip(fields, entries))
    {
        TSVSentenceIterator.SentenceField field = entry.first;
        string value = TSVUtils.UnescapeSQL(entry.second);
        switch (field)
        {
            case TSVSentenceIterator.SentenceField.DocCharBegin:
            {
                IList<string> begins = TSVUtils.ParseArray(value);
                for (int i = 0; i < tokenList.Count; i++)
                {
                    tokenList[i].SetBeginPosition(System.Convert.ToInt32(begins[i], System.Globalization.CultureInfo.InvariantCulture));
                }
                break;
            }

            case TSVSentenceIterator.SentenceField.DocCharEnd:
            {
                IList<string> ends = TSVUtils.ParseArray(value);
                for (int i = 0; i < tokenList.Count; i++)
                {
                    tokenList[i].SetEndPosition(System.Convert.ToInt32(ends[i], System.Globalization.CultureInfo.InvariantCulture));
                }
                break;
            }

            default:
            {
                // ignore.
                break;
            }
        }
    }

    // Final token level stuff.
    for (int i = 0; i < tokenList.Count; i++)
    {
        CoreLabel token = tokenList[i];
        token.Set(typeof(CoreAnnotations.DocIDAnnotation), docId.OrElse("???"));
        token.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex.OrElse(-1));
        // IndexAnnotation is 1-based; the token begin/end span is 0-based, end-exclusive.
        token.Set(typeof(CoreAnnotations.IndexAnnotation), i + 1);
        token.Set(typeof(CoreAnnotations.TokenBeginAnnotation), i);
        token.Set(typeof(CoreAnnotations.TokenEndAnnotation), i + 1);
    }

    // Dependency trees
    map.Set(typeof(CoreAnnotations.TokensAnnotation), tokenList);
    map.Set(typeof(CoreAnnotations.TokenBeginAnnotation), 0);
    map.Set(typeof(CoreAnnotations.TokenEndAnnotation), tokenList.Count);
    foreach (Pair<TSVSentenceIterator.SentenceField, string> entry in Iterables.Zip(fields, entries))
    {
        TSVSentenceIterator.SentenceField field = entry.first;
        string value = TSVUtils.UnescapeSQL(entry.second);

        // Each dependency column is stored only under its own annotation key;
        // one flavour is never used to fill in another.
        switch (field)
        {
            case TSVSentenceIterator.SentenceField.DependenciesBasic:
            {
                SemanticGraph graph = TSVUtils.ParseJsonTree(value, tokenList);
                map.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), graph);
                break;
            }

            case TSVSentenceIterator.SentenceField.DependenciesCollapsed:
            {
                SemanticGraph graph = TSVUtils.ParseJsonTree(value, tokenList);
                map.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), graph);
                break;
            }

            case TSVSentenceIterator.SentenceField.DependenciesCollapsedCc:
            {
                SemanticGraph graph = TSVUtils.ParseJsonTree(value, tokenList);
                map.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), graph);
                break;
            }

            case TSVSentenceIterator.SentenceField.DependenciesAlternate:
            {
                SemanticGraph graph = TSVUtils.ParseJsonTree(value, tokenList);
                map.Set(typeof(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation), graph);
                break;
            }

            default:
            {
                // ignore.
                break;
            }
        }
    }

    return map;
}

/// <summary>
/// Returns <paramref name="tokens"/> unchanged when it already holds a list; otherwise
/// returns a new optional wrapping a list of <paramref name="count"/> freshly created labels.
/// </summary>
private static Optional<IList<CoreLabel>> EnsureTokens(Optional<IList<CoreLabel>> tokens, int count)
{
    if (tokens.IsPresent())
    {
        return tokens;
    }

    Optional<IList<CoreLabel>> created = Optional.Of(new List<CoreLabel>(count));
    for (int i = 0; i < count; i++)
    {
        created.Get().Add(new CoreLabel());
    }
    return created;
}
/// <summary>
/// Verifies that commas inside double-quoted elements are kept as element
/// content rather than treated as separators.
/// </summary>
public virtual void TestParseArrayQuote()
{
    var expected = Arrays.AsList("foo", ",", "a,b", "bar");
    var actual = TSVUtils.ParseArray("{foo,\",\",\"a,b\",bar}");

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
/// <summary>
/// Verifies the simplest case: two unquoted elements separated by a comma.
/// </summary>
public virtual void TestParseArrayTrivial()
{
    var expected = Arrays.AsList("foo", "bar");
    var actual = TSVUtils.ParseArray("{foo,bar}");

    NUnit.Framework.Assert.AreEqual(expected, actual);
}