/// <summary>
/// Restores the Python vocabulary from the XML element the reader is positioned on.
/// </summary>
/// <param name="reader">Reader positioned on the element that wraps the vocabulary data.</param>
/// <remarks>
/// Only <c>Serialization.Mode.SpacyAndDotNet</c> reads a payload: the Base64 text of the nested
/// <c>PyObj</c> element is decoded and loaded into a newly created spaCy <c>Vocab</c>.
/// In every other mode the element is skipped as a whole.
/// </remarks>
public void ReadXml(XmlReader reader)
{
    var mode = Serialization.Selected;

    if (mode != Serialization.Mode.SpacyAndDotNet)
    {
        // No Python payload is read in this mode: step over the element and all of its content.
        reader.Skip();
    }
    else
    {
        // Enter the wrapper element; the PyObj child is expected right after its start tag.
        reader.ReadStartElement();
        Debug.Assert(reader.Name == $"{Serialization.Prefix}:PyObj");
        var payload = Convert.FromBase64String(reader.ReadElementContentAsString());

        using (Py.GIL())
        {
            dynamic spacy = Py.Import("spacy");
            PyVocab = spacy.vocab.Vocab.__call__();
            PyVocab.from_bytes(ToPython.GetBytes(payload));
        }

        // Leave the wrapper element.
        reader.ReadEndElement();
    }

    // Reading XML is not expected while the pure Spacy mode is selected.
    Debug.Assert(mode != Serialization.Mode.Spacy);
}
/// <summary>
/// Restores this instance from the <c>DocBin</c> XML element.
/// </summary>
/// <param name="reader">
/// Reader positioned on the <c>DocBin</c> start tag, or on non-content nodes preceding it.
/// </param>
/// <remarks>
/// In <c>Serialization.Mode.SpacyAndDotNet</c> the Base64 <c>PyObj</c> child is decoded and loaded
/// into a newly created spaCy <c>DocBin</c>. Afterwards every <c>Doc</c> child of the <c>Docs</c>
/// element is read into the document list. On return the reader has moved past the closing
/// <c>DocBin</c> tag when that tag is present.
/// </remarks>
public void ReadXml(XmlReader reader)
{
    var serializationMode = Serialization.Selected;
    var docBinName = $"{Serialization.Prefix}:DocBin";
    var docName = $"{Serialization.Prefix}:Doc";

    reader.MoveToContent();
    Debug.Assert(reader.Name == docBinName);
    reader.ReadStartElement();

    if (serializationMode == Serialization.Mode.SpacyAndDotNet)
    {
        Debug.Assert(reader.Name == $"{Serialization.Prefix}:PyObj");
        var bytesB64 = reader.ReadElementContentAsString();
        var bytes = Convert.FromBase64String(bytesB64);
        using (Py.GIL())
        {
            dynamic spacy = Py.Import("spacy");
            _pyDocBin = spacy.tokens.DocBin.__call__();
            var pyBytes = ToPython.GetBytes(bytes);
            _pyDocBin.from_bytes(pyBytes);
        }
    }

    // Reading XML is not expected while the pure Spacy mode is selected.
    Debug.Assert(serializationMode != Serialization.Mode.Spacy);

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Docs");
    _docs = new List<Doc>();

    // A self-closing <Docs /> has no end tag of its own. Calling ReadEndElement for it
    // would consume the end tag of the enclosing element instead.
    var isEmpty = reader.IsEmptyElement;
    reader.ReadStartElement();
    if (!isEmpty)
    {
        while (reader.MoveToContent() != XmlNodeType.EndElement)
        {
            var doc = new Doc();
            doc.ReadXml(reader);
            _docs.Add(doc);

            // Doc.ReadXml may return while the reader still sits on the closing Doc tag.
            // Consume it here so the loop advances to the next Doc instead of mistaking
            // that tag for the end of the Docs list.
            if (reader.MoveToContent() == XmlNodeType.EndElement && reader.Name == docName)
            {
                reader.ReadEndElement();
            }
        }

        // Closing Docs tag.
        reader.ReadEndElement();
    }

    // IXmlSerializable.ReadXml is expected to read its wrapper element to the end.
    if (reader.MoveToContent() == XmlNodeType.EndElement && reader.Name == docBinName)
    {
        reader.ReadEndElement();
    }
}
/// <summary>
/// Restores this lexeme from a run of consecutive XML elements, starting with the one the
/// reader is currently positioned on.
/// </summary>
/// <param name="reader">Reader positioned on the <c>PyObj</c> element.</param>
/// <remarks>
/// Elements are consumed in a fixed order: PyObj, Text, Shape, Prefix, Suffix, Lang, Orth,
/// IsAlpha, IsDigit, IsTitle. The element names are only verified through debug assertions.
/// </remarks>
public void ReadXml(XmlReader reader)
{
    // TODO: Yet to debug. It's not being used so far

    // Python side: the Base64 payload is decoded and handed to the Python lexeme object.
    Debug.Assert(reader.Name == $"{Serialization.Prefix}:PyObj");
    var payload = Convert.FromBase64String(reader.ReadElementContentAsString());
    using (Py.GIL())
    {
        _pyLexeme.from_bytes(ToPython.GetBytes(payload));
    }

    // String attributes.
    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Text");
    _text = reader.ReadElementContentAsString();

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Shape");
    _shape = reader.ReadElementContentAsString();

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Prefix");
    _prefix = reader.ReadElementContentAsString();

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Suffix");
    _suffix = reader.ReadElementContentAsString();

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Lang");
    _lang = reader.ReadElementContentAsString();

    // Orth is stored as text and parsed into a BigInteger.
    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Orth");
    _orth = BigInteger.Parse(reader.ReadElementContentAsString());

    // Boolean flags.
    Debug.Assert(reader.Name == $"{Serialization.Prefix}:IsAlpha");
    _isAlpha = reader.ReadElementContentAsBoolean();

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:IsDigit");
    _isDigit = reader.ReadElementContentAsBoolean();

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:IsTitle");
    _isTitle = reader.ReadElementContentAsBoolean();
}
/// <summary>
/// Loads the state of this document from a byte array.
/// </summary>
/// <param name="bytes">Serialized document data.</param>
/// <remarks>
/// With <c>Serialization.Mode.Spacy</c> selected the bytes are passed to the Python document's
/// <c>from_bytes</c>. In every other mode they are parsed as XML into a temporary <c>Doc</c>,
/// which is then passed to <c>Copy</c>.
/// </remarks>
public void FromBytes(byte[] bytes)
{
    if (Serialization.Selected == Serialization.Mode.Spacy)
    {
        using (Py.GIL())
        {
            var pyBytes = ToPython.GetBytes(bytes);
            PyDoc.from_bytes(pyBytes);
        }

        return;
    }

    var settings = new XmlReaderSettings
    {
        IgnoreComments = true,
        IgnoreWhitespace = true
    };

    // Both the stream and the reader are disposable; release them deterministically
    // once the temporary document has been read and copied.
    using (var stream = new MemoryStream(bytes))
    using (var reader = XmlReader.Create(stream, settings))
    {
        var doc = new Doc();
        doc.ReadXml(reader);
        Copy(doc);
    }
}
/// <summary>
/// Restores this document from the <c>Doc</c> XML element.
/// </summary>
/// <param name="reader">
/// Reader positioned on the <c>Doc</c> start tag, or on non-content nodes preceding it.
/// </param>
/// <remarks>
/// Children are read in a fixed order: PyObj (only in <c>Serialization.Mode.SpacyAndDotNet</c>),
/// Text, Vocab, Tokens, Sentences, NounChunks and Ents. On return the reader has moved past the
/// closing <c>Doc</c> tag when that tag is present, regardless of whether the Ents list was empty.
/// </remarks>
public void ReadXml(XmlReader reader)
{
    var serializationMode = Serialization.Selected;
    var docName = $"{Serialization.Prefix}:Doc";

    reader.MoveToContent();
    Debug.Assert(reader.Name == docName);
    reader.ReadStartElement();

    if (serializationMode == Serialization.Mode.SpacyAndDotNet)
    {
        Debug.Assert(reader.Name == $"{Serialization.Prefix}:PyObj");
        var bytesB64 = reader.ReadElementContentAsString();
        var bytes = Convert.FromBase64String(bytesB64);
        using (Py.GIL())
        {
            dynamic spacy = Py.Import("spacy");
            dynamic pyVocab = spacy.vocab.Vocab.__call__();
            PyDoc = spacy.tokens.doc.Doc.__call__(pyVocab);
            var pyBytes = ToPython.GetBytes(bytes);
            PyDoc.from_bytes(pyBytes);

            // NOTE(review): this value is replaced below when the Vocab element is read;
            // confirm whether the assignment is still needed.
            _vocab = new Vocab(PyDoc.vocab);
        }
    }

    // Reading XML is not expected while the pure Spacy mode is selected.
    Debug.Assert(Serialization.Selected != Serialization.Mode.Spacy);

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Text");
    _text = reader.ReadElementContentAsString();

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Vocab");
    _vocab = new Vocab(null);
    _vocab.ReadXml(reader);

    _tokens = ReadXmlList(
        reader,
        $"{Serialization.Prefix}:Tokens",
        $"{Serialization.Prefix}:Token",
        r =>
        {
            var item = new Token();
            item.ReadXml(r);
            return item;
        });

    // Runs only after the complete token list is available, since each token
    // receives that list.
    foreach (var token in _tokens)
    {
        token.RestoreHead(_tokens);
    }

    Func<XmlReader, Span> readSpan = r =>
    {
        var span = new Span();
        span.ReadXml(r);
        return span;
    };

    _sentences = ReadXmlList(
        reader, $"{Serialization.Prefix}:Sentences", $"{Serialization.Prefix}:Sent", readSpan);
    _nounChunks = ReadXmlList(
        reader, $"{Serialization.Prefix}:NounChunks", $"{Serialization.Prefix}:NounChunk", readSpan);

    // Ents now gets the same empty-element handling as the other lists. Previously a
    // self-closing Ents element made ReadEndElement consume the closing Doc tag instead.
    _ents = ReadXmlList(
        reader, $"{Serialization.Prefix}:Ents", $"{Serialization.Prefix}:Ent", readSpan);

    // IXmlSerializable.ReadXml is expected to read its wrapper element to the end, so the
    // reader position on return no longer depends on whether Ents had content.
    if (reader.MoveToContent() == XmlNodeType.EndElement && reader.Name == docName)
    {
        reader.ReadEndElement();
    }
}

/// <summary>
/// Reads a list element whose children are item wrapper elements, returning the items in order.
/// </summary>
/// <typeparam name="T">Type of the items produced by <paramref name="readItem"/>.</typeparam>
/// <param name="reader">Reader positioned on the list element's start tag.</param>
/// <param name="listName">Expected qualified name of the list element (debug assertion only).</param>
/// <param name="itemName">Expected qualified name of each item wrapper (debug assertion only).</param>
/// <param name="readItem">
/// Reads one item; called with the reader positioned just inside the item wrapper and expected
/// to leave it on the wrapper's end tag.
/// </param>
/// <returns>The items read; empty when the list element has no children.</returns>
private static List<T> ReadXmlList<T>(
    XmlReader reader, string listName, string itemName, Func<XmlReader, T> readItem)
{
    Debug.Assert(reader.Name == listName);
    var items = new List<T>();

    // A self-closing list element has no end tag of its own, so nothing more may be consumed.
    var listIsEmpty = reader.IsEmptyElement;
    reader.ReadStartElement();
    if (listIsEmpty)
    {
        return items;
    }

    while (reader.MoveToContent() != XmlNodeType.EndElement)
    {
        Debug.Assert(reader.Name == itemName);

        var itemIsEmpty = reader.IsEmptyElement;
        reader.ReadStartElement();
        if (itemIsEmpty)
        {
            // Self-closing item wrapper: there is no content and no end tag to read.
            continue;
        }

        // A wrapper without content contributes no item, but its end tag must still be
        // consumed so it is not mistaken for the end of the list.
        if (reader.NodeType != XmlNodeType.EndElement)
        {
            items.Add(readItem(reader));
        }

        reader.ReadEndElement();
    }

    // Closing tag of the list element.
    reader.ReadEndElement();
    return items;
}