/// <summary>
/// Restores this document from a serialized byte array.
/// </summary>
/// <remarks>
/// When <c>Serialization.Selected</c> is <c>Serialization.Mode.Spacy</c>, the bytes are
/// converted to a Python bytes object and handed to <c>PyDoc.from_bytes</c> while holding
/// the Python GIL. In every other mode the bytes are parsed as XML (comments and
/// insignificant whitespace are ignored), read into a temporary <c>Doc</c> through
/// <c>ReadXml</c>, and then copied into this instance with <c>Copy</c>.
/// </remarks>
/// <param name="bytes">The serialized document.</param>
public void FromBytes(byte[] bytes)
{
    if (Serialization.Selected == Serialization.Mode.Spacy)
    {
        using (Py.GIL())
        {
            var pyBytes = ToPython.GetBytes(bytes);
            PyDoc.from_bytes(pyBytes);
        }

        return;
    }

    var settings = new XmlReaderSettings
    {
        IgnoreComments = true,
        IgnoreWhitespace = true
    };

    var doc = new Doc();

    // Both the stream and the reader are IDisposable; release them as soon as
    // the XML has been consumed instead of leaving them to the finalizer.
    using (var stream = new MemoryStream(bytes))
    using (var reader = XmlReader.Create(stream, settings))
    {
        doc.ReadXml(reader);
    }

    Copy(doc);
}
/// <summary>
/// Populates this document from its XML representation.
/// </summary>
/// <remarks>
/// The reader is advanced to the <c>Doc</c> element, whose children are read in this order
/// (every element name is prefixed with <c>Serialization.Prefix</c> and a colon):
/// <c>PyObj</c> (only when the selected mode is <c>SpacyAndDotNet</c>; Base64 payload passed to
/// <c>PyDoc.from_bytes</c>), <c>Text</c>, <c>Vocab</c>, <c>Tokens</c>/<c>Token</c>,
/// <c>Sentences</c>/<c>Sent</c>, <c>NounChunks</c>/<c>NounChunk</c> and <c>Ents</c>/<c>Ent</c>.
/// Element names are verified with <c>Debug.Assert</c> only, so they are not checked in
/// release builds. The closing <c>Doc</c> tag is not consumed by this method.
/// </remarks>
/// <param name="reader">XML reader positioned at, or before, the <c>Doc</c> element.</param>
public void ReadXml(XmlReader reader)
{
    var serializationMode = Serialization.Selected;

    reader.MoveToContent();
    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Doc");
    reader.ReadStartElement();

    if (serializationMode == Serialization.Mode.SpacyAndDotNet)
    {
        // The native spaCy payload is stored as Base64 text in the PyObj element.
        Debug.Assert(reader.Name == $"{Serialization.Prefix}:PyObj");
        var bytesB64 = reader.ReadElementContentAsString();
        var bytes = Convert.FromBase64String(bytesB64);

        using (Py.GIL())
        {
            dynamic spacy = Py.Import("spacy");
            dynamic pyVocab = spacy.vocab.Vocab.__call__();
            PyDoc = spacy.tokens.doc.Doc.__call__(pyVocab);
            var pyBytes = ToPython.GetBytes(bytes);
            PyDoc.from_bytes(pyBytes);

            // NOTE(review): this value is replaced unconditionally by the Vocab
            // element read below -- confirm whether that is intended.
            _vocab = new Vocab(PyDoc.vocab);
        }
    }

    Debug.Assert(serializationMode != Serialization.Mode.Spacy);

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Text");
    _text = reader.ReadElementContentAsString();

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Vocab");
    _vocab = new Vocab(null);
    _vocab.ReadXml(reader);

    _tokens = new List<Token>();
    ReadXmlCollection(reader, "Tokens", "Token", itemReader =>
    {
        var item = new Token();
        item.ReadXml(itemReader);
        _tokens.Add(item);
    });

    // Heads can only be resolved once the full token list is available.
    foreach (var token in _tokens)
    {
        token.RestoreHead(_tokens);
    }

    _sentences = new List<Span>();
    ReadXmlCollection(reader, "Sentences", "Sent", itemReader =>
    {
        var item = new Span();
        item.ReadXml(itemReader);
        _sentences.Add(item);
    });

    _nounChunks = new List<Span>();
    ReadXmlCollection(reader, "NounChunks", "NounChunk", itemReader =>
    {
        var item = new Span();
        item.ReadXml(itemReader);
        _nounChunks.Add(item);
    });

    // Ents now goes through the same empty-element guard as the other sections;
    // previously an empty <Ents/> made the trailing ReadEndElement() consume the
    // parent's closing tag instead.
    _ents = new List<Span>();
    ReadXmlCollection(reader, "Ents", "Ent", itemReader =>
    {
        var item = new Span();
        item.ReadXml(itemReader);
        _ents.Add(item);
    });
}

/// <summary>
/// Reads one container element and invokes <paramref name="readItem"/> for each non-empty child.
/// </summary>
/// <param name="reader">Reader positioned on the container's start tag.</param>
/// <param name="collectionName">Local name of the container element (without prefix).</param>
/// <param name="itemName">Local name of each child element (without prefix).</param>
/// <param name="readItem">Callback that reads the content of a single child element.</param>
private static void ReadXmlCollection(XmlReader reader, string collectionName, string itemName, Action<XmlReader> readItem)
{
    Debug.Assert(reader.Name == $"{Serialization.Prefix}:{collectionName}");

    // IsEmptyElement must be sampled before ReadStartElement moves past the tag.
    var isEmpty = reader.IsEmptyElement;
    reader.ReadStartElement();
    if (isEmpty)
    {
        // A self-closing container has no children and no separate end tag.
        return;
    }

    while (reader.MoveToContent() != XmlNodeType.EndElement)
    {
        Debug.Assert(reader.Name == $"{Serialization.Prefix}:{itemName}");
        reader.ReadStartElement();
        if (reader.NodeType != XmlNodeType.EndElement)
        {
            readItem(reader);
            reader.ReadEndElement();
        }
    }

    reader.ReadEndElement();
}