/// <summary>
/// Builds a document by calling spaCy's <c>Doc</c> with the Python object of the given vocabulary.
/// </summary>
/// <param name="vocab">Vocabulary kept by this document; its <c>PyVocab</c> is handed to spaCy.</param>
public Doc(Vocab vocab)
{
    _vocab = vocab;

    // All calls into Python below happen while the GIL is held.
    using (Py.GIL())
    {
        dynamic spacyModule = Py.Import("spacy");
        dynamic nativeVocab = vocab.PyVocab;
        PyDoc = spacyModule.tokens.doc.Doc.__call__(nativeVocab);
    }
}
/// <summary>
/// Makes this instance share the state of <paramref name="doc"/> by copying its references
/// (no deep copy is performed).
/// </summary>
/// <param name="doc">Document whose Python object and cached members are taken over.</param>
private void Copy(Doc doc)
{
    // Always take the source's Python object, whatever the serialization mode.
    // In DotNet mode this sets PyDoc to null, discarding the current value,
    // which might be a default object.
    PyDoc = doc.PyDoc;

    // Scalar state.
    _vocab = doc._vocab;
    _text = doc._text;

    // Cached collections.
    _ents = doc._ents;
    _nounChunks = doc._nounChunks;
    _sentences = doc._sentences;
    _tokens = doc._tokens;
}
/// <summary>
/// Wraps an existing Python document object and records its text.
/// </summary>
/// <param name="doc">Python object stored in <c>PyDoc</c>.</param>
/// <param name="text">Text stored for this document.</param>
internal Doc(dynamic doc, string text)
{
    PyDoc = doc;

    _text = text;
    // No vocabulary is attached by this constructor.
    _vocab = null;
}
/// <summary>
/// Wraps an existing Python document object.
/// </summary>
/// <param name="doc">Python object stored in <c>PyDoc</c>.</param>
internal Doc(dynamic doc)
{
    PyDoc = doc;

    // No vocabulary is attached by this constructor.
    _vocab = null;
}
/// <summary>
/// Restores this document from XML.
/// </summary>
/// <remarks>
/// After the opening <c>Doc</c> element, the children are consumed in this order:
/// <c>PyObj</c> (only when the selected mode is <c>SpacyAndDotNet</c>; Base64 bytes fed to
/// the Python document's <c>from_bytes</c>), <c>Text</c>, <c>Vocab</c>, <c>Tokens</c>,
/// <c>Sentences</c>, <c>NounChunks</c> and <c>Ents</c>.
/// Every list element, including <c>Ents</c>, may be self-closing; in that case nothing
/// further is consumed for it.
/// </remarks>
/// <param name="reader">Reader positioned on (or on whitespace before) the <c>Doc</c> element.</param>
public void ReadXml(XmlReader reader)
{
    var serializationMode = Serialization.Selected;

    reader.MoveToContent();
    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Doc");
    reader.ReadStartElement();

    if (serializationMode == Serialization.Mode.SpacyAndDotNet)
    {
        Debug.Assert(reader.Name == $"{Serialization.Prefix}:PyObj");
        var bytesB64 = reader.ReadElementContentAsString();
        var bytes = Convert.FromBase64String(bytesB64);

        using (Py.GIL())
        {
            dynamic spacy = Py.Import("spacy");
            dynamic pyVocab = spacy.vocab.Vocab.__call__();
            PyDoc = spacy.tokens.doc.Doc.__call__(pyVocab);
            var pyBytes = ToPython.GetBytes(bytes);
            PyDoc.from_bytes(pyBytes);

            // NOTE(review): this Vocab is replaced further down by the one read from the
            // Vocab element -- confirm that overwriting it is intended.
            _vocab = new Vocab(PyDoc.vocab);
        }
    }

    Debug.Assert(Serialization.Selected != Serialization.Mode.Spacy);

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Text");
    _text = reader.ReadElementContentAsString();

    Debug.Assert(reader.Name == $"{Serialization.Prefix}:Vocab");
    _vocab = new Vocab(null);
    _vocab.ReadXml(reader);

    _tokens = new List<Token>();
    ReadItems(reader, "Tokens", "Token", r =>
    {
        var item = new Token();
        item.ReadXml(r);
        _tokens.Add(item);
    });

    // Heads can only be resolved once every token has been read.
    foreach (var token in _tokens)
    {
        token.RestoreHead(_tokens);
    }

    _sentences = new List<Span>();
    ReadItems(reader, "Sentences", "Sent", r =>
    {
        var item = new Span();
        item.ReadXml(r);
        _sentences.Add(item);
    });

    _nounChunks = new List<Span>();
    ReadItems(reader, "NounChunks", "NounChunk", r =>
    {
        var item = new Span();
        item.ReadXml(r);
        _nounChunks.Add(item);
    });

    _ents = new List<Span>();
    ReadItems(reader, "Ents", "Ent", r =>
    {
        var item = new Span();
        item.ReadXml(r);
        _ents.Add(item);
    });
}

/// <summary>
/// Reads one list element and calls <paramref name="readItem"/> for each non-empty child.
/// </summary>
/// <param name="reader">Reader positioned on the list's start element.</param>
/// <param name="listName">Local name of the list element (without the serialization prefix).</param>
/// <param name="itemName">Local name of each child element (without the serialization prefix).</param>
/// <param name="readItem">
/// Invoked with the reader positioned just inside a child element; the child's end element
/// is consumed here after the callback returns.
/// </param>
private static void ReadItems(XmlReader reader, string listName, string itemName, Action<XmlReader> readItem)
{
    Debug.Assert(reader.Name == $"{Serialization.Prefix}:{listName}");

    // IsEmptyElement must be sampled before ReadStartElement moves past the element.
    var isEmpty = reader.IsEmptyElement;
    reader.ReadStartElement();
    if (isEmpty)
    {
        // A self-closing list has no end element of its own; reading one here would
        // consume the end element of an enclosing element instead.
        return;
    }

    while (reader.MoveToContent() != XmlNodeType.EndElement)
    {
        Debug.Assert(reader.Name == $"{Serialization.Prefix}:{itemName}");
        reader.ReadStartElement();
        if (reader.NodeType != XmlNodeType.EndElement)
        {
            readItem(reader);
            reader.ReadEndElement();
        }
    }

    reader.ReadEndElement();
}
/// <summary>
/// Returns the documents obtained from the underlying Python object's <c>get_docs</c>.
/// </summary>
/// <param name="vocab">Vocabulary whose <c>PyVocab</c> is passed to <c>get_docs</c>.</param>
/// <returns>
/// The list produced by <c>Interop.GetListFromGenerator</c>, which also receives
/// <c>_docs</c> by reference (presumably as a cache -- confirm against <c>Interop</c>).
/// </returns>
public List<Doc> GetDocs(Vocab vocab)
{
    // When _pyDocBin is null the call is skipped entirely and null is passed on.
    dynamic docsGenerator = _pyDocBin?.get_docs(vocab.PyVocab);
    return Interop.GetListFromGenerator(docsGenerator, ref _docs);
}