/// <summary>
/// Demonstrates basic pipeline usage: prints per-token attributes and entity spans for a
/// sample sentence, then vector information, pairwise token similarity and
/// vocabulary/lexeme lookups using a second model.
/// </summary>
public static void Run()
{
    var spacy = new Spacy();

    // Section 1: per-token attributes using the "en_core_web_sm" model.
    var smallModel = spacy.Load("en_core_web_sm");
    var sentence = smallModel.GetDocument("Apple is looking at buying U.K. startup for $1 billion");
    foreach (Token tok in sentence.Tokens)
    {
        Console.WriteLine($"{tok.Text} {tok.Lemma} {tok.PoS} {tok.Tag} {tok.Dep} {tok.Shape} {tok.IsAlpha} {tok.IsStop}");
    }

    // Section 2: entity spans of the same document.
    Console.WriteLine("");
    foreach (Span entity in sentence.Ents)
    {
        Console.WriteLine($"{entity.Text} {entity.StartChar} {entity.EndChar} {entity.Label}");
    }

    // Everything below runs on the "en_core_web_md" model.
    var mediumModel = spacy.Load("en_core_web_md");

    // Section 3: vector availability, norm and out-of-vocabulary flag per token.
    var words = mediumModel.GetDocument("dog cat banana afskfsd");
    Console.WriteLine("");
    foreach (Token tok in words.Tokens)
    {
        Console.WriteLine($"{tok.Text} {tok.HasVector} {tok.VectorNorm}, {tok.IsOov}");
    }

    // Section 4: similarity of every ordered pair of tokens (including each token with itself).
    words = mediumModel.GetDocument("dog cat banana");
    Console.WriteLine("");
    foreach (Token left in words.Tokens)
    {
        foreach (Token right in words.Tokens)
        {
            Console.WriteLine($"{left.Text} {right.Text} {left.Similarity(right)}");
        }
    }

    // Section 5: string-store lookups by text and by numeric key.
    var coffeeDoc = mediumModel.GetDocument("I love coffee");
    Console.WriteLine("");
    Console.WriteLine(coffeeDoc.Vocab.Strings["coffee"]);
    // NOTE(review): this literal is presumably the store key for "coffee" - confirm.
    Console.WriteLine(coffeeDoc.Vocab.Strings[3197928453018144401]);

    // Section 6: lexeme attributes for each word of the document.
    Console.WriteLine("");
    foreach (Token word in coffeeDoc.Tokens)
    {
        var entry = coffeeDoc.Vocab[word.Text];
        Console.WriteLine($"{entry.Text} {entry.Orth} {entry.Shape} {entry.Prefix} {entry.Suffix} {entry.IsAlpha} {entry.IsDigit} {entry.IsTitle} {entry.Lang}");
    }
}
/// <summary>
/// Parses a sample sentence with the "en_core_web_sm" model and hands the resulting
/// document to <c>Displacy.Serve</c> with the "dep" option.
/// </summary>
public static void Run()
{
    var pipeline = new Spacy().Load("en_core_web_sm");
    var parsed = pipeline.GetDocument("Apple is looking at buying U.K. startup for $1 billion");

    // NOTE(review): "dep" presumably selects the dependency-parse rendering - confirm
    // against the Displacy wrapper.
    new Displacy().Serve(parsed, "dep");
}
/// <summary>
/// Prints, for every token of a sample sentence, its text, dependency label,
/// head token text and the texts of its children.
/// </summary>
public static void Run()
{
    var pipeline = new Spacy().Load("en_core_web_sm");
    var parsed = pipeline.GetDocument("Autonomous cars shift insurance liability toward manufacturers");

    foreach (var tok in parsed.Tokens)
    {
        // Collect the text of each child so they can be shown as a comma-separated list.
        var childTexts = new List<string>();
        foreach (var child in tok.Children)
        {
            childTexts.Add(child.Text);
        }

        Console.WriteLine($"{tok.Text} {tok.Dep} {tok.Head.Text} [{string.Join(", ", childTexts)}]");
    }
}
/// <summary>
/// Round-trips a small document through three serialization paths - a file on disk,
/// a byte array, and a <c>DocBin</c> - printing the original and each restored copy
/// with <c>PrintDoc</c>.
/// </summary>
public static void Run()
{
    var spacy = new Spacy();
    var text = "I love coffee";

    // Build the reference document
    var nlp = spacy.Load("en_core_web_sm");
    var original = nlp.GetDocument(text);
    Console.WriteLine("");
    PrintDoc(original);

    // Persist it: first to the file "doc.spacy", then to an in-memory byte array
    original.ToDisk("doc.spacy");
    var originalBytes = original.ToBytes();

    // Persist it a third way, through a DocBin restricted to the listed attributes
    var attributeNames = new[] { "ENT_IOB", "POS", "HEAD", "DEP", "ENT_TYPE" };
    var writerBin = new DocBin(attrs: attributeNames, storeUserData: true);
    writerBin.Add(original);
    var binBytes = writerBin.ToBytes();

    // Read the file back into a document created over a fresh vocabulary
    var fromDisk = new Doc(new Vocab());
    fromDisk.FromDisk("doc.spacy");
    Console.WriteLine("");
    PrintDoc(fromDisk);

    // Read the byte array back, again into a fresh document
    var fromBytes = new Doc(new Vocab());
    fromBytes.FromBytes(originalBytes);
    Console.WriteLine("");
    PrintDoc(fromBytes);

    // Read the DocBin bytes back and materialize documents against the pipeline's vocabulary
    var readerBin = new DocBin();
    readerBin.FromBytes(binBytes);
    var restored = readerBin.GetDocs(nlp.Vocab);
    Console.WriteLine("");
    PrintDoc(restored[0]);
}
/// <summary>
/// Runs the Spanish "es_core_news_sm" pipeline over a sample paragraph and prints, section
/// by section: the pipeline component names, the tokens, their part-of-speech values,
/// detailed attributes of the first token, lemmas, sentences, noun chunks and entities.
/// </summary>
public static void Run()
{
    var spacy = new Spacy();
    var nlp = spacy.Load("es_core_news_sm");
    var separator = "____________________________________________________________________________";

    var text = @"Cuando Sebastian Thrun empezó a trabajar en coches de conducción autónoma, en 2007, para ";
    text += "Google, muy poca gente fuera de la empresa le tomó en serio. “Podría contaros como CEOs muy ";
    text += "veteranos de las empresas automotrices más grandes de América me daban la mano para después ";
    text += "ignorarme porque no merecía la pena hablar conmigo”, comentaba Thrun, en una entrevista a Recode ";
    text += "a principios de semana";

    var doc = nlp.GetDocument(text);

    Console.WriteLine("Pipeline:");
    Console.WriteLine(string.Join(",", nlp.PipeNames));
    Console.WriteLine(separator);

    // Each list below is written as "'item', " per element; the trailing "\b\b]" then
    // emits two backspace characters so the closing bracket lands on the last comma.
    // This relies on the output device honouring backspaces.
    Console.WriteLine("Tokenization");
    Console.Write("[");
    foreach (var token in doc.Tokens)
    {
        Console.Write("'" + token.Text + "', ");
    }
    Console.WriteLine("\b\b]");
    Console.WriteLine(separator);

    Console.WriteLine("Pos");
    Console.Write("[");
    foreach (var token in doc.Tokens)
    {
        Console.Write("'" + token.PoS + "', ");
    }
    Console.WriteLine("\b\b]");
    Console.WriteLine(separator);

    Console.WriteLine("PoS[0]:");
    var token0 = doc.Tokens[0];
    // In spaCy, `tag_` is the fine-grained tag and `pos_` the coarse-grained one; the
    // labels were previously attached to the opposite properties.
    // NOTE(review): assumes Token.Tag / Token.PoS wrap `tag_` / `pos_` - confirm in the wrapper.
    Console.WriteLine("Fine-grained POS tag " + token0.Tag);
    Console.WriteLine("Coarse-grained POS tag " + token0.PoS);
    Console.WriteLine("Word shape " + token0.Shape);
    Console.WriteLine("Alphabetic characters? " + token0.IsAlpha);
    Console.WriteLine("Punctuation mark? " + token0.IsPunct);
    Console.WriteLine("Digit? " + token0.IsDigit);
    Console.WriteLine("Like a number? " + token0.LikeNum);
    // The literal is kept on a single line: a regular string constant cannot span lines.
    Console.WriteLine("Like an email address? " + token0.LikeEMail);
    Console.WriteLine(separator);

    Console.WriteLine("Lemmatization:");
    Console.Write("[");
    foreach (var token in doc.Tokens)
    {
        Console.Write("'" + token.Lemma + "', ");
    }
    Console.WriteLine("\b\b]");
    Console.WriteLine(separator);

    Console.WriteLine("Sentences:");
    Console.Write("[");
    foreach (var sentence in doc.Sents)
    {
        Console.Write("'" + sentence.Text + "', ");
    }
    Console.WriteLine("\b\b]");
    Console.WriteLine(separator);

    Console.WriteLine("Noun Phrases:");
    Console.Write("[");
    foreach (var nounChunk in doc.NounChunks)
    {
        Console.Write("'" + nounChunk.Text + "', ");
    }
    Console.WriteLine("\b\b]");
    Console.WriteLine(separator);

    Console.WriteLine("Entities (Named entities, phrases and concepts):");
    foreach (var entity in doc.Ents)
    {
        Console.WriteLine("Entity: " + entity.Text + "\tLabel: " + entity.Label);
    }
    Console.WriteLine(separator);
}