예제 #1
0
        /// <summary>
        /// Console walkthrough of several wrapper features: per-token attributes and entity
        /// spans from the "en_core_web_sm" model, vector attributes and pairwise token
        /// similarity from the "en_core_web_md" model, and string-store / lexeme lookups
        /// through the document's vocabulary.
        /// </summary>
        public static void Run()
        {
            var spacy = new Spacy();

            // Part 1: token attributes and entities using the "en_core_web_sm" model.
            var smallModel = spacy.Load("en_core_web_sm");
            var newsDoc = smallModel.GetDocument("Apple is looking at buying U.K. startup for $1 billion");

            foreach (Token tok in newsDoc.Tokens)
            {
                var tokenLine = $"{tok.Text} {tok.Lemma} {tok.PoS} {tok.Tag} {tok.Dep} {tok.Shape} {tok.IsAlpha} {tok.IsStop}";
                Console.WriteLine(tokenLine);
            }

            Console.WriteLine();
            foreach (Span entity in newsDoc.Ents)
            {
                var entityLine = $"{entity.Text} {entity.StartChar} {entity.EndChar} {entity.Label}";
                Console.WriteLine(entityLine);
            }

            // Part 2: vector-related attributes using the "en_core_web_md" model.
            var mediumModel = spacy.Load("en_core_web_md");
            var vectorDoc = mediumModel.GetDocument("dog cat banana afskfsd");

            Console.WriteLine();
            foreach (Token tok in vectorDoc.Tokens)
            {
                var vectorLine = $"{tok.Text} {tok.HasVector} {tok.VectorNorm}, {tok.IsOov}";
                Console.WriteLine(vectorLine);
            }

            // Part 3: similarity of every ordered pair of tokens (including each token with itself).
            var pairDoc = mediumModel.GetDocument("dog cat banana");

            Console.WriteLine();
            foreach (Token left in pairDoc.Tokens)
            {
                foreach (Token right in pairDoc.Tokens)
                {
                    var similarity = left.Similarity(right);
                    Console.WriteLine($"{left.Text} {right.Text} {similarity}");
                }
            }

            // Part 4: the string store is indexed both by text and by a numeric key.
            var coffeeDoc = mediumModel.GetDocument("I love coffee");
            var vocab = coffeeDoc.Vocab;

            Console.WriteLine();
            Console.WriteLine(vocab.Strings["coffee"]);
            Console.WriteLine(vocab.Strings[3197928453018144401]);

            // Part 5: look up the vocabulary entry for each token's text and print its attributes.
            Console.WriteLine();
            foreach (Token tok in coffeeDoc.Tokens)
            {
                var entry = coffeeDoc.Vocab[tok.Text];
                var lexemeLine = $"{entry.Text} {entry.Orth} {entry.Shape} {entry.Prefix} {entry.Suffix} {entry.IsAlpha} {entry.IsDigit} {entry.IsTitle} {entry.Lang}";
                Console.WriteLine(lexemeLine);
            }
        }
예제 #2
0
        /// <summary>
        /// Parses a sample sentence with the "en_core_web_sm" model and hands the resulting
        /// document to <c>Displacy.Serve</c> together with the "dep" argument.
        /// </summary>
        public static void Run()
        {
            var pipeline = new Spacy().Load("en_core_web_sm");
            var parsed = pipeline.GetDocument("Apple is looking at buying U.K. startup for $1 billion");

            // NOTE(review): "dep" presumably selects the dependency-parse visualisation; confirm against the Displacy API.
            new Displacy().Serve(parsed, "dep");
        }
예제 #3
0
        /// <summary>
        /// Parses a sample sentence with the "en_core_web_sm" model and prints, for every
        /// token, its text, its dependency label, the text of its head token and a
        /// bracketed, comma-separated list of the texts of its children.
        /// </summary>
        public static void Run()
        {
            var pipeline = new Spacy().Load("en_core_web_sm");
            var parsed = pipeline.GetDocument("Autonomous cars shift insurance liability toward manufacturers");

            foreach (var token in parsed.Tokens)
            {
                // Gather the child texts first so they can be joined into one list.
                var childTexts = new List<string>();
                foreach (var child in token.Children)
                {
                    childTexts.Add(child.Text);
                }

                var joinedChildren = string.Join(", ", childTexts);
                Console.WriteLine($"{token.Text} {token.Dep} {token.Head.Text} [{joinedChildren}]");
            }
        }
예제 #4
0
        /// <summary>
        /// Round-trips a small document through three serialization paths (a file on disk,
        /// a byte buffer, and a <c>DocBin</c> container) and prints the original and each
        /// restored document with <c>PrintDoc</c>, which is defined outside this method.
        /// </summary>
        public static void Run()
        {
            var spacy = new Spacy();
            var pipeline = spacy.Load("en_core_web_sm");

            // The source document that every restore below is compared against.
            var original = pipeline.GetDocument("I love coffee");

            Console.WriteLine();
            PrintDoc(original);

            // --- Serialize -------------------------------------------------------------
            // Write the document to the file "doc.spacy" and also capture it as bytes.
            original.ToDisk("doc.spacy");
            var originalBytes = original.ToBytes();

            // Pack the same document into a DocBin restricted to the listed attributes.
            var storedAttrs = new[] { "ENT_IOB", "POS", "HEAD", "DEP", "ENT_TYPE" };
            var packer = new DocBin(attrs: storedAttrs, storeUserData: true);
            packer.Add(original);
            var packedBytes = packer.ToBytes();

            // --- Restore ---------------------------------------------------------------
            // From the file written above, into a document created over a new Vocab.
            var fromDisk = new Doc(new Vocab());
            fromDisk.FromDisk("doc.spacy");
            Console.WriteLine();
            PrintDoc(fromDisk);

            // From the in-memory byte buffer, again into a document over a new Vocab.
            var fromBytes = new Doc(new Vocab());
            fromBytes.FromBytes(originalBytes);
            Console.WriteLine();
            PrintDoc(fromBytes);

            // From the DocBin bytes; documents are materialized against the pipeline's vocab.
            var unpacker = new DocBin();
            unpacker.FromBytes(packedBytes);
            var restoredDocs = unpacker.GetDocs(pipeline.Vocab);

            Console.WriteLine();
            PrintDoc(restoredDocs[0]);
        }
예제 #5
0
        /// <summary>
        /// Runs the "es_core_news_sm" pipeline over a Spanish sample paragraph and prints a
        /// tour of the results to the console: pipeline component names, token texts,
        /// part-of-speech values, attributes of the first token, lemmas, sentences, noun
        /// chunks and named entities. Sections are divided by a separator line.
        /// </summary>
        public static void Run()
        {
            var spacy = new Spacy();
            var nlp   = spacy.Load("es_core_news_sm");

            var separator = "____________________________________________________________________________";
            var text      = @"Cuando Sebastian Thrun empezó a trabajar en coches de conducción autónoma, en 2007, para ";

            text += "Google, muy poca gente fuera de la empresa le tomó en serio. “Podría contaros como CEOs muy ";
            text += "veteranos de las empresas automotrices más grandes de América me daban la mano para después ";
            text += "ignorarme porque no merecía la pena hablar conmigo”, comentaba Thrun, en una entrevista a Recode ";
            text += "a principios de semana";

            var doc = nlp.GetDocument(text);

            Console.WriteLine("Pipeline:");
            Console.WriteLine(string.Join(",", nlp.PipeNames));
            Console.WriteLine(separator);

            // Each list section collects the quoted items and lets string.Join place the
            // separators. The previous "\b\b]" backspace trick left raw control characters
            // in redirected output and backed up over the opening bracket on an empty list.
            Console.WriteLine("Tokenization");
            var tokenTexts = new System.Collections.Generic.List<string>();
            foreach (var token in doc.Tokens)
            {
                tokenTexts.Add("'" + token.Text + "'");
            }
            WriteBracketedList(tokenTexts);
            Console.WriteLine(separator);

            Console.WriteLine("Pos");
            var posValues = new System.Collections.Generic.List<string>();
            foreach (var token in doc.Tokens)
            {
                posValues.Add("'" + token.PoS + "'");
            }
            WriteBracketedList(posValues);
            Console.WriteLine(separator);

            Console.WriteLine("PoS[0]:");
            var token0 = doc.Tokens[0];

            // In spaCy, pos_ is the coarse-grained universal POS tag and tag_ is the
            // fine-grained tag; the labels were previously swapped.
            // NOTE(review): assumes PoS/Tag wrap spaCy's pos_/tag_ - confirm against the wrapper.
            Console.WriteLine("Coarse-grained POS tag " + token0.PoS);
            Console.WriteLine("Fine-grained POS tag " + token0.Tag);
            Console.WriteLine("Word shape " + token0.Shape);
            Console.WriteLine("Alphabetic characters? " + token0.IsAlpha);
            Console.WriteLine("Punctuation mark? " + token0.IsPunct);
            Console.WriteLine("Digit? " + token0.IsDigit);
            Console.WriteLine("Like a number? " + token0.LikeNum);
            Console.WriteLine("Like an email address? " + token0.LikeEMail);
            Console.WriteLine(separator);

            Console.WriteLine("Lemmatization:");
            var lemmas = new System.Collections.Generic.List<string>();
            foreach (var token in doc.Tokens)
            {
                lemmas.Add("'" + token.Lemma + "'");
            }
            WriteBracketedList(lemmas);
            Console.WriteLine(separator);

            Console.WriteLine("Sentences:");
            var sentenceTexts = new System.Collections.Generic.List<string>();
            foreach (var sentence in doc.Sents)
            {
                sentenceTexts.Add("'" + sentence.Text + "'");
            }
            WriteBracketedList(sentenceTexts);
            Console.WriteLine(separator);

            Console.WriteLine("Noun Phrases:");
            var nounChunkTexts = new System.Collections.Generic.List<string>();
            foreach (var nounChunk in doc.NounChunks)
            {
                nounChunkTexts.Add("'" + nounChunk.Text + "'");
            }
            WriteBracketedList(nounChunkTexts);
            Console.WriteLine(separator);

            Console.WriteLine("Entities (Named entities, phrases and concepts):");
            foreach (var entity in doc.Ents)
            {
                Console.WriteLine("Entity: " + entity.Text + "\tLabel: " + entity.Label);
            }
            Console.WriteLine(separator);
        }

        /// <summary>
        /// Writes the given items on one console line, separated by ", " and wrapped in
        /// square brackets. An empty sequence is written as "[]".
        /// </summary>
        /// <param name="items">Already-formatted items to print, in order.</param>
        private static void WriteBracketedList(System.Collections.Generic.IEnumerable<string> items)
        {
            Console.WriteLine("[" + string.Join(", ", items) + "]");
        }