C# (CSharp) DataInstance.GetTokens примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: DataInstance

Метод/Функция: GetTokens

Примеров на hotexamples.com: 4

C# (CSharp) DataInstance.GetTokens - 4 примера найдено. Это лучшие примеры C# (CSharp) кода для DataInstance.GetTokens, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Instance(27)

FindList(4)

GetTokens(4)

Add(1)

Close(1)

GetNewSurfaceInstance(1)

GetObject(1)

GetType(1)

ToString(1)

addAdmin(1)

addAttendee(1)

addHost(1)

Пример #1

Показать файл

Файл: DepPatternFactory.cs Проект: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Builds a <see cref="DepPattern"/> from an (indexed word, grammatical relation) pair.
        /// The word's token is looked up in <paramref name="sent"/> and its processed-text
        /// annotation value is added to a new Dep-type token as an OR restriction.
        /// </summary>
        /// <param name="p">Pair whose first element locates the word and whose second element is the relation.</param>
        /// <param name="sent">Sentence instance supplying the token list.</param>
        /// <returns>A dependency pattern combining the restricted token with the relation.</returns>
        public static DepPattern PatternToDepPattern(Pair <IndexedWord, GrammaticalRelation> p, DataInstance sent)
        {
            var processedTextKey = typeof(PatternsAnnotations.ProcessedTextAnnotation);
            Token restrictedToken = new Token(PatternFactory.PatternType.Dep);

            // The word index is shifted down by one before addressing the token list.
            var sentenceTokens = sent.GetTokens();
            CoreLabel wordLabel = sentenceTokens[p.First().Index() - 1];

            System.Diagnostics.Debug.Assert(wordLabel.ContainsKey(processedTextKey), "the keyset are " + wordLabel.ToString(CoreLabel.OutputFormat.All));

            var processedText = wordLabel.Get(processedTextKey);
            restrictedToken.AddORRestriction(processedTextKey, processedText);
            return new DepPattern(restrictedToken, p.Second());
        }

Пример #2

Показать файл

Файл: ApplyDepPatterns.cs Проект: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Applies a Semgrex pattern to a sentence's dependency graph and returns the phrases
        /// collected by <c>ExtractPhraseFromPattern.GetSemGrexPatternNodes</c>.
        /// </summary>
        /// <param name="graph">Dependency graph handed to the extractor.</param>
        /// <param name="pattern">Semgrex pattern to match.</param>
        /// <param name="sent">Sentence instance supplying the tokens.</param>
        /// <param name="label">Label used to look up the compound-word limit in <c>PatternFactory.numWordsCompoundMapped</c>.</param>
        /// <returns>The extracted phrases filled in by the extractor (empty if it adds none).</returns>
        private ICollection <ExtractedPhrase> GetMatchedTokensIndex(SemanticGraph graph, SemgrexPattern pattern, DataInstance sent, string label)
        {
            //TODO: look at the ignoreCommonTags flag
            ExtractPhraseFromPattern extract       = new ExtractPhraseFromPattern(false, PatternFactory.numWordsCompoundMapped[label]);
            ICollection <IntPair>    outputIndices = new List <IntPair>();
            bool findSubTrees         = true;
            IList <CoreLabel> tokensC = sent.GetTokens();

            //TODO: see if you can get rid of this (only used for matchedGraphs)
            // The automated port left "tokensC.Stream().Map(null)" here: the mapping lambda was lost and a
            // null mapper cannot produce the word list. Collect the surface form of every token explicitly.
            // NOTE(review): Word() is assumed to be the intended projection - confirm against the Java original.
            IList <string> tokens = new List <string>(tokensC.Count);
            foreach (CoreLabel tokenLabel in tokensC)
            {
                tokens.Add(tokenLabel.Word());
            }

            IList <string>          outputPhrases    = new List <string>();
            IList <ExtractedPhrase> extractedPhrases = new List <ExtractedPhrase>();
            // Not passed to the extractor below; kept so the construction the original performed still happens.
            Func <Pair <IndexedWord, SemanticGraph>, ICounter <string> > extractFeatures = new _IFunction_206();

            //TODO: make features;
            extract.GetSemGrexPatternNodes(graph, tokens, outputPhrases, outputIndices, pattern, findSubTrees, extractedPhrases, constVars.matchLowerCaseContext, matchingWordRestriction);

            //TODO: probably a bad idea to add ALL ngrams (the disabled n-gram expansion over outputIndices was dropped as dead code)
            return(extractedPhrases);
        }

Пример #3

Показать файл

        // && !text.contains("+") &&
        // !text.contains("*");// && !
        // text.contains("$") && !text.contains("\"");
        /// <summary>
        /// Maps every token position of a sentence to the surface patterns built around that token.
        /// Positions whose word is rejected by <c>PatternFactory.DoNotUse</c> map to an empty set.
        /// </summary>
        /// <param name="sent">Sentence instance supplying the tokens.</param>
        /// <param name="stopWords">Stop-word phrases forwarded to the stop-word check and to <c>GetContext</c>.</param>
        /// <returns>A dictionary with one entry per token position.</returns>
        public static IDictionary <int, ISet> GetPatternsAroundTokens(DataInstance sent, ICollection <CandidatePhrase> stopWords)
        {
            IDictionary <int, ISet> patternsByPosition = new Dictionary <int, ISet>();
            IList <CoreLabel>       sentenceTokens     = sent.GetTokens();

            for (int position = 0; position < sentenceTokens.Count; position++)
            {
                CoreLabel current = sentenceTokens[position];

                if (PatternFactory.DoNotUse(current.Word(), stopWords))
                {
                    // do not create patterns around stop words: the position keeps an empty set
                    patternsByPosition[position] = new HashSet <SurfacePattern>();
                }
                else
                {
                    ICollection <SurfacePattern> context = GetContext(sent.GetTokens(), position, stopWords);
                    patternsByPosition[position] = context;
                }
            }

            return patternsByPosition;
        }

Пример #4

Показать файл

Файл: ApplyDepPatterns.cs Проект: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Matches every pattern in <c>patterns</c> against every sentence listed in <c>sentids</c>
        /// and collects, per pattern, the candidate phrases and the token spans that matched.
        /// (<c>sentids</c>, <c>sents</c>, <c>patterns</c>, <c>label</c>, <c>constVars</c> and the
        /// stop-word flags are members declared outside this snippet.)
        /// </summary>
        /// <returns>
        /// A pair of (phrase-by-pattern frequency counter, map from pattern to
        /// (sentence id, start index, inclusive end index) triples).
        /// </returns>
        /// <exception cref="System.Exception">Thrown when an entry of <c>patterns</c> has a null key.</exception>
        public virtual Pair <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> > > Call()
        {
            // CollectionValuedMap<String, Integer> tokensMatchedPattern = new
            // CollectionValuedMap<String, Integer>();
            TwoDimensionalCounter <CandidatePhrase, E>          allFreq            = new TwoDimensionalCounter <CandidatePhrase, E>();
            CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat = new CollectionValuedMap <E, Triple <string, int, int> >();

            foreach (string sentid in sentids)
            {
                DataInstance      sent   = sents[sentid];
                IList <CoreLabel> tokens = sent.GetTokens();
                foreach (KeyValuePair <SemgrexPattern, E> pEn in patterns)
                {
                    if (pEn.Key == null)
                    {
                        throw new Exception("why is the pattern " + pEn + " null?");
                    }
                    // The sentence must be a DataInstanceDep here; the cast fails otherwise.
                    SemanticGraph graph = ((DataInstanceDep)sent).GetGraph();
                    //SemgrexMatcher m = pEn.getKey().matcher(graph);
                    //TokenSequenceMatcher m = pEn.getKey().matcher(sent);
                    //        //Setting this find type can save time in searching - greedy and reluctant quantifiers are not enforced
                    //        m.setFindType(SequenceMatcher.FindType.FIND_ALL);
                    //Higher branch values makes the faster but uses more memory
                    //m.setBranchLimit(5);
                    ICollection <ExtractedPhrase> matched = GetMatchedTokensIndex(graph, pEn.Key, sent, label);
                    foreach (ExtractedPhrase match in matched)
                    {
                        // s is the first token of the span; e is one past match.endIndex, i.e. an exclusive
                        // upper bound for the token loop below (e - 1 is what gets reported in the Triple).
                        int    s                 = match.startIndex;
                        int    e                 = match.endIndex + 1;
                        string phrase            = string.Empty;
                        string phraseLemma       = string.Empty;
                        // Set when at least one word added to the phrase is not already labeled with `label`.
                        bool   useWordNotLabeled = false;
                        // Set when the span must be discarded (ignored class, stop word, or a gap in the phrase).
                        bool   doNotUse          = false;
                        //find if the neighboring words are labeled - if so - club them together
                        if (constVars.clubNeighboringLabeledWords)
                        {
                            // Walk left from the span while the preceding token carries `label` and the
                            // length check against numWordsCompoundMapped[label] still passes.
                            for (int i = s - 1; i >= 0; i--)
                            {
                                if (tokens[i].Get(constVars.GetAnswerClass()[label]).Equals(label) && (e - i + 1) <= PatternFactory.numWordsCompoundMapped[label])
                                {
                                    s = i;
                                }
                                else
                                {
                                    //System.out.println("for phrase " + match + " clubbing earlier word. new s is " + s);
                                    break;
                                }
                            }
                            // Walk right from the span under the same conditions.
                            // NOTE(review): e is used as an exclusive bound below, yet it is assigned i_1 itself
                            // here, so the last labeled neighbour found is not covered by the token loop - confirm
                            // whether that is intended.
                            for (int i_1 = e; i_1 < tokens.Count; i_1++)
                            {
                                if (tokens[i_1].Get(constVars.GetAnswerClass()[label]).Equals(label) && (i_1 - s + 1) <= PatternFactory.numWordsCompoundMapped[label])
                                {
                                    e = i_1;
                                }
                                else
                                {
                                    //System.out.println("for phrase " + match + " clubbing next word. new e is " + e);
                                    break;
                                }
                            }
                        }
                        //to make sure we discard phrases with stopwords in between, but include the ones in which stop words were removed at the ends if removeStopWordsFromSelectedPhrases is true
                        // addedindices[k] records whether token s + k contributed a word to the phrase.
                        bool[] addedindices = new bool[e - s];
                        // Arrays.fill(addedindices, false); // get for free on array initialization
                        for (int i_2 = s; i_2 < e; i_2++)
                        {
                            CoreLabel l = tokens[i_2];
                            // Side effect on the shared token: flag it as matched and record the pattern on it.
                            l.Set(typeof(PatternsAnnotations.MatchedPattern), true);
                            if (!l.ContainsKey(typeof(PatternsAnnotations.MatchedPatterns)) || l.Get(typeof(PatternsAnnotations.MatchedPatterns)) == null)
                            {
                                l.Set(typeof(PatternsAnnotations.MatchedPatterns), new HashSet <Pattern>());
                            }
                            Pattern pSur = pEn.Value;
                            System.Diagnostics.Debug.Assert(pSur != null, "Why is " + pEn.Value + " not present in the index?!");
                            System.Diagnostics.Debug.Assert(l.Get(typeof(PatternsAnnotations.MatchedPatterns)) != null, "How come MatchedPatterns class is null for the token. The classes in the key set are " + l.KeySet());
                            l.Get(typeof(PatternsAnnotations.MatchedPatterns)).Add(pSur);
                            // Discard the span if the token carries any (class, value) pair configured to be ignored for this label.
                            foreach (KeyValuePair <Type, object> ig in constVars.GetIgnoreWordswithClassesDuringSelection()[label])
                            {
                                if (l.ContainsKey(ig.Key) && l.Get(ig.Key).Equals(ig.Value))
                                {
                                    doNotUse = true;
                                }
                            }
                            bool containsStop = ContainsStopWord(l, constVars.GetCommonEngWords(), PatternFactory.ignoreWordRegex);
                            if (removePhrasesWithStopWords && containsStop)
                            {
                                doNotUse = true;
                            }
                            else
                            {
                                // A stop-word token is skipped (not appended) only when
                                // removeStopWordsFromSelectedPhrases is true; every other token is appended.
                                if (!containsStop || !removeStopWordsFromSelectedPhrases)
                                {
                                    if (label == null || l.Get(constVars.GetAnswerClass()[label]) == null || !l.Get(constVars.GetAnswerClass()[label]).Equals(label))
                                    {
                                        useWordNotLabeled = true;
                                    }
                                    phrase               += " " + l.Word();
                                    phraseLemma          += " " + l.Lemma();
                                    addedindices[i_2 - s] = true;
                                }
                            }
                        }
                        // Reject the span when a single skipped token sits between two added tokens
                        // (pattern added / skipped / added); skipped tokens at either end are tolerated.
                        for (int i_3 = 0; i_3 < addedindices.Length; i_3++)
                        {
                            if (i_3 > 0 && i_3 < addedindices.Length - 1 && addedindices[i_3 - 1] == true && addedindices[i_3] == false && addedindices[i_3 + 1] == true)
                            {
                                doNotUse = true;
                                break;
                            }
                        }
                        // Record the span only if it was not rejected and it contains at least one word
                        // that is not already labeled.
                        if (!doNotUse && useWordNotLabeled)
                        {
                            matchedTokensByPat.Add(pEn.Value, new Triple <string, int, int>(sentid, s, e - 1));
                            // NOTE(review): this inner check is always true here, given the enclosing condition.
                            if (useWordNotLabeled)
                            {
                                // Both strings were built with a leading space per word, hence the Trim.
                                phrase      = phrase.Trim();
                                phraseLemma = phraseLemma.Trim();
                                allFreq.IncrementCount(CandidatePhrase.CreateOrGet(phrase, phraseLemma, match.GetFeatures()), pEn.Value, 1.0);
                            }
                        }
                    }
                }
            }
            return(new Pair <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> > >(allFreq, matchedTokensByPat));
        }