コード例 #1
0
        // Verifies that a History added to the history table is reported at the same index
        // by GetIndex, and that a word extractor at offset 0 reads back the stored word.
        public virtual void TestPairsHolder()
        {
            PairsHolder pairsHolder = new PairsHolder();

            for (int i = 0; i < 10; i++)
            {
                pairsHolder.Add(new WordTag("girl", "NN"));
            }
            MaxentTagger maxentTagger = new MaxentTagger();

            maxentTagger.Init(null);
            History           h  = new History(0, 5, 3, pairsHolder, maxentTagger.extractors);
            TaggerExperiments te = new TaggerExperiments(maxentTagger);
            int x = te.GetHistoryTable().Add(h);
            int y = te.GetHistoryTable().GetIndex(h);

            // NUnit's argument order is (expected, actual, message). With the message first,
            // the message text itself would be compared against the actual value.
            NUnit.Framework.Assert.AreEqual(x, y, "Failing to get same index for history");
            Extractor e = new Extractor(0, false);
            string    k = e.Extract(h);

            NUnit.Framework.Assert.AreEqual("girl", k, "Extractor didn't find stored word");
        }
コード例 #2
0
 /// <summary>
 /// Builds a feature string from the tags at <c>leftPosition</c> and <c>rightPosition</c>,
 /// joined by a '!' separator.
 /// </summary>
 internal override string Extract(History h, PairsHolder pH)
 {
     // Plain concatenation is deliberate: the original author's timing tests indicated
     // it is cheaper than a StringBuilder or the StringBuildMemoizer.
     string leftTag  = pH.GetTag(h, leftPosition);
     string rightTag = pH.GetTag(h, rightPosition);

     return leftTag + '!' + rightTag;
 }
コード例 #3
0
            /// <summary>
            /// Concatenates the tags at every offset from <c>position</c> up to, but not
            /// including, offset 0, separated by '!'.
            /// </summary>
            /// <returns>The joined tags, or an empty string when <c>position</c> is 0.</returns>
            internal override string Extract(History h, PairsHolder pH)
            {
                // Negative positions walk upward toward 0, positive ones walk downward.
                int           step = (position < 0) ? 1 : -1;
                StringBuilder tags = new StringBuilder();

                for (int offset = position; offset != 0; offset += step)
                {
                    // Every tag except the first is preceded by the separator.
                    if (offset != position)
                    {
                        tags.Append('!');
                    }
                    tags.Append(pH.GetTag(h, offset));
                }
                return tags.ToString();
            }
コード例 #4
0
        /// <summary>
        /// Returns the word shape of whatever the base extractor yields, computed by
        /// <c>WordShapeClassifier.WordShape</c> with this extractor's <c>wordShaper</c>.
        /// </summary>
        internal override string Extract(History h, PairsHolder pH)
        {
            return WordShapeClassifier.WordShape(base.Extract(h, pH), wordShaper);
        }
コード例 #5
0
 /// <summary>Creates a history bound to the given pairs holder and extractors.</summary>
 /// <remarks>This constructor assigns only these two references; no index is set here.</remarks>
 internal History(PairsHolder pairs, Extractors extractors)
 {
     this.extractors = extractors;
     this.pairs      = pairs;
 }
コード例 #6
0
            /// <summary>
            /// Returns the current word when lower-casing changes it, and <c>zeroSt</c>
            /// when the word is already identical to its lower-cased form.
            /// </summary>
            internal override string Extract(History h, PairsHolder pH)
            {
                string current    = pH.GetWord(h, 0);
                string lowerCased = current.ToLower(Locale.English);

                return lowerCased.Equals(current) ? zeroSt : current;
            }
コード例 #7
0
        /// <summary>
        /// Joins the word shapes of the words at offsets <c>left</c> through <c>right</c>
        /// (inclusive) with a '|' between consecutive shapes.
        /// </summary>
        internal override string Extract(History h, PairsHolder pH)
        {
            StringBuilder shapes = new StringBuilder();

            for (int offset = left; offset <= right; offset++)
            {
                string shape = WordShapeClassifier.WordShape(pH.GetWord(h, offset), wordShaper);
                // The separator goes between shapes, never before the first one.
                if (offset > left)
                {
                    shapes.Append('|');
                }
                shapes.Append(shape);
            }
            return shapes.ToString();
        }
        /// <summary>
        /// Joins the lexicon mappings of the words at offsets <c>left</c> through
        /// <c>right</c> (inclusive) with a '|' between consecutive mappings.
        /// </summary>
        internal override string Extract(History h, PairsHolder pH)
        {
            StringBuilder joined = new StringBuilder();

            for (int offset = left; offset <= right; offset++)
            {
                string mapped = lexicon.GetMapping(pH.GetWord(h, offset));
                // The separator goes between mappings, never before the first one.
                if (offset > left)
                {
                    joined.Append('|');
                }
                joined.Append(mapped);
            }
            return joined.ToString();
        }
コード例 #9
0
        /// <summary>
        /// Loads every tagged file record described by <paramref name="config"/> and then
        /// fills the tagger's dictionary with the accumulated word/tag counts.
        /// </summary>
        protected internal ReadDataTagged(TaggerConfig config, MaxentTagger maxentTagger, PairsHolder pairs)
        {
            //TODO: make a class DataHolder that holds the dict, tags, pairs, etc, for tagger and pass it around
            this.pairs        = pairs;
            this.maxentTagger = maxentTagger;

            IList <TaggedFileRecord> records = TaggedFileRecord.CreateRecords(config, config.GetFile());
            IDictionary <string, IntCounter <string> > counts = Generics.NewHashMap();

            foreach (TaggedFileRecord fileRecord in records)
            {
                LoadFile(fileRecord.Reader(), counts);
            }

            // The counts are gathered first and the Dictionary is filled once afterwards,
            // so that nothing has to mutate the Dictionary or its TagCount objects later.
            maxentTagger.dict.FillWordTagCounts(counts);
        }
コード例 #10
0
        /// <summary>
        /// Returns <c>oneSt</c> when a preceding word matching <c>vbnWord</c> is found
        /// within <c>bound</c> positions before the current word, otherwise <c>zeroSt</c>.
        /// </summary>
        /// <remarks>
        /// The feature is treated as inapplicable (<c>zeroSt</c>) when the dictionary has
        /// never seen the word and it ends in neither <c>edSuff</c> nor <c>enSuff</c>, or
        /// when the word's VBN plus VBD counts are at most one hundredth of its total count.
        /// </remarks>
        internal override string Extract(History h, PairsHolder pH)
        {
            string cword    = pH.GetWord(h, 0);
            int    total    = dict.Sum(cword);
            int    vbnCount = dict.GetCount(cword, vbnTag);
            int    vbdCount = dict.GetCount(cword, vbdTag);

            // Conditions for deciding inapplicable
            if (total == 0)
            {
                if (!cword.EndsWith(edSuff) && !cword.EndsWith(enSuff))
                {
                    return zeroSt;
                }
            }
            else if ((total > 0) && (vbnCount + vbdCount <= total / 100))
            {
                return zeroSt;
            }

            string lastverb = naWord;

            // NOTE(review): the stride of two reproduces the original loop, which decremented
            // the index both in the loop header and at the end of the body, so only offsets
            // -1, -3, -5, ... are inspected. This looks unintended, but changing it would
            // change the extracted features - confirm before altering.
            for (int offset = -1; offset >= -bound; offset -= 2)
            {
                string candidate = pH.GetWord(h, offset);
                if ("NA".Equals(candidate) || stopper.Matcher(candidate).Matches())
                {
                    break;
                }
                if (vbnWord.Matcher(candidate).Matches())
                {
                    lastverb = candidate;
                    break;
                }
            }

            if (lastverb.Equals(naWord))
            {
                return zeroSt;
            }
            log.Info("VBN: For " + cword + ", found preceding VBN cue " + lastverb);
            return oneSt;
        }
コード例 #11
0
 /// <summary>Gathers feature statistics from a training file found in the TaggerConfig.</summary>
 /// <remarks>
 /// This constructor is the start of the training process. It reads the tagged data,
 /// stores one (history index, tag index) pair per data token in <c>vArray</c>, and then
 /// runs <c>Ptilde</c>, <c>HashHistories</c> and <c>GetFeaturesNew</c>.
 /// </remarks>
 /// <exception cref="System.IO.IOException"/>
 protected internal TaggerExperiments(TaggerConfig config, MaxentTagger maxentTagger)
     : this(maxentTagger)
 {
     log.Info("TaggerExperiments: adding word/tags");
     PairsHolder    pairs = new PairsHolder();
     ReadDataTagged c     = new ReadDataTagged(config, maxentTagger, pairs);

     // One row per data token. An empty jagged array here would make every
     // vArray[i][...] access below throw IndexOutOfRangeException.
     int tokenCount = c.GetSize();
     vArray = new int[tokenCount][];
     InitTemplatesNew();
     log.Info("Featurizing tagged data tokens...");
     // The loop is bounded by the same count that sizes vArray and is logged below.
     for (int i = 0; i < tokenCount; i++)
     {
         DataWordTag d    = c.Get(i);
         string      yS   = d.GetY();
         History     h    = d.GetHistory();
         int         indX = tHistories.Add(h);
         int         indY = d.GetYInd();
         AddTemplatesNew(h, yS);
         AddRareTemplatesNew(h, yS);
         // Column 0 holds the history index, column 1 the tag index.
         vArray[i] = new int[] { indX, indY };
     }
     // Per-token progress output was dropped: featurizing no longer takes long enough to need it.
     log.Info("Featurized " + c.GetSize() + " data tokens [done].");
     c.Release();
     Ptilde();
     maxentTagger.xSize = xSize;
     maxentTagger.ySize = ySize;
     log.Info("xSize [num Phi templates] = " + xSize + "; ySize [num classes] = " + ySize);
     HashHistories();
     // if we'll look at occurring tags only, we need the histories and pairs still
     if (!maxentTagger.occurringTagsOnly && !maxentTagger.possibleTagsOnly)
     {
         tHistories.Release();
         pairs.Clear();
     }
     GetFeaturesNew();
 }
コード例 #12
0
        /// <summary>
        /// Scans backwards from the position before <c>h.current</c> and returns the word
        /// whose tag starts with "VB", looking at most <paramref name="bound"/> positions
        /// back and never before <c>h.start</c>.
        /// </summary>
        /// <param name="h">History supplying the <c>start</c> and <c>current</c> indices.</param>
        /// <param name="pH">Holder queried for the tag and word at each absolute index.</param>
        /// <param name="bound">Maximum number of positions to look back.</param>
        /// <returns>
        /// The matching word, or "NA" when the scan runs out of range or first meets a tag
        /// starting with ",".
        /// </returns>
        internal virtual string ExtractLV(History h, PairsHolder pH, int bound)
        {
            // NOTE(review): an earlier comment said this should also extract the current
            // word; the code only ever returns the preceding verbal word.
            int    start    = h.start;
            string lastverb = "NA";
            int    current  = h.current;
            int    index    = current - 1;

            while ((index >= start) && (index >= current - bound))
            {
                string tag = pH.GetTag(index);
                // Tags are identifiers, not linguistic text: compare ordinally so the
                // result does not depend on the current culture.
                if (tag.StartsWith("VB", System.StringComparison.Ordinal))
                {
                    lastverb = pH.GetWord(index);
                    break;
                }
                if (tag.StartsWith(",", System.StringComparison.Ordinal))
                {
                    break;
                }
                index--;
            }
            return lastverb;
        }
コード例 #13
0
 /// <summary>
 /// Extracts a feature value. This default implementation disregards
 /// <paramref name="bound"/> and defers to the two-argument overload; a few
 /// subclasses make use of the bound.
 /// </summary>
 internal virtual string Extract(History h, PairsHolder pH, int bound)
 {
     string extracted = Extract(h, pH);

     return extracted;
 }
コード例 #14
0
 /// <summary>
 /// Returns the tag at <c>position</c> when <c>isTag</c> is set, otherwise the word
 /// at <c>position</c>.
 /// </summary>
 internal virtual string Extract(History h, PairsHolder pH)
 {
     if (isTag)
     {
         return pH.GetTag(h, position);
     }
     return pH.GetWord(h, position);
 }
コード例 #15
0
        /// <summary>Returns the lexicon mapping of the value produced by the base extractor.</summary>
        internal override string Extract(History h, PairsHolder pH)
        {
            return lexicon.GetMapping(base.Extract(h, pH));
        }
コード例 #16
0
 /// <summary>
 /// Creates a history bound to the given pairs holder and extractors, then passes
 /// <paramref name="start"/>, <paramref name="end"/> and <paramref name="current"/>
 /// to <c>Init</c>.
 /// </summary>
 internal History(int start, int end, int current, PairsHolder pairs, Extractors extractors)
 {
     this.extractors = extractors;
     this.pairs      = pairs;

     // Init runs last, after both references have been assigned.
     Init(start, end, current);
 }
コード例 #17
0
        /// <summary>
        /// Returns "1" when the tag produced by the base extractor starts with "vs",
        /// otherwise "0".
        /// </summary>
        internal override string Extract(History h, PairsHolder pH)
        {
            string tag = base.Extract(h, pH);

            // Tags are identifiers, not linguistic text: compare ordinally so the
            // result does not depend on the current culture.
            return tag.StartsWith("vs", System.StringComparison.Ordinal) ? "1" : "0";
        }
コード例 #18
0
 /// <summary>Returns the word at <c>position</c>, lower-cased with <c>Locale.English</c>.</summary>
 internal override string Extract(History h, PairsHolder pH)
 {
     string token = pH.GetWord(h, position);

     return token.ToLower(Locale.English);
 }
コード例 #19
0
 /// <summary>
 /// Builds a feature string of the form tag!word!tag from the tag at <c>position1</c>,
 /// the word at <c>word</c> and the tag at <c>position2</c>.
 /// </summary>
 internal override string Extract(History h, PairsHolder pH)
 {
     string leftTag    = pH.GetTag(h, position1);
     string middleWord = pH.GetWord(h, word);
     string rightTag   = pH.GetTag(h, position2);

     return leftTag + '!' + middleWord + '!' + rightTag;
 }
コード例 #20
0
 /// <summary>
 /// Builds a feature string of the form word!tag!word from the word at <c>leftWord</c>,
 /// the tag at <c>tag</c> and the word at <c>rightWord</c>.
 /// </summary>
 internal override string Extract(History h, PairsHolder pH)
 {
     string leftToken  = pH.GetWord(h, leftWord);
     string middleTag  = pH.GetTag(h, tag);
     string rightToken = pH.GetWord(h, rightWord);

     return leftToken + '!' + middleTag + '!' + rightToken;
 }