예제 #1
0
        /* --- unused
         * double gprime(double lambdaP, int index) {
         * double s = 0.0;
         * for (int i = 0; i < p.functions.get(index).len(); i++) {
         * int y = ((TaggerFeature) (p.functions.get(index))).getYTag();
         * int x = (p.functions.get(index)).getX(i);
         * s = s + p.data.ptildeX(x) * pcond(y, x) * 1 * Math.exp(lambdaP * fnum(x, y)) * fnum(x, y);
         * }
         * return s;
         * }
         * --- */
        /// <summary>
        /// Accumulates, over every x value covered by the given feature, the product
        /// <c>p.data.PtildeX(x) * Pcond(y, x)</c>, where y is the tag stored in the feature.
        /// </summary>
        /// <param name="f">The feature to evaluate; it is cast to <c>TaggerFeature</c>.</param>
        /// <returns>The accumulated sum (0.0 when the feature covers no x values).</returns>
        internal override double FExpected(Feature f)
        {
            TaggerFeature taggerFeature = (TaggerFeature)f;
            double        expectation   = 0.0;
            int           tagIndex      = taggerFeature.GetYTag();
            int           position      = 0;

            // f.Len() is re-read on every pass, exactly as a for-loop condition would.
            while (position < f.Len())
            {
                int xIndex = taggerFeature.GetX(position);
                expectation += p.data.PtildeX(xIndex) * Pcond(tagIndex, xIndex);
                position++;
            }
            return expectation;
        }
 /// <summary>
 /// Builds a <c>TaggerFeature</c> for every populated key in <c>sTemplates</c>, fills the
 /// per-(x, y) feature-count table <c>fnumArr</c>, and stores the x values of the newly
 /// positioned features in <c>feats.xIndexed</c>.
 /// </summary>
 /// <remarks>
 /// This method uses and deletes a file tempXXXXXX.x in the current directory! The x values are
 /// spooled to that file while the features are created and read back once their total number
 /// is known. The file is closed and deleted even when feature construction fails.
 /// </remarks>
 /// <exception cref="RuntimeIOException">
 /// Wraps any exception raised while building the features or handling the temporary file.
 /// </exception>
 private void GetFeaturesNew()
 {
     // todo: can fnumArr overflow?
     try
     {
         log.Info("TaggerExperiments.getFeaturesNew: initializing fnumArr.");
         // A jagged array only allocates its outer dimension; every row must be created
         // explicitly before fnumArr[x][y] can be read or incremented.
         fnumArr = new byte[xSize][];
         for (int row = 0; row < xSize; row++)
         {
             fnumArr[row] = new byte[ySize];
         }
         File hFile = File.CreateTempFile("temp", ".x", new File("./"));
         try
         {
             RandomAccessFile hF = new RandomAccessFile(hFile, "rw");
             try
             {
                 PopulateFeatures(hF);
             }
             finally
             {
                 // Release the file handle even if feature construction failed.
                 hF.Close();
             }
         }
         finally
         {
             // Never leave the temporary file behind, whatever happened above.
             hFile.Delete();
         }
         LogFeatureCountStatistics();
         log.Info("end getFeaturesNew.");
     }
     catch (Exception e)
     {
         throw new RuntimeIOException(e);
     }
 }

 /// <summary>
 /// Creates the features for all keys in <c>sTemplates</c>, writing the x values of newly
 /// positioned features to <paramref name="hF"/>, then reads them back into <c>feats.xIndexed</c>.
 /// </summary>
 /// <param name="hF">Open read/write scratch file; the caller owns and closes it.</param>
 private void PopulateFeatures(RandomAccessFile hF)
 {
     log.Info("  length of sTemplates keys: " + sTemplates.Count);
     log.Info("getFeaturesNew adding features ...");
     int current  = 0;
     int numFeats = 0;
     foreach (FeatureKey fK in sTemplates)
     {
         int numF = fK.num;
         Pair <int, string> wT = new Pair <int, string>(numF, fK.val);
         int[] xValues = tFeature.GetXValues(wT);
         if (xValues == null)
         {
             log.Info("  xValues is null: " + fK);
             continue;
         }
         int numEvidence = 0;
         int y           = maxentTagger.tags.GetIndex(fK.tag);
         foreach (int xValue in xValues)
         {
             if (PassesTagRestrictions(xValue, fK.tag, true))
             {
                 numEvidence += this.px[xValue];
             }
         }
         if (!Populated(numF, numEvidence))
         {
             continue;
         }
         int[] positions = tFeature.GetPositions(fK);
         if (maxentTagger.occurringTagsOnly || maxentTagger.possibleTagsOnly)
         {
             // TODO
             // Stored positions are ignored whenever a tag restriction is active, so the
             // (filtered) x values are always written out again below.
             positions = null;
         }
         if (positions == null)
         {
             // write this in the file and create a TaggerFeature for it
             int numElements = 0;
             foreach (int x in xValues)
             {
                 if (!PassesTagRestrictions(x, fK.tag, false))
                 {
                     continue;
                 }
                 numElements++;
                 hF.WriteInt(x);
                 fnumArr[x][y]++;
             }
             TaggerFeature tF = new TaggerFeature(current, current + numElements - 1, fK, maxentTagger.GetTagIndex(fK.tag), this);
             tFeature.AddPositions(current, current + numElements - 1, fK);
             current = current + numElements;
             feats.Add(tF);
         }
         else
         {
             foreach (int x in xValues)
             {
                 fnumArr[x][y]++;
             }
             // this is the second time to write these values
             TaggerFeature tF = new TaggerFeature(positions[0], positions[1], fK, maxentTagger.GetTagIndex(fK.tag), this);
             feats.Add(tF);
         }
         RecordFeatureAssociation(fK, numFeats);
         numFeats++;
     }
     // read out the file and put everything in an array of ints stored in Feats
     tFeature.Release();
     feats.xIndexed = new int[current];
     hF.Seek(0);
     for (int i = 0; i < current; i++)
     {
         feats.xIndexed[i] = hF.ReadInt();
     }
     log.Info("  total feats: " + sTemplates.Count + ", populated: " + numFeats);
 }

 /// <summary>
 /// Applies the <c>occurringTagsOnly</c> and <c>possibleTagsOnly</c> restrictions of the tagger
 /// to the current word of history <paramref name="x"/>.
 /// </summary>
 /// <param name="x">History index passed to <c>tHistories.GetHistory</c>.</param>
 /// <param name="tag">Tag of the feature key being processed.</param>
 /// <param name="reportPossibleTags">
 /// When true, the expanded tag set is printed to standard error while the
 /// <c>possibleTagsOnly</c> restriction is evaluated.
 /// </param>
 /// <returns>False when an active restriction rejects the word/tag pair; true otherwise.</returns>
 private bool PassesTagRestrictions(int x, string tag, bool reportPossibleTags)
 {
     if (maxentTagger.occurringTagsOnly)
     {
         //check whether the current word in x has occurred with y
         string word = ExtractorFrames.cWord.Extract(tHistories.GetHistory(x));
         if (maxentTagger.dict.GetCount(word, tag) == 0)
         {
             return false;
         }
     }
     if (maxentTagger.possibleTagsOnly)
     {
         string               word = ExtractorFrames.cWord.Extract(tHistories.GetHistory(x));
         string[]             tags = maxentTagger.dict.GetTags(word);
         ICollection <string> s    = Generics.NewHashSet(Arrays.AsList(maxentTagger.tags.DeterministicallyExpandTags(tags)));
         if (reportPossibleTags)
         {
             // NOTE(review): Java-style Printf with %s placeholders; confirm the project
             // supplies this method for System.Console.Error.
             System.Console.Error.Printf("possible tags for %s: %s\n", word, Arrays.ToString(Sharpen.Collections.ToArray(s)));
         }
         if (!s.Contains(tag))
         {
             return false;
         }
     }
     return true;
 }

 /// <summary>
 /// Stores <paramref name="featureIndex"/> in <c>maxentTagger.fAssociations</c> under the
 /// extractor number, value and tag of <paramref name="fK"/>, creating missing entries on demand.
 /// </summary>
 /// <param name="fK">Feature key whose <c>num</c>, <c>val</c> and <c>tag</c> select the slot.</param>
 /// <param name="featureIndex">Index of the feature that was just created for the key.</param>
 private void RecordFeatureAssociation(FeatureKey fK, int featureIndex)
 {
     // TODO: rearrange some of this code, such as not needing to
     // look up the tag # in the index
     // Grow the list until it has an entry for extractor number fK.num.
     for (int i = maxentTagger.fAssociations.Count; i <= fK.num; ++i)
     {
         maxentTagger.fAssociations.Add(Generics.NewHashMap <string, int[]>());
     }
     IDictionary <string, int[]> fValueAssociations = maxentTagger.fAssociations[fK.num];
     int[] fTagAssociations;
     // A .NET dictionary indexer throws on a missing key, so probe with TryGetValue instead.
     if (!fValueAssociations.TryGetValue(fK.val, out fTagAssociations) || fTagAssociations == null)
     {
         fTagAssociations = new int[ySize];
         for (int i = 0; i < ySize; ++i)
         {
             // -1 marks "no feature for this tag".
             fTagAssociations[i] = -1;
         }
         fValueAssociations[fK.val] = fTagAssociations;
     }
     fTagAssociations[maxentTagger.tags.GetIndex(fK.tag)] = featureIndex;
 }

 /// <summary>
 /// Scans <c>fnumArr</c> and logs the largest count in any cell, the largest number of
 /// non-zero y entries for a single x, and the number of non-zero and zero cells.
 /// </summary>
 private void LogFeatureCountStatistics()
 {
     // what is the maximum number of active features per pair
     int max      = 0;
     int maxGt    = 0;
     int numZeros = 0;
     for (int x = 0; x < xSize; x++)
     {
         int numGt = 0;
         for (int y = 0; y < ySize; y++)
         {
             if (fnumArr[x][y] > 0)
             {
                 numGt++;
                 if (max < fnumArr[x][y])
                 {
                     max = fnumArr[x][y];
                 }
             }
             else
             {
                 numZeros++;
             }
         }
         if (maxGt < numGt)
         {
             maxGt = numGt;
         }
     }
     log.Info("  Max features per x,y pair: " + max);
     log.Info("  Max non-zero y values for an x: " + maxGt);
     log.Info("  Number of non-zero feature x,y pairs: " + (xSize * ySize - numZeros));
     log.Info("  Number of zero feature x,y pairs: " + numZeros);
 }