/* --- unused
 * double gprime(double lambdaP, int index) {
 *   double s = 0.0;
 *   for (int i = 0; i < p.functions.get(index).len(); i++) {
 *     int y = ((TaggerFeature) (p.functions.get(index))).getYTag();
 *     int x = (p.functions.get(index)).getX(i);
 *     s = s + p.data.ptildeX(x) * pcond(y, x) * 1 * Math.exp(lambdaP * fnum(x, y)) * fnum(x, y);
 *   }
 *   return s;
 * }
 * --- */

/// <summary>
/// Sums <c>p.data.PtildeX(x) * Pcond(y, x)</c> over every x value of the given
/// feature, where y is the tag reported by the feature's <c>GetYTag()</c>.
/// </summary>
/// <param name="f">The feature to evaluate; it is cast to <c>TaggerFeature</c>.</param>
/// <returns>The accumulated sum (0.0 when the feature has no x values).</returns>
internal override double FExpected(Feature f)
{
    TaggerFeature taggerFeature = (TaggerFeature)f;
    double total = 0.0;
    int tag = taggerFeature.GetYTag();

    int position = 0;
    while (position < f.Len())
    {
        int history = taggerFeature.GetX(position);
        total += p.data.PtildeX(history) * Pcond(tag, history);
        position++;
    }

    return total;
}
/// <summary>This method uses and deletes a file tempXXXXXX.x in the current directory!</summary>
/// <remarks>
/// For every feature key in <c>sTemplates</c> whose evidence count passes <c>Populated</c>,
/// a <c>TaggerFeature</c> is added to <c>feats</c>, the per-(x, y) counter in <c>fnumArr</c>
/// is incremented, and the feature number is recorded in <c>maxentTagger.fAssociations</c>.
/// The x values of newly positioned features are spooled to the temporary file and read
/// back into <c>feats.xIndexed</c> once their total number is known.
/// Any exception is rethrown wrapped in a <c>RuntimeIOException</c>; the temporary file is
/// closed and deleted on both the success and the failure path.
/// </remarks>
private void GetFeaturesNew()
{
    // todo: can fnumArr overflow? (the counters are bytes; ++ would wrap in an unchecked context)
    try
    {
        log.Info("TaggerExperiments.getFeaturesNew: initializing fnumArr.");
        // A jagged array only allocates the outer dimension: each row has to be created
        // explicitly, otherwise the first fnumArr[x][y] access dereferences null.
        fnumArr = new byte[xSize][];
        for (int row = 0; row < xSize; row++)
        {
            fnumArr[row] = new byte[ySize];
        }

        // what is the maximum number of active features
        File hFile = File.CreateTempFile("temp", ".x", new File("./"));
        try
        {
            RandomAccessFile hF = new RandomAccessFile(hFile, "rw");
            try
            {
                log.Info(" length of sTemplates keys: " + sTemplates.Count);
                log.Info("getFeaturesNew adding features ...");
                int current = 0;
                int numFeats = 0;
                foreach (FeatureKey fK in sTemplates)
                {
                    int numF = fK.num;
                    Pair<int, string> wT = new Pair<int, string>(numF, fK.val);
                    int[] xValues = tFeature.GetXValues(wT);
                    if (xValues == null)
                    {
                        log.Info(" xValues is null: " + fK);
                        continue;
                    }

                    // First pass: count the evidence for this feature over the permitted histories.
                    int numEvidence = 0;
                    int y = maxentTagger.tags.GetIndex(fK.tag);
                    foreach (int xValue in xValues)
                    {
                        if (!IsTagPermittedForHistory(xValue, fK.tag, true))
                        {
                            continue;
                        }
                        numEvidence += this.px[xValue];
                    }

                    if (!Populated(numF, numEvidence))
                    {
                        continue;
                    }

                    int[] positions = tFeature.GetPositions(fK);
                    if (maxentTagger.occurringTagsOnly || maxentTagger.possibleTagsOnly)
                    {
                        // TODO
                        // Cached positions are not reused when histories are filtered by tag.
                        positions = null;
                    }

                    if (positions == null)
                    {
                        // write this in the file and create a TaggerFeature for it
                        int numElements = 0;
                        foreach (int x in xValues)
                        {
                            if (!IsTagPermittedForHistory(x, fK.tag, false))
                            {
                                continue;
                            }
                            numElements++;
                            hF.WriteInt(x);
                            fnumArr[x][y]++;
                        }
                        TaggerFeature tF = new TaggerFeature(current, current + numElements - 1, fK, maxentTagger.GetTagIndex(fK.tag), this);
                        tFeature.AddPositions(current, current + numElements - 1, fK);
                        current = current + numElements;
                        feats.Add(tF);
                    }
                    else
                    {
                        foreach (int x in xValues)
                        {
                            fnumArr[x][y]++;
                        }
                        // this is the second time to write these values
                        TaggerFeature tF = new TaggerFeature(positions[0], positions[1], fK, maxentTagger.GetTagIndex(fK.tag), this);
                        feats.Add(tF);
                    }

                    // Grow the association list until it has a slot for feature template fK.num.
                    for (int i = maxentTagger.fAssociations.Count; i <= fK.num; ++i)
                    {
                        maxentTagger.fAssociations.Add(Generics.NewHashMap<string, int[]>());
                    }

                    IDictionary<string, int[]> fValueAssociations = maxentTagger.fAssociations[fK.num];
                    // TryGetValue instead of the indexer: a missing key must create the entry,
                    // not raise KeyNotFoundException.
                    int[] fTagAssociations;
                    if (!fValueAssociations.TryGetValue(fK.val, out fTagAssociations) || fTagAssociations == null)
                    {
                        fTagAssociations = new int[ySize];
                        for (int i = 0; i < ySize; ++i)
                        {
                            fTagAssociations[i] = -1;
                        }
                        fValueAssociations[fK.val] = fTagAssociations;
                    }
                    // y is the index of fK.tag looked up above, so it is not looked up again.
                    fTagAssociations[y] = numFeats;
                    numFeats++;
                }
                // foreach FeatureKey fK

                // read out the file and put everything in an array of ints stored in Feats
                tFeature.Release();
                feats.xIndexed = new int[current];
                hF.Seek(0);
                for (int read = 0; read < current; read++)
                {
                    feats.xIndexed[read] = hF.ReadInt();
                }
                log.Info(" total feats: " + sTemplates.Count + ", populated: " + numFeats);
            }
            finally
            {
                hF.Close();
            }
        }
        finally
        {
            hFile.Delete();
        }

        LogFeatureCountStatistics();
        log.Info("end getFeaturesNew.");
    }
    catch (Exception e)
    {
        throw new RuntimeIOException(e);
    }
}

/// <summary>
/// Applies the <c>occurringTagsOnly</c> and <c>possibleTagsOnly</c> restrictions of the tagger
/// to one history: returns false when either enabled restriction rejects the tag for the
/// current word of history <paramref name="x"/>, and true otherwise.
/// </summary>
/// <param name="x">Index of the history whose current word is checked.</param>
/// <param name="tag">The tag of the feature key being processed.</param>
/// <param name="tracePossibleTags">
/// When true and <c>possibleTagsOnly</c> is set, the expanded tag set of the word is printed
/// to standard error (only the evidence-counting pass does this).
/// </param>
private bool IsTagPermittedForHistory(int x, string tag, bool tracePossibleTags)
{
    if (maxentTagger.occurringTagsOnly)
    {
        //check whether the current word in x has occurred with y
        string word = ExtractorFrames.cWord.Extract(tHistories.GetHistory(x));
        if (maxentTagger.dict.GetCount(word, tag) == 0)
        {
            return false;
        }
    }
    if (maxentTagger.possibleTagsOnly)
    {
        string word = ExtractorFrames.cWord.Extract(tHistories.GetHistory(x));
        string[] tags = maxentTagger.dict.GetTags(word);
        ICollection<string> s = Generics.NewHashSet(Arrays.AsList(maxentTagger.tags.DeterministicallyExpandTags(tags)));
        if (tracePossibleTags)
        {
            System.Console.Error.Printf("possible tags for %s: %s\n", word, Arrays.ToString(Sharpen.Collections.ToArray(s)));
        }
        if (!s.Contains(tag))
        {
            return false;
        }
    }
    return true;
}

/// <summary>
/// Scans <c>fnumArr</c> and logs the largest counter of any (x, y) pair, the largest number
/// of non-zero y entries for a single x, and the numbers of non-zero and zero pairs.
/// </summary>
private void LogFeatureCountStatistics()
{
    // what is the maximum number of active features per pair
    int max = 0;
    int maxGt = 0;
    int numZeros = 0;
    for (int x = 0; x < xSize; x++)
    {
        int numGt = 0;
        for (int y = 0; y < ySize; y++)
        {
            if (fnumArr[x][y] > 0)
            {
                numGt++;
                if (max < fnumArr[x][y])
                {
                    max = fnumArr[x][y];
                }
            }
            else
            {
                // if 00
                numZeros++;
            }
        }
        if (maxGt < numGt)
        {
            maxGt = numGt;
        }
    }
    // for x
    log.Info(" Max features per x,y pair: " + max);
    log.Info(" Max non-zero y values for an x: " + maxGt);
    log.Info(" Number of non-zero feature x,y pairs: " + (xSize * ySize - numZeros));
    log.Info(" Number of zero feature x,y pairs: " + numZeros);
}