Ejemplo n.º 1
0
        /// <summary>
        /// Computes the per-GE-feature statistics used by this objective:
        /// the normalized and smoothed empirical label distribution of each GE
        /// feature (counted over the labeled dataset), and an inverted index from
        /// each GE feature to the positions of the unlabeled datums that contain it.
        /// Results are stored in <c>geFeature2EmpiricalDist</c> and
        /// <c>geFeature2DatumList</c>; the number of unlabeled datums that contain
        /// at least one GE feature is written to standard output.
        /// </summary>
        /// <param name="geFeatures">
        /// The GE features. A feature's position in this list is its row index in
        /// both output containers; if a feature occurs more than once, the last
        /// position wins in the lookup map.
        /// </param>
        private void ComputeEmpiricalStatistics(IList <F> geFeatures)
        {
            int numGeFeatures = geFeatures.Count;
            // NOTE(review): assumes labelIndex exposes Size() in the same way labeledDataset does -- confirm.
            int numLabels = labeledDataset.labelIndex.Size();

            // Allocate one row per GE feature and one slot per label. The previous
            // code created an empty jagged array, so the first row access threw
            // IndexOutOfRangeException. Fresh double[] rows are already zero-filled.
            geFeature2EmpiricalDist = new double[numGeFeatures][];
            geFeature2DatumList     = new List <IList <int> >(numGeFeatures);
            IDictionary <F, int> geFeatureMap            = Generics.NewHashMap();
            ICollection <int>    activeUnlabeledExamples = Generics.NewHashSet();

            for (int n = 0; n < numGeFeatures; n++)
            {
                geFeature2EmpiricalDist[n] = new double[numLabels];
                geFeature2DatumList.Add(new List <int>());
                geFeatureMap[geFeatures[n]] = n;
            }

            // Count, for each GE feature, how often it co-occurs with each label
            // in the labeled dataset.
            for (int i = 0; i < labeledDataset.Size(); i++)
            {
                IDatum <L, F> datum   = labeledDataset.GetDatum(i);
                int           labelID = labeledDataset.labelIndex.IndexOf(datum.Label());
                foreach (F feature in datum.AsFeatures())
                {
                    // Single lookup: features that are not GE features are skipped.
                    int geFnum;
                    if (geFeatureMap.TryGetValue(feature, out geFnum))
                    {
                        geFeature2EmpiricalDist[geFnum][labelID]++;
                    }
                }
            }

            // Turn the raw counts into a normalized, smoothed label distribution per feature.
            for (int n = 0; n < numGeFeatures; n++)
            {
                ArrayMath.Normalize(geFeature2EmpiricalDist[n]);
                SmoothDistribution(geFeature2EmpiricalDist[n]);
            }

            // Build the inverted index from each GE feature to the unlabeled datums that contain it.
            for (int i = 0; i < unlabeledDataList.Count; i++)
            {
                IDatum <L, F> datum = unlabeledDataList[i];
                foreach (F feature in datum.AsFeatures())
                {
                    int geFnum;
                    if (geFeatureMap.TryGetValue(feature, out geFnum))
                    {
                        geFeature2DatumList[geFnum].Add(i);
                        // Set semantics: each unlabeled datum is counted once even
                        // if it contains several GE features.
                        activeUnlabeledExamples.Add(i);
                    }
                }
            }
            System.Console.Out.WriteLine("Number of active unlabeled examples:" + activeUnlabeledExamples.Count);
        }