示例#1
0
        /// <summary>
        /// Builds a multimap that associates every feature vector of the corpus,
        /// wrapped as a <see cref="FeatureVectorAsObject"/> using
        /// <c>Trainer.FEATURES_HPOS</c>, with the <c>corpus.hpos</c> value stored
        /// at the same index.
        /// </summary>
        /// <param name="corpus">
        /// Corpus supplying the parallel <c>featureVectors</c> and <c>hpos</c> collections.
        /// </param>
        /// <returns>
        /// A new multimap with one added entry per element of <c>corpus.featureVectors</c>.
        /// </returns>
        public static ArrayListMultiMap<FeatureVectorAsObject, int> getHPosContextCategoryMap(Corpus corpus)
        {
            ArrayListMultiMap<FeatureVectorAsObject, int> categoriesByContext =
                ArrayListMultiMap<FeatureVectorAsObject, int>.create();

            // The number of contexts is read once, before any entry is added.
            int total = corpus.featureVectors.Count;
            int index = 0;

            while (index < total)
            {
                int[] features = corpus.featureVectors[index];
                int category = corpus.hpos[index];

                FeatureVectorAsObject key = new FeatureVectorAsObject(features, Trainer.FEATURES_HPOS);
                categoriesByContext.Add(key, category);

                index++;
            }

            return categoriesByContext;
        }
示例#2
0
        /// <summary>
        /// Builds a multimap that associates every feature vector of the corpus,
        /// wrapped as a <see cref="FeatureVectorAsObject"/> using
        /// <c>Trainer.FEATURES_INJECT_WS</c>, with the <c>corpus.injectWhitespace</c>
        /// value stored at the same index.
        /// </summary>
        /// <param name="corpus">
        /// Corpus supplying the parallel <c>featureVectors</c> and
        /// <c>injectWhitespace</c> collections.
        /// </param>
        /// <returns>
        /// A new multimap with one added entry per element of <c>corpus.featureVectors</c>.
        /// </returns>
        public static ArrayListMultiMap<FeatureVectorAsObject, int> getWSContextCategoryMap(Corpus corpus)
        {
            ArrayListMultiMap<FeatureVectorAsObject, int> categoriesByContext =
                ArrayListMultiMap<FeatureVectorAsObject, int>.create();

            // The number of contexts is read once, before any entry is added.
            int total = corpus.featureVectors.Count;
            int index = 0;

            while (index < total)
            {
                int[] features = corpus.featureVectors[index];
                int category = corpus.injectWhitespace[index];

                FeatureVectorAsObject key = new FeatureVectorAsObject(features, Trainer.FEATURES_INJECT_WS);
                categoriesByContext.Add(key, category);

                index++;
            }

            return categoriesByContext;
        }
示例#3
0
        /// <summary>
        /// Prints a per-context report of the whitespace and hpos categories found
        /// in the corpus, together with their normalized entropies, followed by the
        /// mean of each entropy list and a "context richness" ratio.
        /// </summary>
        /// <remarks>
        /// Each feature vector is grouped twice: once keyed with
        /// <c>Trainer.FEATURES_INJECT_WS</c> (values from <c>corpus.injectWhitespace</c>)
        /// and once keyed with <c>Trainer.FEATURES_HPOS</c> (values from
        /// <c>corpus.hpos</c>). The report iterates over the keys of the whitespace
        /// grouping only.
        /// </remarks>
        /// <param name="corpus">
        /// Corpus supplying the parallel <c>featureVectors</c>,
        /// <c>injectWhitespace</c> and <c>hpos</c> collections.
        /// </param>
        public static void examineCorpus(Corpus corpus)
        {
            IList<double> wsEntropyValues = new List<double>();
            IList<double> hposEntropyValues = new List<double>();

            ArrayListMultiMap<FeatureVectorAsObject, int> wsGroups =
                ArrayListMultiMap<FeatureVectorAsObject, int>.create();
            ArrayListMultiMap<FeatureVectorAsObject, int> hposGroups =
                ArrayListMultiMap<FeatureVectorAsObject, int>.create();

            int numContexts = corpus.featureVectors.Count;

            // Pass 1: group both category kinds by their (differently masked) feature vector.
            int index = 0;
            while (index < numContexts)
            {
                int[] features = corpus.featureVectors[index];
                int wsCategory = corpus.injectWhitespace[index];
                int hposCategory = corpus.hpos[index];

                wsGroups.Add(new FeatureVectorAsObject(features, Trainer.FEATURES_INJECT_WS), wsCategory);
                hposGroups.Add(new FeatureVectorAsObject(features, Trainer.FEATURES_HPOS), hposCategory);

                index++;
            }

            // Pass 2: one report line per distinct whitespace context.
            foreach (FeatureVectorAsObject context in wsGroups.Keys)
            {
                var wsCategories = wsGroups[context];
                // NOTE(review): the hpos grouping is queried with a key that was built using
                // FEATURES_INJECT_WS, not FEATURES_HPOS. Whether this lookup always hits depends
                // on how FeatureVectorAsObject implements equality/hashing - confirm.
                var hposCategories = hposGroups[context];

                HashBag<int> wsBag = getCategoriesBag(wsCategories);
                HashBag<int> hposBag = getCategoriesBag(hposCategories);

                var wsRatios = getCategoryRatios(wsBag.Values);
                double wsEntropy = getNormalizedCategoryEntropy(wsRatios);

                var hposRatios = getCategoryRatios(hposBag.Values);
                double hposEntropy = getNormalizedCategoryEntropy(hposRatios);

                wsEntropyValues.Add(wsEntropy);
                hposEntropyValues.Add(hposEntropy);

                Console.Write("{0,130} : {1},{2} {3},{4}\n", context, wsBag, wsEntropy, hposBag, hposEntropy);
            }

            // NOTE(review): both summary lines carry the same "MEAN " label; the first is the
            // whitespace entropy mean, the second the hpos entropy mean.
            var wsMean = BuffUtils.mean(wsEntropyValues);
            Console.WriteLine("MEAN " + wsMean);
            var hposMean = BuffUtils.mean(hposEntropyValues);
            Console.WriteLine("MEAN " + hposMean);

            // 0..1 where 1 means every token had a different context.
            // NOTE(review): with an empty corpus this is a 0/0 float division - confirm callers
            // never pass one, or that the resulting value is acceptable in the output.
            int uniqueContexts = wsEntropyValues.Count;
            float contextRichness = uniqueContexts / (float)numContexts;

            Console.WriteLine("Context richness = " + contextRichness + " uniq ctxs=" + uniqueContexts + ", nctxs=" + numContexts);
        }