示例#1
0
        /// <summary>
        /// Produces the meaning-embedding feature values for a single word encounter.
        /// </summary>
        /// <remarks>
        /// For the Train and Validation data sets the embedding of the encounter's own meaning is used.
        /// For every other data set the embeddings of all dictionary meanings of the word that share the
        /// encounter's part of speech are combined into a weighted average, where each meaning is weighted
        /// by <c>Encounters + 1</c>. When the word has no such meanings, the result of
        /// <c>FeatureValue.NewArray(0, VectorLength)</c> is returned instead.
        /// </remarks>
        /// <param name="encounter">The word encounter to build feature values for.</param>
        /// <param name="context">Supplies the data set name, the reordered dictionary and the project embeddings.</param>
        /// <returns>The feature values built from the selected or averaged embedding.</returns>
        public IList <FeatureValue> GetValues(RawWordEncounter encounter, FeatureSelectionContext context)
        {
            if (context.DataSetName == DataSetName.Train ||
                context.DataSetName == DataSetName.Validation)
            {
                var knownEmbedding = context.Project.MeaningEmbeddings.GetVectorOrDefault(encounter.Meaning);

                return FeatureValue.NewArray(knownEmbedding);
            }

            var meanings = context.ReorderedDictionary
                           .GetByName(encounter.Word)?
                           .Meanings.Values
                           .Where(x => x.PartOfSpeech == encounter.Pos)
                           .ToArray();

            if (meanings == null || meanings.Length == 0)
            {
                return FeatureValue.NewArray(0, context.Project.MeaningEmbeddings.VectorLength);
            }

            var averageEmbedding = new float[context.Project.MeaningEmbeddings.VectorLength];
            var totalWeight      = 0;

            foreach (var meaning in meanings)
            {
                var weight    = meaning.Encounters + 1;
                var embedding = context.Project.MeaningEmbeddings.GetVectorOrDefault(meaning.Meaning);

                for (var i = 0; i < averageEmbedding.Length; i++)
                {
                    averageEmbedding[i] += embedding[i] * weight;
                }

                // The weight is counted once per meaning. Adding it once per vector component
                // (as the previous version did) inflates the divisor by VectorLength and
                // shrinks the averaged embedding accordingly.
                totalWeight += weight;
            }

            for (var i = 0; i < averageEmbedding.Length; i++)
            {
                averageEmbedding[i] /= totalWeight;
            }

            return FeatureValue.NewArray(averageEmbedding);
        }
        /// <summary>
        /// Produces the meaning-embedding feature values for a single word encounter.
        /// </summary>
        /// <remarks>
        /// For the Train and Validation data sets the embedding of the encounter's own meaning is used.
        /// Otherwise the word is looked up in the reordered dictionary and the meaning with
        /// <c>Id == 1</c> and the encounter's part of speech is used (presumably the most frequent
        /// meaning; confirm against how the dictionary is reordered). A missing word or meaning
        /// results in <c>GetVectorOrDefault</c> being called with a null meaning.
        /// </remarks>
        /// <param name="encounter">The word encounter to build feature values for.</param>
        /// <param name="context">Supplies the data set name, the reordered dictionary and the project embeddings.</param>
        /// <returns>The feature values built from the chosen embedding.</returns>
        public IList <FeatureValue> GetValues(RawWordEncounter encounter, FeatureSelectionContext context)
        {
            var dataSet        = context.DataSetName;
            var meaningIsKnown = dataSet == DataSetName.Train || dataSet == DataSetName.Validation;

            if (meaningIsKnown)
            {
                return FeatureValue.NewArray(
                    context.Project.MeaningEmbeddings.GetVectorOrDefault(encounter.Meaning));
            }

            var dictionaryEntry = context.ReorderedDictionary.GetByName(encounter.Word);

            // SingleOrDefault is intentional: more than one match is treated as an error.
            var primaryMeaning = dictionaryEntry?.Meanings.Values
                                 .SingleOrDefault(candidate => candidate.Id == 1 &&
                                                               candidate.PartOfSpeech == encounter.Pos);

            return FeatureValue.NewArray(
                context.Project.MeaningEmbeddings.GetVectorOrDefault(primaryMeaning?.Meaning));
        }
示例#3
0
        /// <summary>
        /// Produces feature values from the vector that the filtered part-of-speech list
        /// associates with the encounter's part of speech.
        /// </summary>
        /// <param name="encounter">The word encounter whose part of speech is encoded.</param>
        /// <param name="context">Supplies the filtered part-of-speech list.</param>
        /// <returns>The feature values built from the part-of-speech vector.</returns>
        public IList <FeatureValue> GetValues(RawWordEncounter encounter, FeatureSelectionContext context)
        {
            return FeatureValue.NewArray(context.FilteredPosList.GetVector(encounter.Pos));
        }
        /// <summary>
        /// Combines the per-encounter feature vectors into a single weighted-sum vector.
        /// </summary>
        /// <remarks>
        /// If any encounter is <c>RawWordEncounter.EmptyWordEncounter</c>, the result of
        /// <c>FeatureValue.NewArray(0, n)</c> is returned, with <c>n</c> taken from that encounter's
        /// value count. If the data analysis has no entry for at least one word, all encounters are
        /// weighted equally. Otherwise each encounter is weighted by its word's share of the summed
        /// training encounters; when that sum is zero, <c>FeatureValue.NewArray(0, n)</c> is returned
        /// with <c>n</c> taken from the first encounter's value count.
        /// </remarks>
        /// <param name="values">Per-encounter feature values to aggregate.</param>
        /// <param name="record">The record being processed (not used by this implementation).</param>
        /// <param name="featureGroup">The feature group being processed (not used by this implementation).</param>
        /// <param name="context">Supplies the project data analysis used to derive the weights.</param>
        /// <returns>The aggregated feature values.</returns>
        public IList <FeatureValue> Aggregate(
            IList <EncounterValues> values, RawRecord record,
            FeatureGroup featureGroup, FeatureSelectionContext context)
        {
            var analyses   = new WordAnalysis[values.Count];
            var anyMissing = false;

            for (var index = 0; index < values.Count; index++)
            {
                if (values[index].Encounter == RawWordEncounter.EmptyWordEncounter)
                {
                    return FeatureValue.NewArray(0, values[index].Values.Count);
                }

                WordAnalysis analysis = context.Project.DataAnalysis.GetByName(values[index].Encounter.Word);

                analyses[index] = analysis;
                anyMissing     |= (analysis == null);
            }

            var weights = new double[analyses.Length];

            if (anyMissing)
            {
                // Without statistics for every word, fall back to equal weights.
                var uniformWeight = 1d / analyses.Length;

                for (var index = 0; index < weights.Length; index++)
                {
                    weights[index] = uniformWeight;
                }
            }
            else
            {
                var totalTrainEncounters = 0;

                // First pass: store each word's raw training-encounter count in its weight slot.
                for (var index = 0; index < weights.Length; index++)
                {
                    var wordTrainEncounters = analyses[index].TrainEncounters.Values
                                              .Sum(item => item.Encounters);

                    weights[index]        = wordTrainEncounters;
                    totalTrainEncounters += wordTrainEncounters;
                }

                if (totalTrainEncounters == 0)
                {
                    return FeatureValue.NewArray(0, values[0].Values.Count);
                }

                // Second pass: normalise the raw counts into shares of the total.
                for (var index = 0; index < weights.Length; index++)
                {
                    weights[index] = weights[index] / totalTrainEncounters;
                }
            }

            var aggregated = new float[values[0].Values.Count];

            for (var component = 0; component < aggregated.Length; component++)
            {
                for (var source = 0; source < values.Count; source++)
                {
                    var contribution = weights[source] * values[source].Values[component].NumericValue;

                    // Each term is narrowed to float before being added, as in the original.
                    aggregated[component] += (float)contribution;
                }
            }

            return FeatureValue.NewArray(aggregated);
        }
示例#5
0
        /// <summary>
        /// Produces feature values from the embedding of the encounter's meaning,
        /// as returned by the project embeddings' <c>GetVectorOrDefault</c>.
        /// </summary>
        /// <param name="encounter">The word encounter whose meaning is embedded.</param>
        /// <param name="context">Supplies the project meaning embeddings.</param>
        /// <returns>The feature values built from the meaning embedding.</returns>
        public IList <FeatureValue> GetValues(RawWordEncounter encounter, FeatureSelectionContext context)
            => FeatureValue.NewArray(context.Project.MeaningEmbeddings.GetVectorOrDefault(encounter.Meaning));