/// <summary>
/// Produces the meaning-embedding feature values for a single word encounter.
/// </summary>
/// <param name="encounter">The word encounter whose word, part of speech and meaning are read.</param>
/// <param name="context">Supplies the data set name, the reordered dictionary and the project's meaning embeddings.</param>
/// <returns>
/// For the Train and Validation data sets: the embedding looked up for <c>encounter.Meaning</c>.
/// Otherwise: the weighted average of the embeddings of every dictionary meaning of
/// <c>encounter.Word</c> whose part of speech equals <c>encounter.Pos</c>, or the result of
/// <c>FeatureValue.NewArray(0, VectorLength)</c> when the word has no such meaning.
/// </returns>
public IList<FeatureValue> GetValues(RawWordEncounter encounter, FeatureSelectionContext context)
{
    var meaningEmbeddings = context.Project.MeaningEmbeddings;

    if (context.DataSetName == DataSetName.Train || context.DataSetName == DataSetName.Validation)
    {
        var embedding = meaningEmbeddings.GetVectorOrDefault(encounter.Meaning);
        return FeatureValue.NewArray(embedding);
    }

    // The null-conditional short-circuits the whole chain, so "meanings" is null
    // when the dictionary has no entry for the word.
    var meanings = context.ReorderedDictionary
        .GetByName(encounter.Word)?
        .Meanings.Values
        .Where(x => x.PartOfSpeech == encounter.Pos)
        .ToArray();

    if (meanings == null || meanings.Length == 0)
    {
        return FeatureValue.NewArray(0, meaningEmbeddings.VectorLength);
    }

    var averageEmbedding = new float[meaningEmbeddings.VectorLength];
    var divisor = 0;

    foreach (var meaning in meanings)
    {
        // Each meaning is weighted by its encounter count plus one.
        // NOTE(review): the +1 presumably keeps never-encountered meanings in the average - confirm.
        var weight = meaning.Encounters + 1;
        var embedding = meaningEmbeddings.GetVectorOrDefault(meaning.Meaning);

        for (var i = 0; i < averageEmbedding.Length; i++)
        {
            averageEmbedding[i] += embedding[i] * weight;
        }

        // The weight is added once per meaning. Adding it once per vector component
        // (as was done previously) inflates the divisor by a factor of VectorLength
        // and shrinks every component of the result accordingly.
        divisor += weight;
    }

    for (var i = 0; i < averageEmbedding.Length; i++)
    {
        averageEmbedding[i] /= divisor;
    }

    return FeatureValue.NewArray(averageEmbedding);
}
/// <summary>
/// Produces the meaning-embedding feature values for a single word encounter.
/// </summary>
/// <param name="encounter">The word encounter whose word, part of speech and meaning are read.</param>
/// <param name="context">Supplies the data set name, the reordered dictionary and the project's meaning embeddings.</param>
/// <returns>
/// For the Train and Validation data sets: the embedding looked up for <c>encounter.Meaning</c>.
/// Otherwise: the embedding looked up for the dictionary meaning of <c>encounter.Word</c> that has
/// <c>Id == 1</c> and the encounter's part of speech (the lookup key is null when no such meaning exists).
/// </returns>
public IList<FeatureValue> GetValues(RawWordEncounter encounter, FeatureSelectionContext context)
{
    var usesEncounterMeaning =
        context.DataSetName == DataSetName.Train ||
        context.DataSetName == DataSetName.Validation;

    if (usesEncounterMeaning)
    {
        return FeatureValue.NewArray(
            context.Project.MeaningEmbeddings.GetVectorOrDefault(encounter.Meaning));
    }

    var dictionaryEntry = context.ReorderedDictionary.GetByName(encounter.Word);

    // NOTE(review): Id == 1 is presumably the top-ranked (most frequent) meaning in the
    // reordered dictionary - confirm. SingleOrDefault throws if several meanings match.
    var firstRankedMeaning = dictionaryEntry?
        .Meanings.Values
        .SingleOrDefault(candidate => candidate.Id == 1 && candidate.PartOfSpeech == encounter.Pos)?
        .Meaning;

    // firstRankedMeaning may be null here; the lookup is expected to cope with that
    // (presumably by returning its default vector - verify against GetVectorOrDefault).
    var firstRankedEmbedding = context.Project.MeaningEmbeddings.GetVectorOrDefault(firstRankedMeaning);
    return FeatureValue.NewArray(firstRankedEmbedding);
}
/// <summary>
/// Produces the part-of-speech feature values for a single word encounter.
/// </summary>
/// <param name="encounter">The word encounter whose <c>Pos</c> is looked up.</param>
/// <param name="context">Supplies the filtered part-of-speech list used for the lookup.</param>
/// <returns>The vector returned by <c>FilteredPosList.GetVector</c>, wrapped as feature values.</returns>
public IList<FeatureValue> GetValues(RawWordEncounter encounter, FeatureSelectionContext context)
    => FeatureValue.NewArray(context.FilteredPosList.GetVector(encounter.Pos));
/// <summary>
/// Combines the per-encounter feature vectors into a single vector using a weighted sum.
/// </summary>
/// <param name="values">The encounters together with their already computed feature values.</param>
/// <param name="record">Not read by this implementation.</param>
/// <param name="featureGroup">Not read by this implementation.</param>
/// <param name="context">Supplies the project's data analysis, used to look up each word.</param>
/// <returns>
/// The result of <c>FeatureValue.NewArray(0, length)</c> when any encounter is the empty word
/// encounter or when the summed train encounters of all words is zero; otherwise the weighted
/// sum of the encounter vectors. Weights are uniform when any word has no analysis, and
/// proportional to each word's train encounter count when all words have one.
/// </returns>
public IList<FeatureValue> Aggregate(
    IList<EncounterValues> values,
    RawRecord record,
    FeatureGroup featureGroup,
    FeatureSelectionContext context)
{
    var analyses = new WordAnalysis[values.Count];
    var anyAnalysisMissing = false;

    for (var index = 0; index < values.Count; index++)
    {
        // A single empty encounter short-circuits the whole aggregation.
        if (values[index].Encounter == RawWordEncounter.EmptyWordEncounter)
        {
            return FeatureValue.NewArray(0, values[index].Values.Count);
        }

        var analysis = context.Project.DataAnalysis.GetByName(values[index].Encounter.Word);
        analyses[index] = analysis;

        if (analysis == null)
        {
            anyAnalysisMissing = true;
        }
    }

    var weights = new double[analyses.Length];

    if (anyAnalysisMissing)
    {
        // Without an analysis for every word, fall back to equal weights.
        var uniformWeight = 1d / analyses.Length;
        for (var index = 0; index < weights.Length; index++)
        {
            weights[index] = uniformWeight;
        }
    }
    else
    {
        // First pass stores the raw train encounter counts in the weight slots;
        // the second pass normalises them by the total.
        var totalTrainEncounters = 0;
        for (var index = 0; index < weights.Length; index++)
        {
            var wordTrainEncounters = analyses[index].TrainEncounters.Values
                .Sum(x => x.Encounters);

            weights[index] = wordTrainEncounters;
            totalTrainEncounters += wordTrainEncounters;
        }

        if (totalTrainEncounters == 0)
        {
            return FeatureValue.NewArray(0, values[0].Values.Count);
        }

        for (var index = 0; index < weights.Length; index++)
        {
            weights[index] = weights[index] / totalTrainEncounters;
        }
    }

    // The output length is taken from the first encounter's vector.
    var aggregated = new float[values[0].Values.Count];

    for (var component = 0; component < aggregated.Length; component++)
    {
        for (var source = 0; source < values.Count; source++)
        {
            var contribution = weights[source] * values[source].Values[component].NumericValue;
            aggregated[component] += (float)contribution;
        }
    }

    return FeatureValue.NewArray(aggregated);
}
/// <summary>
/// Produces the meaning-embedding feature values for a single word encounter.
/// </summary>
/// <param name="encounter">The word encounter whose <c>Meaning</c> is looked up.</param>
/// <param name="context">Supplies the project's meaning embeddings.</param>
/// <returns>
/// The vector returned by <c>MeaningEmbeddings.GetVectorOrDefault</c> for the encounter's meaning,
/// wrapped as feature values.
/// </returns>
public IList<FeatureValue> GetValues(RawWordEncounter encounter, FeatureSelectionContext context)
    => FeatureValue.NewArray(context.Project.MeaningEmbeddings.GetVectorOrDefault(encounter.Meaning));