Exemple #1
0
        /// <summary>
        /// Resolves the meaning for an annotated encounter identified by <paramref name="keyId"/>.
        /// </summary>
        /// <remarks>
        /// The gold-key entry is a space-separated list of sense keys. Keys that have no entry in
        /// <paramref name="synsetMappings"/> are skipped; the remaining ones are converted with
        /// <c>FromRawMeaning</c> and ordered by the <c>Id</c> of the matching meaning in the
        /// dictionary entry for <paramref name="word"/>. Candidates without a matching meaning
        /// sort last (<see cref="int.MaxValue"/>). The first candidate in that order is returned.
        /// NOTE(review): a lower meaning Id presumably denotes a more frequent sense - confirm.
        /// </remarks>
        /// <param name="dictionary">Word dictionary used to rank the candidate meanings.</param>
        /// <param name="goldKeyDictionary">Maps an encounter id to its space-separated sense keys.</param>
        /// <param name="synsetMappings">Maps a sense key to a raw meaning.</param>
        /// <param name="word">Word whose dictionary entry supplies the meaning ids for ranking.</param>
        /// <param name="keyId">Encounter id to look up in <paramref name="goldKeyDictionary"/>.</param>
        /// <param name="meaning">
        /// The selected meaning when the result is <c>OK</c>; otherwise <see cref="string.Empty"/>.
        /// </param>
        /// <returns>
        /// <c>OK</c> on success, <c>IdNotPresentInGoldKeyDictionary</c> when <paramref name="keyId"/>
        /// is unknown, or <c>NoSynsetMappingFound</c> when no sense key yields a non-empty meaning.
        /// </returns>
        public static TryGetMeaningStatus TryGetMeaning(
            WordDictionary dictionary, Dictionary<string, string> goldKeyDictionary,
            SynsetDictionary synsetMappings, string word, string keyId, out string meaning)
        {
            meaning = string.Empty;

            // Single lookup instead of ContainsKey followed by the indexer.
            if (!goldKeyDictionary.TryGetValue(keyId, out var senseKeyList))
            {
                return TryGetMeaningStatus.IdNotPresentInGoldKeyDictionary;
            }

            var candidates = senseKeyList
                             .Split(' ')
                             .Where(synsetMappings.ContainsKey)
                             .Select(senseKey => FromRawMeaning(synsetMappings[senseKey]))
                             .ToList();

            if (candidates.Count == 0)
            {
                return TryGetMeaningStatus.NoSynsetMappingFound;
            }

            // The entry for the word does not depend on the candidate, so resolve it once
            // rather than once per candidate inside the sort key selector.
            var wordEntry = dictionary.GetByName(word);

            // OrderBy is a stable sort, so ties keep the order of the gold-key list.
            var mostFrequentMeaning = candidates
                                      .OrderBy(candidate => wordEntry?.Meanings.GetByName(candidate)?.Id ?? int.MaxValue)
                                      .FirstOrDefault();

            if (string.IsNullOrEmpty(mostFrequentMeaning))
            {
                return TryGetMeaningStatus.NoSynsetMappingFound;
            }

            meaning = mostFrequentMeaning;

            return TryGetMeaningStatus.OK;
        }
Exemple #2
0
        /// <summary>
        /// Loads the XML data set at <paramref name="dataPath"/> and converts every text into a
        /// <c>TextData</c> holding a flat list of word encounters.
        /// </summary>
        /// <remarks>
        /// Each non-blank line of the gold-key file is split on spaces: the first token is the
        /// encounter id and the remaining tokens are its sense keys. Lines with a single token are
        /// ignored and only the first line for a given id is kept. For encounters of type
        /// <c>ItemChoiceType.instance</c> the meaning is resolved via
        /// <c>SynsetHelper.TryGetMeaning</c>; a failed resolution is recorded in
        /// <paramref name="errors"/> and the encounter keeps an empty meaning. An end-of-sentence
        /// marker is appended after the encounters of every sentence.
        /// </remarks>
        /// <param name="dataPath">Path of the XML file deserialized as <c>UefXmlData</c>.</param>
        /// <param name="goldKeyPath">Path of the gold-key text file.</param>
        /// <param name="synsetMappings">Synset mappings forwarded to the meaning lookup.</param>
        /// <param name="dictionary">Word dictionary forwarded to the meaning lookup.</param>
        /// <param name="errors">Receives one entry per encounter whose meaning could not be resolved.</param>
        /// <param name="progress">Optional progress handle; a scope of size 1 is opened for the call.</param>
        /// <returns>One <c>TextData</c> per text in the XML file, in document order.</returns>
        public static TextData[] Read(
            string dataPath, string goldKeyPath, SynsetDictionary synsetMappings,
            WordDictionary dictionary, out XmlParseError[] errors,
            IProgressHandle progress = null)
        {
            var scope = progress?.Scope(1);

            try
            {
                var texts         = new List<TextData>();
                var xmlSerializer = new XmlSerializer(typeof(UefXmlData));
                var parseErrors   = new List<XmlParseError>();

                using (var xmlReader = new StreamReader(dataPath))
                {
                    // id -> "senseKey1 senseKey2 ..."
                    var goldKeys = File.ReadAllLines(goldKeyPath)
                                   .Where(line => !string.IsNullOrWhiteSpace(line))
                                   .Select(line => line.Trim(' ').Split(' '))
                                   .Where(tokens => tokens.Length > 1)
                                   .DistinctBy(tokens => tokens[0])
                                   .ToDictionary(tokens => tokens[0], tokens => string.Join(" ", tokens.Skip(1)));

                    var document = (UefXmlData)xmlSerializer.Deserialize(xmlReader);

                    foreach (var text in document.Texts)
                    {
                        var textEncounters = new List<RawWordEncounter>();

                        foreach (var sentence in text.Sentences)
                        {
                            for (var index = 0; index < sentence.Encounters.Length; index++)
                            {
                                var item     = sentence.Encounters[index];
                                var itemType = sentence.EnumTypes[index];
                                var rawItem  = new RawWordEncounter
                                {
                                    Word    = item.Lemma,
                                    Pos     = item.Pos,
                                    Meaning = string.Empty
                                };

                                // Only encounters of type "instance" get a meaning lookup.
                                if (itemType == ItemChoiceType.instance)
                                {
                                    var lookupStatus = SynsetHelper.TryGetMeaning(
                                        dictionary, goldKeys, synsetMappings,
                                        item.Lemma, item.Id, out var resolvedMeaning);

                                    if (lookupStatus != TryGetMeaningStatus.OK)
                                    {
                                        parseErrors.Add(new XmlParseError
                                        {
                                            EncounterId = item.Id,
                                            Error       = lookupStatus
                                        });
                                    }
                                    else
                                    {
                                        rawItem.Meaning = resolvedMeaning;
                                    }
                                }

                                textEncounters.Add(rawItem);
                            }

                            // Sentence boundary marker.
                            textEncounters.Add(RawWordEncounter.EndOfSentenceEncounter);
                        }

                        texts.Add(new TextData(text.Id, textEncounters.ToArray()));
                    }
                }

                errors = parseErrors.ToArray();

                return texts.ToArray();
            }
            finally
            {
                scope?.Dispose();
            }
        }