Beispiel #1
0
        /// <summary>
        /// Reads the next line from <c>BaseReader</c> and converts it into a <see cref="DictionaryWord"/>.
        /// </summary>
        /// <remarks>
        /// The line is split on single spaces. The first column is parsed as the word id, the second
        /// column is the word text, and each remaining column is split on ':' into
        /// <c>id:meaning:encounters</c>.
        /// </remarks>
        /// <returns>The parsed word, or <c>null</c> once the reader reports end of stream.</returns>
        protected override DictionaryWord Read()
        {
            if (BaseReader.EndOfStream)
            {
                return null;
            }

            var fields = BaseReader.ReadLine().Split(' ');

            // Deferred query: nothing is parsed until ToMeaningDictionary() enumerates it below.
            var meanings =
                from token in fields.Skip(2)
                let parts = token.Split(':')
                select new DictionaryMeaning
                {
                    Id = int.Parse(parts[0]),
                    Meaning = parts[1],
                    PartOfSpeech = SynsetHelper.GetPos(parts[1]),
                    Encounters = int.Parse(parts[2])
                };

            return new DictionaryWord
            {
                Id = int.Parse(fields[0]),
                Word = fields[1],
                Meanings = meanings.ToMeaningDictionary()
            };
        }
Beispiel #2
0
        /// <summary>
        /// Reads the next line from <c>BaseReader</c> and converts it into a <see cref="DictionaryWord"/>.
        /// </summary>
        /// <remarks>
        /// The line is matched against <c>ParseRegex</c>. The <c>word</c> group supplies the word text and
        /// every capture of the <c>meaning</c> group supplies one meaning, written either as
        /// <c>meaning</c> or as <c>meaning:encounters</c>. Word ids are taken from the running
        /// <c>_wordCounter</c>; meaning ids are the 1-based position of the capture within the line.
        /// </remarks>
        /// <returns>
        /// The parsed word, or <c>null</c> when the stream is exhausted, the line is blank, or the line
        /// does not match <c>ParseRegex</c>.
        /// </returns>
        protected override DictionaryWord Read()
        {
            if (BaseReader.EndOfStream)
            {
                return null;
            }

            var line = BaseReader.ReadLine();

            if (string.IsNullOrWhiteSpace(line))
            {
                return null;
            }

            var match = ParseRegex.Match(line);

            if (!match.Success)
            {
                return null;
            }

            return new DictionaryWord
            {
                // The counter is only advanced for lines that actually produce a word.
                Id = ++_wordCounter,
                Word = match.Groups["word"].Value,
                Meanings = match.Groups["meaning"].Captures
                    .Cast<Capture>()
                    .Select((capture, index) =>
                    {
                        var split = capture.Value.Split(':');

                        var meaning = new DictionaryMeaning
                        {
                            Id = index + 1,
                            Meaning = split[0],
                            PartOfSpeech = SynsetHelper.GetPos(split[0])
                        };

                        // The encounter count is optional. When present but not a valid integer,
                        // TryParse leaves 'encounters' at 0, which is deliberately accepted.
                        if (split.Length > 1)
                        {
                            int.TryParse(split[1], out var encounters);
                            meaning.Encounters = encounters;
                        }

                        return meaning;
                    })
                    .ToMeaningDictionary()
            };
        }
Beispiel #3
0
        /// <summary>
        /// Parses a '|'-separated list of <c>id:meaning:encounters</c> entries into a
        /// <see cref="MeaningDictionary"/>.
        /// </summary>
        /// <param name="value">The serialized meanings; empty segments between '|' are skipped.</param>
        /// <returns>The meanings collected through <c>ToMeaningDictionary()</c>.</returns>
        private MeaningDictionary ParseEncounters(string value)
        {
            var meanings =
                from entry in value.Split('|')
                // Split never yields null items, so a length check is the same as IsNullOrEmpty here.
                where entry.Length > 0
                let parts = entry.Split(':')
                select new DictionaryMeaning
                {
                    Id = int.Parse(parts[0]),
                    Meaning = parts[1],
                    PartOfSpeech = SynsetHelper.GetPos(parts[1]),
                    Encounters = int.Parse(parts[2])
                };

            return meanings.ToMeaningDictionary();
        }
        /// <summary>
        /// Counts one more occurrence of the encounter's meaning, creating its entry on first sight.
        /// </summary>
        /// <param name="dictionary">Dictionary used to look up the id of the encountered meaning.</param>
        /// <param name="encounter">The encounter whose <c>Meaning</c> is being counted.</param>
        internal void AddEncounter(WordDictionary dictionary, RawWordEncounter encounter)
        {
            // Looked up first, as in the original flow, even when the entry already exists.
            var knownMeaning = dictionary
                .GetByName(encounter.Word)
                ?.Meanings
                .GetByName(encounter.Meaning);

            var tracked = GetByName(encounter.Meaning);

            if (tracked != null)
            {
                tracked.Encounters++;
                return;
            }

            var created = new DictionaryMeaning
            {
                // -1 marks a meaning for which the dictionary lookup produced nothing.
                Id           = knownMeaning?.Id ?? -1,
                Meaning      = encounter.Meaning,
                PartOfSpeech = SynsetHelper.GetPos(encounter.Meaning)
            };

            Add(encounter.Meaning, created);
            created.Encounters++;
        }
        /// <summary>
        /// Reads the next line from <c>BaseReader</c> and converts it into a <see cref="RawWordEncounter"/>.
        /// </summary>
        /// <remarks>
        /// A blank line, or a line that does not match <c>ParseRegex</c>, yields an encounter whose
        /// <c>Word</c> is <c>RawWordEncounter.EndOfSentence</c>. Otherwise the <c>word</c> and
        /// <c>meaning</c> regex groups fill the encounter, and the part of speech is derived from the
        /// meaning via <c>SynsetHelper.GetPos</c>.
        /// </remarks>
        /// <returns>The parsed encounter, or <c>null</c> once the reader reports end of stream.</returns>
        protected override RawWordEncounter Read()
        {
            if (BaseReader.EndOfStream)
            {
                return null;
            }

            var line = BaseReader.ReadLine();

            if (string.IsNullOrWhiteSpace(line))
            {
                return new RawWordEncounter
                {
                    Word = RawWordEncounter.EndOfSentence
                };
            }

            var match = ParseRegex.Match(line);

            if (!match.Success)
            {
                // Unparseable lines are treated exactly like blank ones: as a sentence boundary.
                return new RawWordEncounter
                {
                    Word = RawWordEncounter.EndOfSentence
                };
            }

            var meaning = match.Groups["meaning"].Value;

            return new RawWordEncounter
            {
                Word = match.Groups["word"].Value,
                Pos = SynsetHelper.GetPos(meaning),
                Meaning = meaning
            };
        }
    }
}
Beispiel #6
0
        /// <summary>
        /// Loads texts from a UEF XML data file and resolves the meaning of each instance encounter
        /// using the accompanying gold key file.
        /// </summary>
        /// <param name="dataPath">Path of the XML file deserialized as <c>UefXmlData</c>.</param>
        /// <param name="goldKeyPath">
        /// Path of the gold key file. Each non-blank line is trimmed of spaces and split on spaces; the
        /// first token is the key and the remaining tokens, re-joined with spaces, are the value.
        /// Lines with a single token are ignored and repeated keys are collapsed with <c>DistinctBy</c>.
        /// </param>
        /// <param name="synsetMappings">Passed through to <c>SynsetHelper.TryGetMeaning</c>.</param>
        /// <param name="dictionary">Passed through to <c>SynsetHelper.TryGetMeaning</c>.</param>
        /// <param name="errors">
        /// Receives one entry for every instance encounter whose lookup status was not
        /// <c>TryGetMeaningStatus.OK</c>.
        /// </param>
        /// <param name="progress">Optional progress handle; a scope of 1 is opened for the whole call.</param>
        /// <returns>
        /// One <c>TextData</c> per text in the XML. Each sentence's encounters are followed by
        /// <c>RawWordEncounter.EndOfSentenceEncounter</c>.
        /// </returns>
        public static TextData[] Read(
            string dataPath, string goldKeyPath, SynsetDictionary synsetMappings,
            WordDictionary dictionary, out XmlParseError[] errors,
            IProgressHandle progress = null)
        {
            var scope = progress?.Scope(1);

            try
            {
                var texts       = new List<TextData>();
                var serializer  = new XmlSerializer(typeof(UefXmlData));
                var parseErrors = new List<XmlParseError>();

                using (var reader = new StreamReader(dataPath))
                {
                    // The gold keys are read after the data file has been opened, as before.
                    var goldKeys = File.ReadAllLines(goldKeyPath)
                        .Where(row => !string.IsNullOrWhiteSpace(row))
                        .Select(row => row.Trim(' ').Split(' '))
                        .Where(tokens => tokens.Length > 1)
                        .DistinctBy(tokens => tokens[0])
                        .ToDictionary(
                            tokens => tokens[0],
                            tokens => string.Join(" ", tokens.Skip(1)));

                    var document = (UefXmlData)serializer.Deserialize(reader);

                    foreach (var text in document.Texts)
                    {
                        var encounters = new List<RawWordEncounter>();

                        foreach (var sentence in text.Sentences)
                        {
                            // Encounters and EnumTypes are walked in lockstep by index.
                            for (var index = 0; index < sentence.Encounters.Length; index++)
                            {
                                var item     = sentence.Encounters[index];
                                var itemType = sentence.EnumTypes[index];
                                var raw      = new RawWordEncounter
                                {
                                    Word    = item.Lemma,
                                    Pos     = item.Pos,
                                    Meaning = string.Empty
                                };

                                // Only instance items get a meaning lookup; others keep the empty meaning.
                                if (itemType == ItemChoiceType.instance)
                                {
                                    var status = SynsetHelper.TryGetMeaning(
                                        dictionary, goldKeys, synsetMappings,
                                        item.Lemma, item.Id, out var meaning);

                                    if (status != TryGetMeaningStatus.OK)
                                    {
                                        parseErrors.Add(new XmlParseError
                                        {
                                            EncounterId = item.Id,
                                            Error       = status
                                        });
                                    }
                                    else
                                    {
                                        raw.Meaning = meaning;
                                    }
                                }

                                encounters.Add(raw);
                            }

                            encounters.Add(RawWordEncounter.EndOfSentenceEncounter);
                        }

                        texts.Add(new TextData(text.Id, encounters.ToArray()));
                    }
                }

                errors = parseErrors.ToArray();

                return texts.ToArray();
            }
            finally
            {
                scope?.Dispose();
            }
        }