/// <summary>
/// Reads the next line from <c>BaseReader</c> and parses it into a <see cref="DictionaryWord"/>.
/// </summary>
/// <remarks>
/// The line is split on single spaces. Column 0 is parsed as the word id, column 1 is the word
/// itself, and every remaining column is a <c>id:meaning:encounters</c> triple split on ':'.
/// </remarks>
/// <returns>The parsed word, or <c>null</c> when the reader is at end of stream.</returns>
protected override DictionaryWord Read()
{
    if (BaseReader.EndOfStream)
    {
        return null;
    }

    var fields = BaseReader.ReadLine().Split(' ');

    // Parse in the same order as the columns appear so the first malformed column is the one reported.
    var wordId = int.Parse(fields[0]);
    var wordText = fields[1];

    var meanings = fields
        .Skip(2)
        .Select(column =>
        {
            var parts = column.Split(':');
            var meaningId = int.Parse(parts[0]);
            var meaningText = parts[1];

            return new DictionaryMeaning
            {
                Id = meaningId,
                Meaning = meaningText,
                PartOfSpeech = SynsetHelper.GetPos(meaningText),
                Encounters = int.Parse(parts[2])
            };
        })
        .ToMeaningDictionary();

    return new DictionaryWord
    {
        Id = wordId,
        Word = wordText,
        Meanings = meanings
    };
}
/// <summary>
/// Reads the next line from <c>BaseReader</c> and builds a <see cref="DictionaryWord"/> from the
/// named groups of <c>ParseRegex</c>.
/// </summary>
/// <remarks>
/// The word id is taken from the pre-incremented <c>_wordCounter</c> field and the word text from
/// the "word" group. Each capture of the "meaning" group is split on ':' and becomes one
/// <see cref="DictionaryMeaning"/> whose id is the capture's position plus one. When a capture has
/// a second part it is parsed with <c>int.TryParse</c> into <c>Encounters</c>; the result of
/// <c>TryParse</c> is not checked, so a non-numeric value leaves the count at zero.
/// </remarks>
/// <returns>
/// The parsed word, or <c>null</c> when the reader is at end of stream, the line is null or
/// whitespace, or the line does not match <c>ParseRegex</c>.
/// </returns>
// NOTE(review): in the single-part branch the sequence "} } ;" places the statement terminator
// after the closing brace of the if block rather than after the object initializer - this looks
// like a formatting artifact; confirm against the original file.
// NOTE(review): the closing brace of this method is not present on this line; the definition
// appears to continue outside this view.
protected override DictionaryWord Read() { if (BaseReader.EndOfStream) { return(null); } var line = BaseReader.ReadLine(); if (string.IsNullOrWhiteSpace(line)) { return(null); } var match = ParseRegex.Match(line); if (!match.Success) { return(null); } return(new DictionaryWord { Id = ++_wordCounter, Word = match.Groups["word"].Value, Meanings = match.Groups["meaning"].Captures .Cast <Capture>() .Select((x, i) => { var split = x.Value.Split(':'); if (split.Length == 1) { return new DictionaryMeaning { Id = i + 1, Meaning = split[0], PartOfSpeech = SynsetHelper.GetPos(split[0]) } } ; int.TryParse(split[1], out var encounters); return new DictionaryMeaning { Id = i + 1, Meaning = split[0], PartOfSpeech = SynsetHelper.GetPos(split[0]), Encounters = encounters }; }) .ToMeaningDictionary() });
/// <summary>
/// Parses a '|'-separated list of <c>id:meaning:encounters</c> entries into a
/// <see cref="MeaningDictionary"/>.
/// </summary>
/// <param name="value">The raw text to parse; empty segments between '|' separators are skipped.</param>
/// <returns>The result of calling <c>ToMeaningDictionary()</c> on the parsed meanings.</returns>
private MeaningDictionary ParseEncounters(string value)
{
    var meanings =
        from entry in value.Split('|')
        where !string.IsNullOrEmpty(entry)
        let fields = entry.Split(':')
        select new DictionaryMeaning
        {
            Id = int.Parse(fields[0]),
            Meaning = fields[1],
            PartOfSpeech = SynsetHelper.GetPos(fields[1]),
            Encounters = int.Parse(fields[2])
        };

    return meanings.ToMeaningDictionary();
}
/// <summary>
/// Counts one occurrence of the meaning carried by <paramref name="encounter"/>, creating and
/// registering an entry for that meaning first when none exists yet.
/// </summary>
/// <param name="dictionary">
/// Word dictionary used to look up the id of the meaning; when the word or meaning is not found
/// there, a newly created entry gets the id -1.
/// </param>
/// <param name="encounter">The encounter whose <c>Word</c> and <c>Meaning</c> are recorded.</param>
internal void AddEncounter(WordDictionary dictionary, RawWordEncounter encounter)
{
    // The dictionary lookups run on every call, before the local lookup, exactly as before.
    var knownWord = dictionary.GetByName(encounter.Word);
    var knownMeaning = knownWord?.Meanings.GetByName(encounter.Meaning);

    var entry = GetByName(encounter.Meaning);
    if (entry != null)
    {
        entry.Encounters++;
        return;
    }

    entry = new DictionaryMeaning
    {
        Id = knownMeaning?.Id ?? -1,
        Meaning = encounter.Meaning,
        PartOfSpeech = SynsetHelper.GetPos(encounter.Meaning)
    };

    Add(encounter.Meaning, entry);
    entry.Encounters++;
}
/// <summary>
/// Reads the next line from <c>BaseReader</c> and converts it into a <see cref="RawWordEncounter"/>.
/// </summary>
/// <remarks>
/// A null or whitespace-only line, and a line that <c>ParseRegex</c> does not match, are both
/// reported as an encounter whose <c>Word</c> is <c>RawWordEncounter.EndOfSentence</c>.
/// Otherwise the "word" group becomes <c>Word</c>, the "meaning" group becomes <c>Meaning</c>,
/// and <c>Pos</c> is obtained by passing the meaning to <c>SynsetHelper.GetPos</c>.
/// </remarks>
/// <returns>
/// The parsed encounter, an end-of-sentence encounter, or <c>null</c> once the reader is at
/// end of stream.
/// </returns>
protected override RawWordEncounter Read()
{
    if (BaseReader.EndOfStream)
    {
        return null;
    }

    var line = BaseReader.ReadLine();
    if (string.IsNullOrWhiteSpace(line))
    {
        // The statement terminator belongs inside the block, directly after the initializer.
        return new RawWordEncounter { Word = RawWordEncounter.EndOfSentence };
    }

    var match = ParseRegex.Match(line);
    if (!match.Success)
    {
        return new RawWordEncounter { Word = RawWordEncounter.EndOfSentence };
    }

    // Read the "meaning" group once; it feeds both Pos and Meaning.
    var meaning = match.Groups["meaning"].Value;

    return new RawWordEncounter
    {
        Word = match.Groups["word"].Value,
        Pos = SynsetHelper.GetPos(meaning),
        Meaning = meaning
    };
}
} // closes an enclosing scope opened above this view
} // closes an enclosing scope opened above this view
/// <summary>
/// Loads an XML data file together with its gold-key file and converts every text into a
/// flat sequence of <see cref="RawWordEncounter"/> items.
/// </summary>
/// <param name="dataPath">Path of the XML file, deserialized as <c>UefXmlData</c>.</param>
/// <param name="goldKeyPath">
/// Path of the gold-key file. Blank lines and lines with a single token are ignored; on every
/// other line the first space-separated token is the key and the remaining tokens, re-joined
/// with single spaces, are the value. When a key repeats, the first line wins.
/// </param>
/// <param name="synsetMappings">Passed through to <c>SynsetHelper.TryGetMeaning</c>.</param>
/// <param name="dictionary">Passed through to <c>SynsetHelper.TryGetMeaning</c>.</param>
/// <param name="errors">
/// Receives one entry for each <c>instance</c> item whose meaning lookup returned a status
/// other than <c>TryGetMeaningStatus.OK</c>.
/// </param>
/// <param name="progress">
/// Optional progress handle; when supplied, a scope of size 1 is opened for the duration of
/// the call and disposed on exit.
/// </param>
/// <returns>
/// One <see cref="TextData"/> per text in the XML file. Within a text, the encounters of each
/// sentence are followed by <c>RawWordEncounter.EndOfSentenceEncounter</c>.
/// </returns>
public static TextData[] Read(
    string dataPath,
    string goldKeyPath,
    SynsetDictionary synsetMappings,
    WordDictionary dictionary,
    out XmlParseError[] errors,
    IProgressHandle progress = null)
{
    var scope = progress?.Scope(1);

    try
    {
        var texts = new List<TextData>();
        var serializer = new XmlSerializer(typeof(UefXmlData));
        var failures = new List<XmlParseError>();

        // The data file is opened before the gold keys are read, as in the original ordering.
        using (var reader = new StreamReader(dataPath))
        {
            var goldKeys = new Dictionary<string, string>();
            foreach (var rawLine in File.ReadAllLines(goldKeyPath))
            {
                if (string.IsNullOrWhiteSpace(rawLine))
                {
                    continue;
                }

                var tokens = rawLine.Trim(' ').Split(' ');
                if (tokens.Length <= 1)
                {
                    continue;
                }

                // Keep only the first line seen for any given key.
                if (goldKeys.ContainsKey(tokens[0]))
                {
                    continue;
                }

                goldKeys.Add(tokens[0], string.Join(" ", tokens.Skip(1)));
            }

            var document = (UefXmlData)serializer.Deserialize(reader);

            foreach (var text in document.Texts)
            {
                var encounters = new List<RawWordEncounter>();

                foreach (var sentence in text.Sentences)
                {
                    // Encounters and EnumTypes are indexed in parallel.
                    for (var index = 0; index < sentence.Encounters.Length; index++)
                    {
                        var item = sentence.Encounters[index];
                        var itemKind = sentence.EnumTypes[index];

                        var raw = new RawWordEncounter
                        {
                            Word = item.Lemma,
                            Pos = item.Pos,
                            Meaning = string.Empty
                        };

                        if (itemKind == ItemChoiceType.instance)
                        {
                            var status = SynsetHelper.TryGetMeaning(
                                dictionary,
                                goldKeys,
                                synsetMappings,
                                item.Lemma,
                                item.Id,
                                out var meaning);

                            if (status != TryGetMeaningStatus.OK)
                            {
                                failures.Add(new XmlParseError
                                {
                                    EncounterId = item.Id,
                                    Error = status
                                });
                            }
                            else
                            {
                                raw.Meaning = meaning;
                            }
                        }

                        encounters.Add(raw);
                    }

                    encounters.Add(RawWordEncounter.EndOfSentenceEncounter);
                }

                texts.Add(new TextData(text.Id, encounters.ToArray()));
            }
        }

        errors = failures.ToArray();
        return texts.ToArray();
    }
    finally
    {
        scope?.Dispose();
    }
}