/// <summary>
/// Builds a <see cref="LanguageModel{T}"/> from line-oriented text read from
/// <paramref name="sourceStream"/>. Each line is split on the separator string into a
/// serialized feature and its occurrence count.
/// </summary>
/// <param name="sourceStream">Stream to read, decoded with the <c>_encoding</c> field, until no more lines remain.</param>
/// <param name="language">Language information passed through to the returned model.</param>
/// <returns>A model wrapping the distribution of every event read from the stream.</returns>
/// <exception cref="InvalidOperationException">A line does not split into exactly two parts.</exception>
/// <exception cref="FormatException">A count is not a valid integer.</exception>
/// <exception cref="OverflowException">A count does not fit into a <see cref="long"/>.</exception>
public LanguageModel<T> Load(Stream sourceStream, LanguageInfo language)
{
    var distribution = new Distribution<T>(new Bag<T>());

    // The reader is not disposed here: disposing a StreamReader also closes the stream
    // it wraps, and that stream was supplied by the caller.
    var reader = new StreamReader(sourceStream, _encoding);

    // NOTE(review): the separator is a tab followed by a space, exactly as before.
    // Confirm this matches what the corresponding writer emits.
    var separators = new[] { "\t " };

    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
    {
        string[] parts = line.Split(separators, StringSplitOptions.None);
        if (parts.Length != 2)
        {
            throw new InvalidOperationException("Encountered invalid key value pair in source data.");
        }

        var feature = _deserializeFeature(parts[0]);

        // Parse with the invariant culture so the result does not depend on the
        // current thread culture.
        long count = long.Parse(parts[1], System.Globalization.CultureInfo.InvariantCulture);

        distribution.AddEvent(feature, count);
    }

    return new LanguageModel<T>(distribution, language);
}
/// <summary>
/// Builds a <see cref="LanguageModel{T}"/> from its XML representation.
/// </summary>
/// <remarks>
/// The metadata, language and n-grams child elements, the text and count attributes of
/// each n-gram element, and the two noise-count attributes are dereferenced without null
/// checks, so all of them must be present. The four language attributes are optional;
/// a missing one is passed to <c>LanguageInfo</c> as <c>null</c>.
/// </remarks>
/// <param name="xLanguageModel">Element containing the metadata, language and n-grams child elements.</param>
/// <returns>A model built from the parsed features, language information and metadata.</returns>
/// <exception cref="FormatException">A count attribute is not a valid integer.</exception>
/// <exception cref="OverflowException">A count attribute does not fit into a <see cref="long"/>.</exception>
public LanguageModel<T> Load(XElement xLanguageModel)
{
    // Counts are parsed with the invariant culture so the result does not depend on
    // the current thread culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Every child of the metadata element becomes one name/value entry.
    var metadata = xLanguageModel.Element(MetadataElement)
        .Elements()
        .ToDictionary(el => el.Name.ToString(), el => el.Value);

    // The explicit XAttribute-to-string conversion yields null for a missing attribute,
    // which is the same result the previous "null-check then .Value" code produced.
    var xLanguage = xLanguageModel.Element(LanguageElement);
    var language = new LanguageInfo(
        (string)xLanguage.Attribute(LanguageIso639_2T_Attribute),
        (string)xLanguage.Attribute(LanguageIso639_3_Attribute),
        (string)xLanguage.Attribute(LanguageEnglishNameAttribute),
        (string)xLanguage.Attribute(LanguageLocalNameAttribute));

    var features = new Distribution<T>(new Bag<T>());
    var xNgramsElement = xLanguageModel.Element(NGramsElement);
    foreach (var xNgram in xNgramsElement.Elements(NGramElement))
    {
        var feature = _deserializeFeature(xNgram.Attribute(TextAttribute).Value);
        long count = long.Parse(xNgram.Attribute(CountAttribute).Value, invariant);
        features.AddEvent(feature, count);
    }

    // Noise counts are stored as attributes on the n-grams element itself.
    long totalNoiseCount = long.Parse(xNgramsElement.Attribute(TotalNoiseCountAtribute).Value, invariant);
    long distinctNoiseCount = long.Parse(xNgramsElement.Attribute(DistinctNoiseCountAtribute).Value, invariant);
    features.AddNoise(totalNoiseCount, distinctNoiseCount);

    return new LanguageModel<T>(features, language, metadata);
}
/// <summary>
/// Reads a <see cref="LanguageModel{T}"/> from an XML element: metadata entries,
/// language attributes, n-gram counts and noise counts.
/// </summary>
/// <remarks>
/// Only the four language attributes may be absent (each then becomes <c>null</c>).
/// The metadata, language and n-grams elements and every text, count and noise-count
/// attribute are accessed without null checks and therefore have to exist.
/// </remarks>
/// <param name="xLanguageModel">Element holding the serialized language model.</param>
/// <returns>The language model described by <paramref name="xLanguageModel"/>.</returns>
/// <exception cref="FormatException">A count value is not a valid integer.</exception>
/// <exception cref="OverflowException">A count value is outside the range of <see cref="long"/>.</exception>
public LanguageModel<T> Load(XElement xLanguageModel)
{
    var metadata = xLanguageModel
        .Element(MetadataElement)
        .Elements()
        .ToDictionary(el => el.Name.ToString(), el => el.Value);

    var xLanguage = xLanguageModel.Element(LanguageElement);

    // Each language attribute is optional; an absent attribute maps to null.
    var xIso639_2T = xLanguage.Attribute(LanguageIso639_2T_Attribute);
    string iso639_2T = xIso639_2T == null ? null : xIso639_2T.Value;

    var xIso639_3 = xLanguage.Attribute(LanguageIso639_3_Attribute);
    string iso639_3 = xIso639_3 == null ? null : xIso639_3.Value;

    var xEnglishName = xLanguage.Attribute(LanguageEnglishNameAttribute);
    string englishName = xEnglishName == null ? null : xEnglishName.Value;

    var xLocalName = xLanguage.Attribute(LanguageLocalNameAttribute);
    string localName = xLocalName == null ? null : xLocalName.Value;

    var language = new LanguageInfo(iso639_2T, iso639_3, englishName, localName);

    var features = new Distribution<T>(new Bag<T>());
    var xNgramsElement = xLanguageModel.Element(NGramsElement);

    foreach (var xElement in xNgramsElement.Elements(NGramElement))
    {
        // Invariant culture keeps count parsing independent of the current thread culture.
        features.AddEvent(
            _deserializeFeature(xElement.Attribute(TextAttribute).Value),
            long.Parse(
                xElement.Attribute(CountAttribute).Value,
                System.Globalization.CultureInfo.InvariantCulture));
    }

    features.AddNoise(
        long.Parse(
            xNgramsElement.Attribute(TotalNoiseCountAtribute).Value,
            System.Globalization.CultureInfo.InvariantCulture),
        long.Parse(
            xNgramsElement.Attribute(DistinctNoiseCountAtribute).Value,
            System.Globalization.CultureInfo.InvariantCulture));

    return new LanguageModel<T>(features, language, metadata);
}