/// <summary>
        /// Builds a <see cref="LanguageModel{T}"/> from its XML representation.
        /// </summary>
        /// <param name="xLanguageModel">Element containing the metadata, language and n-grams child elements.</param>
        /// <returns>The model assembled from the n-gram counts, noise counts, language attributes and metadata read from the XML.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="xLanguageModel"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The language element, the n-grams element or a required attribute is missing.</exception>
        /// <exception cref="FormatException">A count attribute is not a valid integer.</exception>
        public LanguageModel<T> Load(XElement xLanguageModel)
        {
            if (xLanguageModel == null)
                throw new ArgumentNullException("xLanguageModel");

            // A missing metadata element is treated as "no metadata" rather than failing with a NullReferenceException.
            var xMetadata = xLanguageModel.Element(MetadataElement);
            var metadataEntries = xMetadata != null ? xMetadata.Elements() : Enumerable.Empty<XElement>();
            var metadata = metadataEntries.ToDictionary(el => el.Name.ToString(), el => el.Value);

            var xLanguage = xLanguageModel.Element(LanguageElement);
            if (xLanguage == null)
                throw new InvalidOperationException("Language model XML does not contain the language element.");

            // The explicit XAttribute-to-string conversion yields null for an absent attribute,
            // so each of the four language attributes stays optional.
            var language = new LanguageInfo(
                (string)xLanguage.Attribute(LanguageIso639_2T_Attribute),
                (string)xLanguage.Attribute(LanguageIso639_3_Attribute),
                (string)xLanguage.Attribute(LanguageEnglishNameAttribute),
                (string)xLanguage.Attribute(LanguageLocalNameAttribute));

            var xNgramsElement = xLanguageModel.Element(NGramsElement);
            if (xNgramsElement == null)
                throw new InvalidOperationException("Language model XML does not contain the n-grams element.");

            // Parse counts with the invariant culture so loading does not depend on the current thread culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var features = new Distribution<T>(new Bag<T>());
            foreach (var xElement in xNgramsElement.Elements(NGramElement))
            {
                var feature = _deserializeFeature(GetRequiredAttributeValue(xElement, TextAttribute));
                var count = long.Parse(GetRequiredAttributeValue(xElement, CountAttribute), invariant);
                features.AddEvent(feature, count);
            }

            var totalNoiseCount = long.Parse(GetRequiredAttributeValue(xNgramsElement, TotalNoiseCountAtribute), invariant);
            var distinctNoiseCount = long.Parse(GetRequiredAttributeValue(xNgramsElement, DistinctNoiseCountAtribute), invariant);
            features.AddNoise(totalNoiseCount, distinctNoiseCount);

            return new LanguageModel<T>(features, language, metadata);
        }

        /// <summary>
        /// Returns the value of a mandatory attribute, failing with a descriptive error when it is absent.
        /// </summary>
        private static string GetRequiredAttributeValue(XElement element, XName attributeName)
        {
            var attribute = element.Attribute(attributeName);
            if (attribute == null)
                throw new InvalidOperationException(
                    "Element '" + element.Name + "' is missing required attribute '" + attributeName + "'.");
            return attribute.Value;
        }
Example #2
0
        /// <summary>
        /// Reads a language model from a text stream holding one feature per line.
        /// </summary>
        /// <remarks>
        /// Each line must split into exactly two parts around a tab followed by a space:
        /// the serialized feature and its count.
        /// </remarks>
        /// <param name="sourceStream">Stream to read; it is decoded using the <c>_encoding</c> field.</param>
        /// <param name="language">Language information attached to the resulting model.</param>
        /// <returns>A model whose distribution contains every feature/count pair read from the stream.</returns>
        /// <exception cref="InvalidOperationException">A line does not split into exactly two parts.</exception>
        /// <exception cref="FormatException">A count is not a valid integer.</exception>
        public LanguageModel<T> Load(Stream sourceStream, LanguageInfo language)
        {
            var distribution = new Distribution<T>(new Bag<T>());

            // NOTE: the reader is not disposed here; disposing it would also close sourceStream.
            var reader = new StreamReader(sourceStream, _encoding);

            // Hoisted out of the loop so the separator array is allocated once.
            var separators = new[] { "\t " };

            // Parse counts with the invariant culture so loading does not depend on the current thread culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                string[] keyValue = line.Split(separators, StringSplitOptions.None);
                if (keyValue.Length != 2)
                {
                    throw new InvalidOperationException("Encountered invalid key value pair in source data.");
                }
                distribution.AddEvent(_deserializeFeature(keyValue[0]), long.Parse(keyValue[1], invariant));
            }

            return new LanguageModel<T>(distribution, language);
        }
        /// <summary>
        /// Builds a <see cref="LanguageModel{T}"/> from its XML representation.
        /// </summary>
        /// <param name="xLanguageModel">Element containing the metadata, language and n-grams child elements.</param>
        /// <returns>The model assembled from the n-gram counts, noise counts, language attributes and metadata read from the XML.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="xLanguageModel"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The language element, the n-grams element or a required attribute is missing.</exception>
        /// <exception cref="FormatException">A count attribute is not a valid integer.</exception>
        public LanguageModel<T> Load(XElement xLanguageModel)
        {
            if (xLanguageModel == null)
            {
                throw new ArgumentNullException("xLanguageModel");
            }

            // A missing metadata element is treated as "no metadata" rather than failing with a NullReferenceException.
            var xMetadata = xLanguageModel.Element(MetadataElement);
            var metadataEntries = xMetadata != null ? xMetadata.Elements() : Enumerable.Empty<XElement>();
            var metadata = metadataEntries.ToDictionary(el => el.Name.ToString(), el => el.Value);

            var xLanguage = xLanguageModel.Element(LanguageElement);
            if (xLanguage == null)
            {
                throw new InvalidOperationException("Language model XML does not contain the language element.");
            }

            // The explicit XAttribute-to-string conversion yields null for an absent attribute,
            // so each of the four language attributes stays optional.
            var language = new LanguageInfo(
                (string)xLanguage.Attribute(LanguageIso639_2T_Attribute),
                (string)xLanguage.Attribute(LanguageIso639_3_Attribute),
                (string)xLanguage.Attribute(LanguageEnglishNameAttribute),
                (string)xLanguage.Attribute(LanguageLocalNameAttribute));

            var xNgramsElement = xLanguageModel.Element(NGramsElement);
            if (xNgramsElement == null)
            {
                throw new InvalidOperationException("Language model XML does not contain the n-grams element.");
            }

            // Parse counts with the invariant culture so loading does not depend on the current thread culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var features = new Distribution<T>(new Bag<T>());
            foreach (var xElement in xNgramsElement.Elements(NGramElement))
            {
                var feature = _deserializeFeature(GetRequiredAttributeValue(xElement, TextAttribute));
                var count = long.Parse(GetRequiredAttributeValue(xElement, CountAttribute), invariant);
                features.AddEvent(feature, count);
            }

            var totalNoiseCount = long.Parse(GetRequiredAttributeValue(xNgramsElement, TotalNoiseCountAtribute), invariant);
            var distinctNoiseCount = long.Parse(GetRequiredAttributeValue(xNgramsElement, DistinctNoiseCountAtribute), invariant);
            features.AddNoise(totalNoiseCount, distinctNoiseCount);

            return new LanguageModel<T>(features, language, metadata);
        }

        /// <summary>
        /// Returns the value of a mandatory attribute, failing with a descriptive error when it is absent.
        /// </summary>
        private static string GetRequiredAttributeValue(XElement element, XName attributeName)
        {
            var attribute = element.Attribute(attributeName);
            if (attribute == null)
            {
                throw new InvalidOperationException(
                    "Element '" + element.Name + "' is missing required attribute '" + attributeName + "'.");
            }
            return attribute.Value;
        }
 /// <summary>
 /// Creates a language model from the supplied features, language and metadata.
 /// </summary>
 /// <param name="features">Value stored in <c>Features</c>.</param>
 /// <param name="language">Value stored in <c>Language</c>.</param>
 /// <param name="metadata">Value stored in <c>Metadata</c>; kept by reference, not copied.</param>
 public LanguageModel(IDistribution<T> features, LanguageInfo language, IDictionary<string, string> metadata)
 {
     this.Language = language;
     this.Metadata = metadata;
     this.Features = features;
 }
 /// <summary>
 /// Creates a language model from the supplied features and language, starting with empty metadata.
 /// </summary>
 /// <param name="features">Value stored in <c>Features</c>.</param>
 /// <param name="language">Value stored in <c>Language</c>.</param>
 public LanguageModel(IDistribution<T> features, LanguageInfo language)
 {
     // No metadata was supplied, so the model gets a fresh, empty dictionary of its own.
     var emptyMetadata = new Dictionary<string, string>();

     this.Language = language;
     this.Features = features;
     this.Metadata = emptyMetadata;
 }