示例#1
0
        /// <summary>
        /// Loads an external Txt2Vec embedding model and copies the vector of every known word
        /// into the corresponding row of <paramref name="embeddingMatrix"/>.
        /// </summary>
        /// <param name="extEmbeddingFilePath">
        /// Path of the external embedding model. A path ending in "txt" (any letter casing) is read
        /// with <c>LoadTextModel</c>; every other path is read with <c>LoadBinaryModel</c>.
        /// </param>
        /// <param name="embeddingMatrix">
        /// Destination weights. Its <c>Columns</c> value must equal the model's <c>VectorSize</c>.
        /// </param>
        /// <param name="wordToIndex">
        /// Pairs of (word, row index). The word is looked up in the model and the row index
        /// selects the row that receives the word's vector.
        /// </param>
        /// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
        /// <exception cref="ArgumentException">
        /// Thrown when the model's vector size differs from the matrix column count.
        /// </exception>
        private void LoadWordEmbedding(string extEmbeddingFilePath, IWeightTensor embeddingMatrix, IEnumerable<KeyValuePair<string, int>> wordToIndex)
        {
            // Fail fast on missing arguments, before the (potentially large) model file is read.
            if (extEmbeddingFilePath == null)
            {
                throw new ArgumentNullException(nameof(extEmbeddingFilePath));
            }

            if (embeddingMatrix == null)
            {
                throw new ArgumentNullException(nameof(embeddingMatrix));
            }

            if (wordToIndex == null)
            {
                throw new ArgumentNullException(nameof(wordToIndex));
            }

            Txt2Vec.Model extEmbeddingModel = new Txt2Vec.Model();

            // A file path is not linguistic text, so the suffix is compared ordinally rather than
            // with culture-sensitive rules.
            if (extEmbeddingFilePath.EndsWith("txt", StringComparison.OrdinalIgnoreCase))
            {
                extEmbeddingModel.LoadTextModel(extEmbeddingFilePath);
            }
            else
            {
                extEmbeddingModel.LoadBinaryModel(extEmbeddingFilePath);
            }

            if (extEmbeddingModel.VectorSize != embeddingMatrix.Columns)
            {
                throw new ArgumentException($"Inconsistent embedding size. ExtEmbeddingModel size = '{extEmbeddingModel.VectorSize}', EmbeddingMatrix column size = '{embeddingMatrix.Columns}'");
            }

            foreach (KeyValuePair<string, int> pair in wordToIndex)
            {
                float[] vector = extEmbeddingModel.GetVector(pair.Key);

                // Words for which the model returns no vector are skipped; their rows are not written.
                if (vector != null)
                {
                    embeddingMatrix.SetWeightAtRow(pair.Value, vector);
                }
            }
        }
        /// <summary>
        /// Reads a binary Txt2Vec embedding model and writes the vector of every known word
        /// into the matching row of <paramref name="embeddingMatrix"/>.
        /// </summary>
        /// <param name="extEmbeddingFilePath">Path handed to <c>LoadBinaryModel</c>.</param>
        /// <param name="embeddingMatrix">
        /// Destination weights. Its <c>Columns</c> value must equal the model's <c>VectorSize</c>.
        /// </param>
        /// <param name="wordToIndex">
        /// Maps each word to the row index that receives the word's vector.
        /// </param>
        /// <exception cref="ArgumentException">
        /// Thrown when the model's vector size differs from the matrix column count.
        /// </exception>
        private void LoadWordEmbedding(string extEmbeddingFilePath, IWeightMatrix embeddingMatrix, ConcurrentDictionary<string, int> wordToIndex)
        {
            var pretrained = new Txt2Vec.Model();
            pretrained.LoadBinaryModel(extEmbeddingFilePath);

            // The external vectors can only be copied row by row when both widths agree.
            bool sameWidth = pretrained.VectorSize == embeddingMatrix.Columns;
            if (!sameWidth)
            {
                throw new ArgumentException($"Inconsistent embedding size. ExtEmbeddingModel size = '{pretrained.VectorSize}', EmbeddingMatrix column size = '{embeddingMatrix.Columns}'");
            }

            foreach (var entry in wordToIndex)
            {
                float[] weights = pretrained.GetVector(entry.Key);
                if (weights == null)
                {
                    // The model returned no vector for this word; its row is not written.
                    continue;
                }

                embeddingMatrix.SetWeightAtRow(entry.Value, weights);
            }
        }
示例#3
0
        /// <summary>
        /// Builds the featurizer from a binary Txt2Vec model file.
        /// </summary>
        /// <remarks>
        /// Stores the model's <c>VectorSize</c> in <c>vectorSize</c>, creates <c>m_UnkEmbedding</c>
        /// from that size alone, and fills <c>m_WordEmbedding</c> with one <c>SingleVector</c> per
        /// term for which the model returns a vector.
        /// </remarks>
        /// <param name="filename">Path handed to <c>LoadBinaryModel</c>.</param>
        public WordEMWrapFeaturizer(string filename)
        {
            var embeddingModel = new Txt2Vec.Model();
            embeddingModel.LoadBinaryModel(filename);

            string[] vocabulary = embeddingModel.GetAllTerms();
            vectorSize = embeddingModel.VectorSize;

            m_WordEmbedding = new Dictionary<string, SingleVector>();
            m_UnkEmbedding = new SingleVector(vectorSize);

            for (int i = 0; i < vocabulary.Length; i++)
            {
                string word = vocabulary[i];
                float[] values = embeddingModel.GetVector(word);

                // Terms without a vector are left out of the lookup table.
                if (values == null)
                {
                    continue;
                }

                m_WordEmbedding.Add(word, new SingleVector(vectorSize, values));
            }
        }
示例#4
0
        /// <summary>
        /// Builds the featurizer from a Txt2Vec model file.
        /// </summary>
        /// <remarks>
        /// Stores the model's <c>VectorSize</c> in <c>vectorSize</c>, creates <c>m_UnkEmbedding</c>
        /// from that size alone, and fills <c>m_WordEmbedding</c> with one <c>SingleVector</c> per
        /// term for which the model returns a vector.
        /// </remarks>
        /// <param name="filename">Path of the model file, forwarded to <c>LoadModel</c>.</param>
        /// <param name="textFormat">
        /// Forwarded unchanged to <c>LoadModel</c> as its second argument; defaults to <c>false</c>.
        /// </param>
        public WordEMWrapFeaturizer(string filename, bool textFormat = false)
        {
            var source = new Txt2Vec.Model();
            source.LoadModel(filename, textFormat);

            string[] allTerms = source.GetAllTerms();
            vectorSize = source.VectorSize;

            m_WordEmbedding = new Dictionary<string, SingleVector>();
            m_UnkEmbedding = new SingleVector(vectorSize);

            foreach (string current in allTerms)
            {
                float[] raw = source.GetVector(current);
                if (raw == null)
                {
                    // No vector for this term, so nothing is registered for it.
                    continue;
                }

                var wrapped = new SingleVector(vectorSize, raw);
                m_WordEmbedding.Add(current, wrapped);
            }
        }