/// <summary>
/// Copies pretrained word vectors from an external Txt2Vec model file into the rows of
/// <paramref name="embeddingMatrix"/>.
/// </summary>
/// <param name="extEmbeddingFilePath">
/// Path of the external embedding model. A path ending in "txt" (any casing) is loaded with
/// <c>LoadTextModel</c>; every other path is loaded with <c>LoadBinaryModel</c>.
/// </param>
/// <param name="embeddingMatrix">
/// Destination of the vectors. Its <c>Columns</c> value must equal the model's <c>VectorSize</c>.
/// </param>
/// <param name="wordToIndex">
/// Pairs of word and destination row index. Words for which the model returns no vector are
/// skipped, so <c>SetWeightAtRow</c> is never called for their rows.
/// </param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// The model's vector size differs from the column count of <paramref name="embeddingMatrix"/>.
/// </exception>
private void LoadWordEmbedding(string extEmbeddingFilePath, IWeightTensor embeddingMatrix, IEnumerable<KeyValuePair<string, int>> wordToIndex)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException further down.
    if (extEmbeddingFilePath == null)
    {
        throw new ArgumentNullException(nameof(extEmbeddingFilePath));
    }

    if (embeddingMatrix == null)
    {
        throw new ArgumentNullException(nameof(embeddingMatrix));
    }

    if (wordToIndex == null)
    {
        throw new ArgumentNullException(nameof(wordToIndex));
    }

    Txt2Vec.Model extEmbeddingModel = new Txt2Vec.Model();

    // A file suffix is an identifier, not linguistic text, so compare it ordinally
    // rather than with culture-sensitive rules.
    if (extEmbeddingFilePath.EndsWith("txt", StringComparison.OrdinalIgnoreCase))
    {
        extEmbeddingModel.LoadTextModel(extEmbeddingFilePath);
    }
    else
    {
        extEmbeddingModel.LoadBinaryModel(extEmbeddingFilePath);
    }

    if (extEmbeddingModel.VectorSize != embeddingMatrix.Columns)
    {
        throw new ArgumentException($"Inconsistent embedding size. ExtEmbeddingModel size = '{extEmbeddingModel.VectorSize}', EmbeddingMatrix column size = '{embeddingMatrix.Columns}'");
    }

    foreach (KeyValuePair<string, int> pair in wordToIndex)
    {
        float[] vector = extEmbeddingModel.GetVector(pair.Key);
        if (vector != null)
        {
            embeddingMatrix.SetWeightAtRow(pair.Value, vector);
        }
    }
}
/// <summary>
/// Loads a binary Txt2Vec model and writes its vectors into the matching rows of
/// <paramref name="embeddingMatrix"/>.
/// </summary>
/// <param name="extEmbeddingFilePath">Path handed to <c>LoadBinaryModel</c>.</param>
/// <param name="embeddingMatrix">
/// Matrix receiving the vectors; its <c>Columns</c> must match the model's <c>VectorSize</c>.
/// </param>
/// <param name="wordToIndex">
/// Maps each word to the row that should receive its vector. Words for which the model
/// returns <c>null</c> are skipped.
/// </param>
/// <exception cref="ArgumentException">
/// The model's vector size and the matrix column count differ.
/// </exception>
private void LoadWordEmbedding(string extEmbeddingFilePath, IWeightMatrix embeddingMatrix, ConcurrentDictionary<string, int> wordToIndex)
{
    Txt2Vec.Model pretrained = new Txt2Vec.Model();
    pretrained.LoadBinaryModel(extEmbeddingFilePath);

    // Dimensions must agree before any row is overwritten.
    if (pretrained.VectorSize != embeddingMatrix.Columns)
    {
        throw new ArgumentException($"Inconsistent embedding size. ExtEmbeddingModel size = '{pretrained.VectorSize}', EmbeddingMatrix column size = '{embeddingMatrix.Columns}'");
    }

    foreach (KeyValuePair<string, int> entry in wordToIndex)
    {
        float[] weights = pretrained.GetVector(entry.Key);
        if (weights == null)
        {
            // The model has no vector for this word; leave its row alone.
            continue;
        }

        embeddingMatrix.SetWeightAtRow(entry.Value, weights);
    }
}
/// <summary>
/// Builds the featurizer from a binary Txt2Vec model file, caching one
/// <c>SingleVector</c> per term that the model can supply a vector for.
/// </summary>
/// <param name="filename">Path handed to <c>LoadBinaryModel</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="filename"/> is <c>null</c>.</exception>
public WordEMWrapFeaturizer(string filename)
{
    // Validate at the public entry point so the failure names the offending argument.
    if (filename == null)
    {
        throw new ArgumentNullException(nameof(filename));
    }

    Txt2Vec.Model model = new Txt2Vec.Model();
    model.LoadBinaryModel(filename);

    string[] terms = model.GetAllTerms();
    vectorSize = model.VectorSize;

    // The term count is known up front, so size the dictionary once instead of
    // letting it grow repeatedly while the terms are added.
    m_WordEmbedding = new Dictionary<string, SingleVector>(terms.Length);

    // Built from the vector size alone; presumably the stand-in for unknown words
    // (initial contents depend on SingleVector's constructor — confirm).
    m_UnkEmbedding = new SingleVector(vectorSize);

    foreach (string term in terms)
    {
        float[] vector = model.GetVector(term);
        if (vector != null)
        {
            SingleVector spVector = new SingleVector(vectorSize, vector);
            m_WordEmbedding.Add(term, spVector);
        }
    }
}
/// <summary>
/// Builds the featurizer from a Txt2Vec model file, caching one <c>SingleVector</c>
/// per term that the model can supply a vector for.
/// </summary>
/// <param name="filename">Path handed to <c>LoadModel</c>.</param>
/// <param name="textFormat">
/// Forwarded unchanged to <c>LoadModel</c>; presumably selects the text format over the
/// binary one — confirm against Txt2Vec.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="filename"/> is <c>null</c>.</exception>
public WordEMWrapFeaturizer(string filename, bool textFormat = false)
{
    // Validate at the public entry point so the failure names the offending argument.
    if (filename == null)
    {
        throw new ArgumentNullException(nameof(filename));
    }

    Txt2Vec.Model model = new Txt2Vec.Model();
    model.LoadModel(filename, textFormat);

    string[] terms = model.GetAllTerms();
    vectorSize = model.VectorSize;

    // The term count is known up front, so size the dictionary once instead of
    // letting it grow repeatedly while the terms are added.
    m_WordEmbedding = new Dictionary<string, SingleVector>(terms.Length);

    // Built from the vector size alone; presumably the stand-in for unknown words
    // (initial contents depend on SingleVector's constructor — confirm).
    m_UnkEmbedding = new SingleVector(vectorSize);

    foreach (string term in terms)
    {
        float[] vector = model.GetVector(term);
        if (vector != null)
        {
            SingleVector spVector = new SingleVector(vectorSize, vector);
            m_WordEmbedding.Add(term, spVector);
        }
    }
}