/// <summary>
/// Copies vectors from an external Txt2Vec embedding model into the rows of an embedding matrix.
/// </summary>
/// <param name="extEmbeddingFilePath">
/// Path of the external embedding model. A path ending in "txt" (case-insensitive) is loaded
/// through <c>LoadTextModel</c>; any other path is loaded through <c>LoadBinaryModel</c>.
/// </param>
/// <param name="embeddingMatrix">Matrix whose rows are overwritten with the external vectors.</param>
/// <param name="wordToIndex">Pairs of a word and the matrix row index assigned to that word.</param>
/// <exception cref="ArgumentException">
/// The external model's vector size is different from the column count of <paramref name="embeddingMatrix"/>.
/// </exception>
private void LoadWordEmbedding(string extEmbeddingFilePath, IWeightTensor embeddingMatrix, IEnumerable<KeyValuePair<string, int>> wordToIndex)
{
    Txt2Vec.Model externalModel = new Txt2Vec.Model();

    // The loader is picked purely from the file name suffix.
    bool isTextModel = extEmbeddingFilePath.EndsWith("txt", StringComparison.InvariantCultureIgnoreCase);
    if (!isTextModel)
    {
        externalModel.LoadBinaryModel(extEmbeddingFilePath);
    }
    else
    {
        externalModel.LoadTextModel(extEmbeddingFilePath);
    }

    // Refuse to mix vectors of one width into a matrix of another.
    if (externalModel.VectorSize != embeddingMatrix.Columns)
    {
        throw new ArgumentException($"Inconsistent embedding size. ExtEmbeddingModel size = '{externalModel.VectorSize}', EmbeddingMatrix column size = '{embeddingMatrix.Columns}'");
    }

    foreach (KeyValuePair<string, int> entry in wordToIndex)
    {
        float[] weights = externalModel.GetVector(entry.Key);
        if (weights == null)
        {
            // No vector returned for this word: its row is left untouched.
            continue;
        }

        embeddingMatrix.SetWeightAtRow(entry.Value, weights);
    }
}
/// <summary>
/// Handles the VQ-model build mode: reads the <c>-modelfile</c> and <c>-vqmodelfile</c>
/// arguments, loads the source model in binary form and calls <c>BuildVQModel</c> on it.
/// </summary>
/// <param name="args">Command-line arguments to scan for the two file-name options.</param>
/// <remarks>
/// If either file name is missing, an error is logged, the usage text is printed and the
/// method returns without loading anything.
/// </remarks>
private static void BuildVQMode(string[] args)
{
    string sourceModelPath = null;
    string vqModelPath = null;

    // Each option's value is the argument immediately after the flag.
    int modelPos = ArgPos("-modelfile", args);
    if (modelPos >= 0)
    {
        sourceModelPath = args[modelPos + 1];
    }

    int vqPos = ArgPos("-vqmodelfile", args);
    if (vqPos >= 0)
    {
        vqModelPath = args[vqPos + 1];
    }

    if (sourceModelPath == null)
    {
        Logger.WriteLine(Logger.Level.err, "Failed: must to set the model file name");
        UsageVQModel();
        return;
    }

    if (vqModelPath == null)
    {
        Logger.WriteLine(Logger.Level.err, "Failed: must to set the VQ model file name");
        UsageVQModel();
        return;
    }

    Txt2Vec.Model sourceModel = new Txt2Vec.Model();
    sourceModel.LoadBinaryModel(sourceModelPath);
    sourceModel.BuildVQModel(vqModelPath);
}
/// <summary>
/// Loads a binary Txt2Vec embedding model and writes its vectors into the matching rows of
/// an embedding matrix.
/// </summary>
/// <param name="extEmbeddingFilePath">Path of the external embedding model, read with <c>LoadBinaryModel</c>.</param>
/// <param name="embeddingMatrix">Matrix whose rows are overwritten with the external vectors.</param>
/// <param name="wordToIndex">Map from a word to the matrix row index assigned to that word.</param>
/// <exception cref="ArgumentException">
/// The external model's vector size is different from the column count of <paramref name="embeddingMatrix"/>.
/// </exception>
private void LoadWordEmbedding(string extEmbeddingFilePath, IWeightMatrix embeddingMatrix, ConcurrentDictionary<string, int> wordToIndex)
{
    Txt2Vec.Model pretrained = new Txt2Vec.Model();
    pretrained.LoadBinaryModel(extEmbeddingFilePath);

    // Vector width must match the matrix width before any row is written.
    bool sizesMatch = pretrained.VectorSize == embeddingMatrix.Columns;
    if (!sizesMatch)
    {
        throw new ArgumentException($"Inconsistent embedding size. ExtEmbeddingModel size = '{pretrained.VectorSize}', EmbeddingMatrix column size = '{embeddingMatrix.Columns}'");
    }

    foreach (KeyValuePair<string, int> wordAndRow in wordToIndex)
    {
        float[] rowValues = pretrained.GetVector(wordAndRow.Key);
        if (rowValues == null)
        {
            // No vector returned for this word: keep whatever the row already holds.
            continue;
        }

        embeddingMatrix.SetWeightAtRow(wordAndRow.Value, rowValues);
    }
}
/// <summary>
/// Builds the featurizer from a binary Txt2Vec model file.
/// </summary>
/// <param name="filename">Path of the model file, read with <c>LoadBinaryModel</c>.</param>
/// <remarks>
/// Every term reported by the model whose vector is non-null is stored in the word-embedding
/// dictionary. A separate <c>SingleVector</c> of the model's vector size is created for
/// <c>m_UnkEmbedding</c> (presumably the fallback for unknown words; confirm against callers).
/// </remarks>
public WordEMWrapFeaturizer(string filename)
{
    Txt2Vec.Model embeddingModel = new Txt2Vec.Model();
    embeddingModel.LoadBinaryModel(filename);

    string[] vocabulary = embeddingModel.GetAllTerms();
    vectorSize = embeddingModel.VectorSize;

    m_WordEmbedding = new Dictionary<string, SingleVector>();
    m_UnkEmbedding = new SingleVector(vectorSize);

    for (int idx = 0; idx < vocabulary.Length; idx++)
    {
        string word = vocabulary[idx];
        float[] rawVector = embeddingModel.GetVector(word);
        if (rawVector == null)
        {
            // Terms without a vector are not added to the dictionary.
            continue;
        }

        m_WordEmbedding.Add(word, new SingleVector(vectorSize, rawVector));
    }
}
/// <summary>
/// Builds the featurizer from a Txt2Vec model file.
/// </summary>
/// <param name="filename">Path of the model file.</param>
/// <param name="textFormat">
/// Passed straight through to <c>Model.LoadModel</c>; presumably selects the text format
/// instead of the binary one (confirm against Txt2Vec).
/// </param>
/// <remarks>
/// Every term reported by the model whose vector is non-null is stored in the word-embedding
/// dictionary. A separate <c>SingleVector</c> of the model's vector size is created for
/// <c>m_UnkEmbedding</c>.
/// </remarks>
public WordEMWrapFeaturizer(string filename, bool textFormat = false)
{
    Txt2Vec.Model source = new Txt2Vec.Model();
    source.LoadModel(filename, textFormat);

    string[] allTerms = source.GetAllTerms();
    vectorSize = source.VectorSize;

    m_WordEmbedding = new Dictionary<string, SingleVector>();
    m_UnkEmbedding = new SingleVector(vectorSize);

    foreach (string entry in allTerms)
    {
        float[] values = source.GetVector(entry);
        if (values == null)
        {
            // Terms without a vector are not added to the dictionary.
            continue;
        }

        SingleVector wrapped = new SingleVector(vectorSize, values);
        m_WordEmbedding.Add(entry, wrapped);
    }
}
/// <summary>
/// Runs the interactive console loop for the "distance" and "analogy" run modes.
/// </summary>
/// <param name="args">
/// Command-line arguments. Recognised options: <c>-txtmodel</c> (value 1 sets the text-format
/// flag passed to <c>LoadModel</c>), <c>-modelfile</c> (required model path) and
/// <c>-maxword</c> (result count passed to the decoder, default 40).
/// </param>
/// <param name="strRunMode">
/// "distance" or "analogy". Any other value makes the loop read lines without producing output.
/// </param>
/// <remarks>
/// In distance mode a line without a tab is looked up with <c>Decoder.Distance</c>; a line with
/// a tab is split into two whitespace-separated term lists that are scored with
/// <c>Decoder.Similarity</c>. In analogy mode the line is split on whitespace; "+" and "-"
/// tokens set the operation applied to the terms that follow (ADD initially).
/// The loop ends on the literal line "EXIT" or when the input stream is exhausted.
/// </remarks>
private static void DistanceAnalogyMode(string[] args, string strRunMode)
{
    int i;
    string strModelFileName = null;
    int N = 40;
    bool bTxtFormat = false;

    if ((i = ArgPos("-txtmodel", args)) >= 0)
    {
        bTxtFormat = int.Parse(args[i + 1]) == 1;
    }
    if ((i = ArgPos("-modelfile", args)) >= 0)
    {
        strModelFileName = args[i + 1];
    }
    if ((i = ArgPos("-maxword", args)) >= 0)
    {
        N = int.Parse(args[i + 1]);
    }

    if (strModelFileName == null)
    {
        Logger.WriteLine(Logger.Level.err, "Failed: must to set the model file name");
        if (strRunMode == "distance")
        {
            UsageDistance();
        }
        else
        {
            UsageAnalogy();
        }
        return;
    }

    if (System.IO.File.Exists(strModelFileName) == false)
    {
        Logger.WriteLine(Logger.Level.err, "Failed: model file {0} isn't existed.", strModelFileName);
        if (strRunMode == "distance")
        {
            UsageDistance();
        }
        else
        {
            UsageAnalogy();
        }
        return;
    }

    Txt2Vec.Model model = new Txt2Vec.Model();
    model.LoadModel(strModelFileName, bTxtFormat);

    Txt2Vec.Decoder decoder = new Txt2Vec.Decoder(model);

    while (true)
    {
        Console.WriteLine("Enter word or sentence (EXIT to break): ");
        string strLine = Console.ReadLine();

        // Console.ReadLine returns null once the input stream ends (closed or redirected
        // stdin). Treat that like EXIT instead of dereferencing null below.
        if (strLine == null || strLine == "EXIT")
        {
            break;
        }

        if (strRunMode == "distance")
        {
            // A tab separates two sentences to compare; without a tab the whole line is a query.
            string[] sents = strLine.Split('\t');
            if (sents.Length == 1)
            {
                List<Txt2Vec.Result> wsdRstList = decoder.Distance(sents[0], N);
                OutputResult(wsdRstList);
            }
            else
            {
                string[] terms1 = sents[0].Split();
                string[] terms2 = sents[1].Split();
                double score = decoder.Similarity(terms1, terms2);
                Console.WriteLine("Similarity score: {0}", score);
            }
        }
        else if (strRunMode == "analogy")
        {
            string[] terms = strLine.Split();

            // The most recent "+" or "-" token applies to every term until the next operator.
            Txt2Vec.TermOperation operation = Txt2Vec.TermOperation.ADD;
            List<Txt2Vec.TermOP> termOPList = new List<Txt2Vec.TermOP>();
            foreach (string item in terms)
            {
                if (item == "+")
                {
                    operation = Txt2Vec.TermOperation.ADD;
                }
                else if (item == "-")
                {
                    operation = Txt2Vec.TermOperation.SUB;
                }
                else
                {
                    Txt2Vec.TermOP termOP = new Txt2Vec.TermOP();
                    termOP.strTerm = item;
                    termOP.operation = operation;
                    termOPList.Add(termOP);
                }
            }

            List<Txt2Vec.Result> wsdRstList = decoder.Distance(termOPList, N);
            OutputResult(wsdRstList);
        }
    }
}