/// <summary>
/// Builds the featurizer from a binary Txt2Vec model file.
/// </summary>
/// <remarks>
/// Every term reported by the decoder whose vector is non-null is wrapped in a
/// <c>SingleVector</c>, normalized, and stored in <c>m_WordEmbedding</c>.
/// <c>m_UnkEmbedding</c> is allocated with the model's vector size.
/// </remarks>
/// <param name="filename">Path handed to the decoder's <c>LoadBinaryModel</c>.</param>
public WordEMWrapFeaturizer(string filename)
{
    var decoder = new Txt2Vec.Decoder();
    decoder.LoadBinaryModel(filename);

    string[] allTerms = decoder.GetAllTerms();
    vectorSize = decoder.GetVectorSize();

    m_WordEmbedding = new Dictionary<string, SingleVector>();
    m_UnkEmbedding = new SingleVector(vectorSize);

    for (int index = 0; index < allTerms.Length; index++)
    {
        string currentTerm = allTerms[index];
        double[] rawValues = decoder.GetVector(currentTerm);

        // Terms without a vector are simply not cached.
        if (rawValues == null)
        {
            continue;
        }

        var embedding = new SingleVector(vectorSize, rawValues);
        embedding.Normalize();
        m_WordEmbedding.Add(currentTerm, embedding);
    }
}
/// <summary>
/// Handles the dump run mode: loads a binary model and has the decoder dump it to a text file.
/// </summary>
/// <remarks>
/// Reads the model path from <c>-modelfile</c> and the output path from <c>-txtfile</c>.
/// If either is absent, a failure message and the dump usage text are printed and
/// nothing is loaded.
/// </remarks>
/// <param name="args">Command-line arguments to scan for the two options.</param>
private static void DumpMode(string[] args)
{
    int modelIndex = ArgPos("-modelfile", args);
    string modelPath = modelIndex >= 0 ? args[modelIndex + 1] : null;

    int textIndex = ArgPos("-txtfile", args);
    string textPath = textIndex >= 0 ? args[textIndex + 1] : null;

    // The model file is checked first, so its message wins when both are missing.
    if (modelPath == null)
    {
        Console.WriteLine("Failed: must to set the model file name");
        UsageDumpModel();
        return;
    }

    if (textPath == null)
    {
        Console.WriteLine("Failed: must to set the text file name");
        UsageDumpModel();
        return;
    }

    var decoder = new Txt2Vec.Decoder();
    decoder.LoadBinaryModel(modelPath);
    decoder.DumpModel(textPath);
}
/// <summary>
/// Converts a list of words into a <c>Vec</c> by passing them to the decoder's <c>ToVector</c>.
/// </summary>
/// <param name="words">Words forwarded to the decoder as a single term array.</param>
/// <returns>A new <c>Vec</c> whose <c>VecNodes</c> holds the decoder output as a list.</returns>
public Vec Sent2Vec(List<string> words)
{
    Vec result = new Vec();
    var decoder = new Txt2Vec.Decoder(Model);

    var decoded = decoder.ToVector(words.ToArray());
    result.VecNodes = decoded.ToList();

    return result;
}
/// <summary>
/// For each word, prints the word and then every term/score pair returned by the
/// decoder's <c>Distance</c> lookup for it.
/// </summary>
/// <param name="words">Words to look up, processed in list order.</param>
public void Distance(List<string> words)
{
    var decoder = new Txt2Vec.Decoder(Model);

    foreach (string word in words)
    {
        Console.WriteLine($"current word: {word}");

        List<Result> neighbours = decoder.Distance(word);
        foreach (Result neighbour in neighbours)
        {
            Console.WriteLine($"{neighbour.strTerm}: {neighbour.score}");
        }
    }
}
/// <summary>
/// Converts a single word into a <c>Vec</c> by passing it to the decoder's <c>ToVector</c>
/// as a one-element term array.
/// </summary>
/// <param name="word">The word to convert.</param>
/// <returns>A new <c>Vec</c> whose <c>VecNodes</c> holds the decoder output as a list.</returns>
public Vec Word2Vec(string word)
{
    Vec result = new Vec();
    var decoder = new Txt2Vec.Decoder(Model);

    string[] singleTerm = new string[] { word };
    var decoded = decoder.ToVector(singleTerm);
    result.VecNodes = decoded.ToList();

    return result;
}
/// <summary>
/// Runs the interactive "distance" or "analogy" console loop against a model file.
/// </summary>
/// <remarks>
/// Options read from <paramref name="args"/>:
/// <c>-modelfile</c> (required path to the model),
/// <c>-txtmodel</c> (the value 1 sets the format flag passed to <c>LoadModel</c>),
/// <c>-maxword</c> (the N passed to the decoder's <c>Distance</c>; defaults to 40).
/// If the model file name is missing or the file does not exist, a failure message and the
/// usage text for the selected mode are printed and the method returns.
/// The loop ends when the user enters <c>EXIT</c> or when standard input is exhausted.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
/// <param name="strRunMode">"distance" or "analogy"; any other value makes the loop ignore input lines.</param>
private static void DistanceAnalogyMode(string[] args, string strRunMode)
{
    int i;
    string strModelFileName = null;
    int N = 40;
    bool bTxtFormat = false;

    if ((i = ArgPos("-txtmodel", args)) >= 0)
    {
        bTxtFormat = int.Parse(args[i + 1]) == 1;
    }
    if ((i = ArgPos("-modelfile", args)) >= 0)
    {
        strModelFileName = args[i + 1];
    }
    if ((i = ArgPos("-maxword", args)) >= 0)
    {
        N = int.Parse(args[i + 1]);
    }

    if (strModelFileName == null)
    {
        Console.WriteLine("Failed: must to set the model file name");
        if (strRunMode == "distance")
        {
            UsageDistance();
        }
        else
        {
            UsageAnalogy();
        }
        return;
    }

    if (!System.IO.File.Exists(strModelFileName))
    {
        Console.WriteLine("Failed: model file {0} isn't existed.", strModelFileName);
        if (strRunMode == "distance")
        {
            UsageDistance();
        }
        else
        {
            UsageAnalogy();
        }
        return;
    }

    Txt2Vec.Decoder decoder = new Txt2Vec.Decoder();
    decoder.LoadModel(strModelFileName, bTxtFormat);

    while (true)
    {
        Console.WriteLine("Enter word or sentence (EXIT to break): ");
        string strLine = Console.ReadLine();

        // Console.ReadLine returns null once standard input is exhausted (e.g. piped or
        // redirected input). Treat that like EXIT instead of dereferencing null below.
        if (strLine == null || strLine == "EXIT")
        {
            break;
        }

        if (strRunMode == "distance")
        {
            // A tab separates two sentences to compare; without one the line is a single query.
            string[] sents = strLine.Split('\t');
            if (sents.Length == 1)
            {
                List<Txt2Vec.Result> wsdRstList = decoder.Distance(sents[0], N);
                OutputResult(wsdRstList);
            }
            else
            {
                string[] terms1 = sents[0].Split();
                string[] terms2 = sents[1].Split();
                double score = decoder.Similarity(terms1, terms2);
                Console.WriteLine("Similarity score: {0}", score);
            }
        }
        else if (strRunMode == "analogy")
        {
            string[] terms = strLine.Split();

            // "+" and "-" tokens are not terms themselves: they set the operation applied
            // to every following term until the next operator. The initial operation is ADD.
            Txt2Vec.TermOperation operation = Txt2Vec.TermOperation.ADD;
            List<Txt2Vec.TermOP> termOPList = new List<Txt2Vec.TermOP>();
            foreach (string item in terms)
            {
                if (item == "+")
                {
                    operation = Txt2Vec.TermOperation.ADD;
                }
                else if (item == "-")
                {
                    operation = Txt2Vec.TermOperation.SUB;
                }
                else
                {
                    Txt2Vec.TermOP termOP = new Txt2Vec.TermOP();
                    termOP.strTerm = item;
                    termOP.operation = operation;
                    termOPList.Add(termOP);
                }
            }

            List<Txt2Vec.Result> wsdRstList = decoder.Distance(termOPList, N);
            OutputResult(wsdRstList);
        }
    }
}
/// <summary>
/// Dump run mode entry point: loads the binary model named by <c>-modelfile</c> and asks the
/// decoder to dump it to the file named by <c>-txtfile</c>.
/// </summary>
/// <remarks>
/// When either option is not found, the matching failure message and the dump usage text
/// are printed and the method returns without loading anything.
/// </remarks>
/// <param name="args">Command-line arguments to scan.</param>
private static void DumpMode(string[] args)
{
    string binaryModelPath = null;
    string dumpTargetPath = null;

    int position = ArgPos("-modelfile", args);
    if (position >= 0)
    {
        binaryModelPath = args[position + 1];
    }

    position = ArgPos("-txtfile", args);
    if (position >= 0)
    {
        dumpTargetPath = args[position + 1];
    }

    // Report the first missing option only; the model file takes precedence.
    if (binaryModelPath == null || dumpTargetPath == null)
    {
        if (binaryModelPath == null)
        {
            Console.WriteLine("Failed: must to set the model file name");
        }
        else
        {
            Console.WriteLine("Failed: must to set the text file name");
        }
        UsageDumpModel();
        return;
    }

    var modelDecoder = new Txt2Vec.Decoder();
    modelDecoder.LoadBinaryModel(binaryModelPath);
    modelDecoder.DumpModel(dumpTargetPath);
}
/// <summary>
/// Runs the interactive "distance" or "analogy" console loop against a model file.
/// </summary>
/// <remarks>
/// Options read from <paramref name="args"/>:
/// <c>-modelfile</c> (required path to the model),
/// <c>-txtmodel</c> (the value 1 sets the format flag passed to <c>Model.LoadModel</c>),
/// <c>-maxword</c> (the N passed to the decoder's <c>Distance</c>; defaults to 40).
/// If the model file name is missing or the file does not exist, an error is logged, the
/// usage text for the selected mode is printed and the method returns.
/// The loop ends when the user enters <c>EXIT</c> or when standard input is exhausted.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
/// <param name="strRunMode">"distance" or "analogy"; any other value makes the loop ignore input lines.</param>
private static void DistanceAnalogyMode(string[] args, string strRunMode)
{
    int i;
    string strModelFileName = null;
    int N = 40;
    bool bTxtFormat = false;

    if ((i = ArgPos("-txtmodel", args)) >= 0)
    {
        bTxtFormat = int.Parse(args[i + 1]) == 1;
    }
    if ((i = ArgPos("-modelfile", args)) >= 0)
    {
        strModelFileName = args[i + 1];
    }
    if ((i = ArgPos("-maxword", args)) >= 0)
    {
        N = int.Parse(args[i + 1]);
    }

    if (strModelFileName == null)
    {
        Logger.WriteLine(Logger.Level.err, "Failed: must to set the model file name");
        if (strRunMode == "distance")
        {
            UsageDistance();
        }
        else
        {
            UsageAnalogy();
        }
        return;
    }

    if (!System.IO.File.Exists(strModelFileName))
    {
        Logger.WriteLine(Logger.Level.err, "Failed: model file {0} isn't existed.", strModelFileName);
        if (strRunMode == "distance")
        {
            UsageDistance();
        }
        else
        {
            UsageAnalogy();
        }
        return;
    }

    Txt2Vec.Model model = new Txt2Vec.Model();
    model.LoadModel(strModelFileName, bTxtFormat);
    Txt2Vec.Decoder decoder = new Txt2Vec.Decoder(model);

    while (true)
    {
        Console.WriteLine("Enter word or sentence (EXIT to break): ");
        string strLine = Console.ReadLine();

        // Console.ReadLine returns null once standard input is exhausted (e.g. piped or
        // redirected input). Treat that like EXIT instead of dereferencing null below.
        if (strLine == null || strLine == "EXIT")
        {
            break;
        }

        if (strRunMode == "distance")
        {
            // A tab separates two sentences to compare; without one the line is a single query.
            string[] sents = strLine.Split('\t');
            if (sents.Length == 1)
            {
                List<Txt2Vec.Result> wsdRstList = decoder.Distance(sents[0], N);
                OutputResult(wsdRstList);
            }
            else
            {
                string[] terms1 = sents[0].Split();
                string[] terms2 = sents[1].Split();
                double score = decoder.Similarity(terms1, terms2);
                Console.WriteLine("Similarity score: {0}", score);
            }
        }
        else if (strRunMode == "analogy")
        {
            string[] terms = strLine.Split();

            // "+" and "-" tokens are not terms themselves: they set the operation applied
            // to every following term until the next operator. The initial operation is ADD.
            Txt2Vec.TermOperation operation = Txt2Vec.TermOperation.ADD;
            List<Txt2Vec.TermOP> termOPList = new List<Txt2Vec.TermOP>();
            foreach (string item in terms)
            {
                if (item == "+")
                {
                    operation = Txt2Vec.TermOperation.ADD;
                }
                else if (item == "-")
                {
                    operation = Txt2Vec.TermOperation.SUB;
                }
                else
                {
                    Txt2Vec.TermOP termOP = new Txt2Vec.TermOP();
                    termOP.strTerm = item;
                    termOP.operation = operation;
                    termOPList.Add(termOP);
                }
            }

            List<Txt2Vec.Result> wsdRstList = decoder.Distance(termOPList, N);
            OutputResult(wsdRstList);
        }
    }
}