Пример #1
0
        /// <summary>
        /// Loads a binary Txt2Vec model and caches a normalized embedding vector
        /// for every term the decoder can supply a vector for.
        /// </summary>
        /// <param name="filename">Path passed to <c>Decoder.LoadBinaryModel</c>.</param>
        public WordEMWrapFeaturizer(string filename)
        {
            var modelDecoder = new Txt2Vec.Decoder();
            modelDecoder.LoadBinaryModel(filename);

            string[] vocabulary = modelDecoder.GetAllTerms();
            vectorSize = modelDecoder.GetVectorSize();

            m_WordEmbedding = new Dictionary<string, SingleVector>();
            // Built from the vector size alone; presumably the fallback for
            // out-of-vocabulary words (confirm against the lookup code).
            m_UnkEmbedding = new SingleVector(vectorSize);

            foreach (string word in vocabulary)
            {
                double[] rawValues = modelDecoder.GetVector(word);
                if (rawValues == null)
                {
                    // The decoder returned no vector for this term; leave it out of the table.
                    continue;
                }

                var embedding = new SingleVector(vectorSize, rawValues);
                embedding.Normalize();
                m_WordEmbedding.Add(word, embedding);
            }
        }
Пример #2
0
        /// <summary>
        /// Runs the dump command: reads the <c>-modelfile</c> and <c>-txtfile</c> options,
        /// loads the binary model and has the decoder dump it to the text file.
        /// When either option is absent, prints an error followed by the usage text
        /// and returns without loading anything.
        /// </summary>
        /// <param name="args">Command-line arguments to scan for the two options.</param>
        private static void DumpMode(string[] args)
        {
            string modelPath = null;
            string textPath = null;

            // The value of an option is the argument immediately after its flag.
            int pos = ArgPos("-modelfile", args);
            if (pos >= 0)
            {
                modelPath = args[pos + 1];
            }

            pos = ArgPos("-txtfile", args);
            if (pos >= 0)
            {
                textPath = args[pos + 1];
            }

            // The model file is checked first, so only one error is reported at a time.
            string failure = null;
            if (modelPath == null)
            {
                failure = "Failed: must to set the model file name";
            }
            else if (textPath == null)
            {
                failure = "Failed: must to set the text file name";
            }

            if (failure != null)
            {
                Console.WriteLine(failure);
                UsageDumpModel();
                return;
            }

            var modelDecoder = new Txt2Vec.Decoder();
            modelDecoder.LoadBinaryModel(modelPath);
            modelDecoder.DumpModel(textPath);
        }
Пример #3
0
        /// <summary>
        /// Wraps the decoder's vector for a list of words in a <c>Vec</c>.
        /// </summary>
        /// <param name="words">Words handed to <c>Decoder.ToVector</c> as an array.</param>
        /// <returns>A new <c>Vec</c> whose <c>VecNodes</c> holds the decoder output as a list.</returns>
        public Vec Sent2Vec(List<string> words)
        {
            // Evaluation order matches a step-by-step version: the Vec is constructed
            // first, then the decoder, then the conversion chain runs.
            return new Vec
            {
                VecNodes = new Txt2Vec.Decoder(Model).ToVector(words.ToArray()).ToList()
            };
        }
Пример #4
0
 /// <summary>
 /// For each word, prints the word and then every result returned by
 /// <c>Decoder.Distance</c> as "term: score", one per line.
 /// </summary>
 /// <param name="words">Words to look up, processed in list order.</param>
 public void Distance(List<string> words)
 {
     var lookup = new Txt2Vec.Decoder(Model);
     foreach (string word in words)
     {
         Console.WriteLine($"current word: {word}");

         List<Result> neighbors = lookup.Distance(word);
         foreach (Result s in neighbors)
         {
             Console.WriteLine($"{s.strTerm}: {s.score}");
         }
     }
 }
Пример #5
0
        /// <summary>
        /// Wraps the decoder's vector for a single word in a <c>Vec</c>.
        /// </summary>
        /// <param name="word">The word, passed to <c>Decoder.ToVector</c> as a one-element array.</param>
        /// <returns>A new <c>Vec</c> whose <c>VecNodes</c> holds the decoder output as a list.</returns>
        public Vec Word2Vec(string word)
        {
            // Evaluation order matches a step-by-step version: the Vec is constructed
            // first, then the decoder, then the conversion chain runs.
            return new Vec
            {
                VecNodes = new Txt2Vec.Decoder(Model).ToVector(new string[] { word }).ToList()
            };
        }
Пример #6
0
        /// <summary>
        /// Interactive console loop for the "distance" and "analogy" run modes.
        /// Reads <c>-txtmodel</c>, <c>-modelfile</c> and <c>-maxword</c> from the arguments,
        /// loads the model, then repeatedly reads a line from standard input and prints
        /// the result until the user types <c>EXIT</c> or the input stream ends.
        /// </summary>
        /// <remarks>
        /// In "distance" mode a line without a tab is passed to <c>Decoder.Distance</c>;
        /// a line with a tab is treated as two sentences (the first two tab-separated
        /// segments, each split on whitespace) and their similarity score is printed.
        /// In "analogy" mode the line is split on whitespace; "+" and "-" tokens select
        /// the operation applied to the terms that follow them (ADD is the initial one).
        /// </remarks>
        /// <param name="args">Command-line arguments to scan for options.</param>
        /// <param name="strRunMode">
        /// "distance" or "analogy". Any other value prints the analogy usage on
        /// option errors and makes the loop ignore every input line.
        /// </param>
        private static void DistanceAnalogyMode(string[] args, string strRunMode)
        {
            int i;
            string strModelFileName = null;
            int N = 40; // forwarded to Decoder.Distance; overridable with -maxword
            bool bTxtFormat = false;

            if ((i = ArgPos("-txtmodel", args)) >= 0)
            {
                // "-txtmodel 1" selects the text model format; any other integer means binary.
                bTxtFormat = int.Parse(args[i + 1]) == 1;
            }
            if ((i = ArgPos("-modelfile", args)) >= 0)
            {
                strModelFileName = args[i + 1];
            }
            if ((i = ArgPos("-maxword", args)) >= 0)
            {
                N = int.Parse(args[i + 1]);
            }

            if (strModelFileName == null)
            {
                Console.WriteLine("Failed: must to set the model file name");
                if (strRunMode == "distance")
                {
                    UsageDistance();
                }
                else
                {
                    UsageAnalogy();
                }
                return;
            }
            if (System.IO.File.Exists(strModelFileName) == false)
            {
                Console.WriteLine("Failed: model file {0} isn't existed.", strModelFileName);
                if (strRunMode == "distance")
                {
                    UsageDistance();
                }
                else
                {
                    UsageAnalogy();
                }
                return;
            }

            Txt2Vec.Decoder decoder = new Txt2Vec.Decoder();
            decoder.LoadModel(strModelFileName, bTxtFormat);

            while (true)
            {
                Console.WriteLine("Enter word or sentence (EXIT to break): ");
                string strLine = Console.ReadLine();

                // Console.ReadLine returns null once standard input is exhausted
                // (redirected file, closed pipe, Ctrl+Z/Ctrl+D). Treat that like EXIT
                // instead of dereferencing null in the Split calls below.
                if (strLine == null || strLine == "EXIT")
                {
                    break;
                }

                string[] sents = strLine.Split('\t');

                List<Txt2Vec.Result> wsdRstList = null;
                if (strRunMode == "distance")
                {
                    if (sents.Length == 1)
                    {
                        wsdRstList = decoder.Distance(sents[0], N);
                        OutputResult(wsdRstList);
                    }
                    else
                    {
                        // Only the first two tab-separated segments are compared.
                        string[] terms1 = sents[0].Split();
                        string[] terms2 = sents[1].Split();

                        double score = decoder.Similarity(terms1, terms2);
                        Console.WriteLine("Similarity score: {0}", score);
                    }
                }
                else if (strRunMode == "analogy")
                {
                    string[] terms = strLine.Split();
                    // The current operation sticks until the next "+" or "-" token.
                    Txt2Vec.TermOperation operation = Txt2Vec.TermOperation.ADD;
                    List<Txt2Vec.TermOP> termOPList = new List<Txt2Vec.TermOP>();
                    foreach (string item in terms)
                    {
                        if (item == "+")
                        {
                            operation = Txt2Vec.TermOperation.ADD;
                        }
                        else if (item == "-")
                        {
                            operation = Txt2Vec.TermOperation.SUB;
                        }
                        else
                        {
                            Txt2Vec.TermOP termOP = new Txt2Vec.TermOP();
                            termOP.strTerm = item;
                            termOP.operation = operation;
                            termOPList.Add(termOP);
                        }
                    }

                    wsdRstList = decoder.Distance(termOPList, N);

                    OutputResult(wsdRstList);
                }
            }
        }
Пример #7
0
        /// <summary>
        /// Runs the dump command: looks up <c>-modelfile</c> and <c>-txtfile</c> in the
        /// arguments, loads the binary model and has the decoder dump it to the text file.
        /// A missing option prints an error followed by the usage text and ends the call.
        /// </summary>
        /// <param name="args">Command-line arguments to scan for the two options.</param>
        private static void DumpMode(string[] args)
        {
            // Each option's value is the argument right after its flag.
            int modelIndex = ArgPos("-modelfile", args);
            string modelFile = modelIndex >= 0 ? args[modelIndex + 1] : null;

            int textIndex = ArgPos("-txtfile", args);
            string textFile = textIndex >= 0 ? args[textIndex + 1] : null;

            if (modelFile == null)
            {
                Console.WriteLine("Failed: must to set the model file name");
                UsageDumpModel();
                return;
            }

            if (textFile == null)
            {
                Console.WriteLine("Failed: must to set the text file name");
                UsageDumpModel();
                return;
            }

            var dumper = new Txt2Vec.Decoder();
            dumper.LoadBinaryModel(modelFile);
            dumper.DumpModel(textFile);
        }
Пример #8
0
        /// <summary>
        /// Interactive console loop for the "distance" and "analogy" run modes.
        /// Reads <c>-txtmodel</c>, <c>-modelfile</c> and <c>-maxword</c> from the arguments,
        /// loads the model into a decoder, then repeatedly reads a line from standard
        /// input and prints the result until the user types <c>EXIT</c> or the input
        /// stream ends.
        /// </summary>
        /// <remarks>
        /// In "distance" mode a line without a tab is passed to <c>Decoder.Distance</c>;
        /// a line with a tab is treated as two sentences (the first two tab-separated
        /// segments, each split on whitespace) and their similarity score is printed.
        /// In "analogy" mode the line is split on whitespace; "+" and "-" tokens select
        /// the operation applied to the terms that follow them (ADD is the initial one).
        /// </remarks>
        /// <param name="args">Command-line arguments to scan for options.</param>
        /// <param name="strRunMode">
        /// "distance" or "analogy". Any other value prints the analogy usage on
        /// option errors and makes the loop ignore every input line.
        /// </param>
        private static void DistanceAnalogyMode(string[] args, string strRunMode)
        {
            int    i;
            string strModelFileName = null;
            int    N          = 40; // forwarded to Decoder.Distance; overridable with -maxword
            bool   bTxtFormat = false;

            if ((i = ArgPos("-txtmodel", args)) >= 0)
            {
                // "-txtmodel 1" selects the text model format; any other integer means binary.
                bTxtFormat = int.Parse(args[i + 1]) == 1;
            }
            if ((i = ArgPos("-modelfile", args)) >= 0)
            {
                strModelFileName = args[i + 1];
            }
            if ((i = ArgPos("-maxword", args)) >= 0)
            {
                N = int.Parse(args[i + 1]);
            }


            if (strModelFileName == null)
            {
                Logger.WriteLine(Logger.Level.err, "Failed: must to set the model file name");
                if (strRunMode == "distance")
                {
                    UsageDistance();
                }
                else
                {
                    UsageAnalogy();
                }
                return;
            }
            if (System.IO.File.Exists(strModelFileName) == false)
            {
                Logger.WriteLine(Logger.Level.err, "Failed: model file {0} isn't existed.", strModelFileName);
                if (strRunMode == "distance")
                {
                    UsageDistance();
                }
                else
                {
                    UsageAnalogy();
                }
                return;
            }

            Txt2Vec.Model model = new Txt2Vec.Model();
            model.LoadModel(strModelFileName, bTxtFormat);

            Txt2Vec.Decoder decoder = new Txt2Vec.Decoder(model);
            while (true)
            {
                Console.WriteLine("Enter word or sentence (EXIT to break): ");
                string strLine = Console.ReadLine();

                // Console.ReadLine returns null once standard input is exhausted
                // (redirected file, closed pipe, Ctrl+Z/Ctrl+D). Treat that like EXIT
                // instead of dereferencing null in the Split calls below.
                if (strLine == null || strLine == "EXIT")
                {
                    break;
                }

                string[] sents = strLine.Split('\t');

                List <Txt2Vec.Result> wsdRstList = null;
                if (strRunMode == "distance")
                {
                    if (sents.Length == 1)
                    {
                        wsdRstList = decoder.Distance(sents[0], N);
                        OutputResult(wsdRstList);
                    }
                    else
                    {
                        // Only the first two tab-separated segments are compared.
                        string[] terms1 = sents[0].Split();
                        string[] terms2 = sents[1].Split();

                        double score = decoder.Similarity(terms1, terms2);
                        Console.WriteLine("Similarity score: {0}", score);
                    }
                }
                else if (strRunMode == "analogy")
                {
                    string[] terms = strLine.Split();
                    // The current operation sticks until the next "+" or "-" token.
                    Txt2Vec.TermOperation operation  = Txt2Vec.TermOperation.ADD;
                    List <Txt2Vec.TermOP> termOPList = new List <Txt2Vec.TermOP>();
                    foreach (string item in terms)
                    {
                        if (item == "+")
                        {
                            operation = Txt2Vec.TermOperation.ADD;
                        }
                        else if (item == "-")
                        {
                            operation = Txt2Vec.TermOperation.SUB;
                        }
                        else
                        {
                            Txt2Vec.TermOP termOP = new Txt2Vec.TermOP();
                            termOP.strTerm   = item;
                            termOP.operation = operation;
                            termOPList.Add(termOP);
                        }
                    }

                    wsdRstList = decoder.Distance(termOPList, N);

                    OutputResult(wsdRstList);
                }
            }
        }