/// <summary>
        /// Computes the cosine similarity between a source string and a target string.
        /// Each string is converted with <c>TextUtils.String2Matrix</c>, run through the
        /// corresponding model's <c>Fprop</c>, and the two resulting vectors are compared
        /// with <c>NNModelUtils.CosineSim</c>.
        /// </summary>
        /// <param name="src">source text, in the format accepted by <c>TextUtils.String2Matrix</c></param>
        /// <param name="tgt">target text, in the format accepted by <c>TextUtils.String2Matrix</c></param>
        /// <returns>the cosine similarity of the source-model and target-model outputs</returns>
        public double CosineSim(string src, string tgt)
        {
            // Convert both inputs first (each becomes a list of index -> value dictionaries),
            // then run the source model followed by the target model.
            List<Dictionary<int, double>> sourceRows = TextUtils.String2Matrix(src);
            List<Dictionary<int, double>> targetRows = TextUtils.String2Matrix(tgt);

            double[] sourceOutput = m_SrcModel.Fprop(sourceRows);
            double[] targetOutput = m_TgtModel.Fprop(targetRows);

            return NNModelUtils.CosineSim(sourceOutput, targetOutput);
        }
        /// <summary>
        /// Forward propagation. Runs three stages over the input rows:
        /// a convolutional projection of each position's window of rows through
        /// <c>m_rgW[0]</c>, a per-dimension max over all positions followed by tanh,
        /// and then one dense projection plus tanh for each remaining layer.
        /// </summary>
        /// <param name="rgFvs">
        /// input rows, one index -> value dictionary per position
        /// </param>
        /// <returns>the output vector of the last layer</returns>
        /// <exception cref="InvalidOperationException">the model has no layers</exception>
        /// <exception cref="ArgumentNullException"><paramref name="rgFvs"/> is null</exception>
        public double[] Fprop(List<Dictionary<int, double>> rgFvs)
        {
            if (NumLayer <= 0)
            {
                throw new InvalidOperationException("Error: the model is invalid");
            }
            if (rgFvs == null)
            {
                throw new ArgumentNullException("rgFvs");
            }

            // convolutional layer: for every position, concatenate the rows inside the
            // window centred on it and project the result through the first weight matrix.
            int halfWindow   = CWinSize / 2;
            int numPositions = rgFvs.Count;
            List<double[]> rgY1 = new List<double[]>(numPositions);

            for (int i = 0; i < numPositions; i++)
            {
                Dictionary<int, double> concat_fea = new Dictionary<int, double>();
                for (int ws = -halfWindow; ws <= halfWindow; ws++)
                {
                    int pos = i + ws;

                    // Window slots that fall outside the input are simply left empty.
                    if (pos >= 0 && pos < numPositions)
                    {
                        // Each window slot gets its own index range, offset by a multiple of NumInputNode.
                        // NOTE(review): with an even CWinSize this loop visits CWinSize + 1 slots — confirm
                        // that m_rgW[0] is sized for that, or that CWinSize is always odd.
                        TextUtils.FeatureConcate(concat_fea, rgFvs[pos], (ws + halfWindow) * NumInputNode);
                    }
                }
                rgY1.Add(NNModelUtils.ProjectionByATxSparse(m_rgW[0], concat_fea));
            }

            // max-pooling layer: keep, for each output dimension, the largest value over all positions.
            // With no input positions the vector stays all zeros.
            int poolSize = NumOutputLayerNode(0);
            double[] Y = new double[poolSize];
            for (int i = 0; i < poolSize; i++)
            {
                for (int k = 0; k < rgY1.Count; k++)
                {
                    // k == 0 seeds the maximum with the first position's value (which may be negative).
                    if (k == 0 || rgY1[k][i] > Y[i])
                    {
                        Y[i] = rgY1[k][i];
                    }
                }
            }
            Y = NNModelUtils.Tanh(Y, 1.0);

            // semantic layers: dense projection followed by tanh for layers 1 .. NumLayer - 1.
            for (int n = 1; n < NumLayer; ++n)
            {
                Y = NNModelUtils.ProjectionByATx(m_rgW[n], Y);
                Y = NNModelUtils.Tanh(Y, 1.0);
            }

            return Y;
        }
Пример #3
0
        /// <summary>
        /// Interactive console loop: loads two models and the precomputed query embeddings,
        /// then for every query typed by the user prints the ten entries of
        /// <c>TypEmbeddings</c> and of <c>TopEmbeddings</c> with the highest cosine similarity
        /// to the query's embedding under the respective model.
        /// The loop ends when standard input is closed.
        /// </summary>
        /// <param name="args">command-line arguments (unused)</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Loading models...");
            TypModels = new DSSMModel("ACT Model", @"..\..\..\..\..\models\ACT.Prefix", @"..\..\..\..\..\models\ACT.Prefix", DSSMHelper.Vocabulary, 10, false);
            TopModels = new DSSMModel("Bing CDSSM Model", @"..\..\..\..\..\models\CONSKM_3LAYER_300_300_300_FILTER6_TITLE_source", @"..\..\..\..\..\models\CONSKM_3LAYER_300_300_300_FILTER6_TITLE_source", DSSMHelper.Vocabulary, 10, false);
            Console.WriteLine("Loading query embeddings...");
            LoadQueries();

            while (true)
            {
                Console.Write("Enter query: ");
                string line = Console.ReadLine();
                if (line == null)
                {
                    // ReadLine returns null at end of input (stdin closed or redirected file exhausted);
                    // stop instead of dereferencing null.
                    break;
                }

                string q = line.ToLower().Trim().Replace("+", "\\+").Replace("-", "\\-");
                if (q.Length == 0)
                {
                    continue;
                }

                double[] qTypEmbeddings = TypModels.GetSourceEmbeddings(q);
                double[] qTopEmbeddings = TopModels.GetSourceEmbeddings(q);

                // Score every stored embedding against the query under each model.
                Dictionary<string, double> typResults = new Dictionary<string, double>();
                foreach (KeyValuePair<string, double[]> pair in TypEmbeddings)
                {
                    typResults[pair.Key] = NNModelUtils.CosineSim(qTypEmbeddings, pair.Value);
                }

                Dictionary<string, double> topResults = new Dictionary<string, double>();
                foreach (KeyValuePair<string, double[]> pair in TopEmbeddings)
                {
                    topResults[pair.Key] = NNModelUtils.CosineSim(qTopEmbeddings, pair.Value);
                }

                // Sort by similarity, highest first.
                List<KeyValuePair<string, double>> typResultsList = typResults.ToList();
                List<KeyValuePair<string, double>> topResultsList = topResults.ToList();
                typResultsList.Sort((firstPair, nextPair) => nextPair.Value.CompareTo(firstPair.Value));
                topResultsList.Sort((firstPair, nextPair) => nextPair.Value.CompareTo(firstPair.Value));

                Console.WriteLine("==Typical==");
                for (int i = 0; i < Math.Min(10, typResultsList.Count); i++)
                {
                    Console.WriteLine(typResultsList[i].Key + " (" + typResultsList[i].Value.ToString() + ")");
                }
                Console.WriteLine();
                Console.WriteLine("==Topical==");
                for (int i = 0; i < Math.Min(10, topResultsList.Count); i++)
                {
                    Console.WriteLine(topResultsList[i].Key + " (" + topResultsList[i].Value.ToString() + ")");
                }
                Console.WriteLine();
                Console.WriteLine();
            }
        }
Пример #4
0
        /// <summary>
        /// Interactive console loop: loads one model and the precomputed query embeddings,
        /// then for every query typed by the user builds a combined embedding and prints the
        /// ten entries of <c>Queries</c> with the highest cosine similarity to it.
        /// A query may combine several terms with <c>+</c> and <c>-</c>: each term's embedding
        /// is divided by its norm and then added to, or subtracted from, the combined embedding.
        /// The loop ends when standard input is closed.
        /// </summary>
        /// <param name="args">command-line arguments (unused)</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Loading model...");
            Models = new DSSMModel("SearchTrails (20140608) Symmetric Model", @"..\..\..\..\..\models\SearchTrails.SIGIR2015.Bing.SessionPairs.Symmetric.Deep.20150106", @"..\..\..\..\..\models\SearchTrails.SIGIR2015.Bing.SessionPairs.Symmetric.Deep.20150106", DSSMHelper.Vocabulary, 10, false);
            Console.WriteLine("Loading query embeddings...");
            LoadQueries();

            while (true)
            {
                Console.Write("Enter query: ");
                string line = Console.ReadLine();
                if (line == null)
                {
                    // ReadLine returns null at end of input (stdin closed or redirected file exhausted);
                    // stop instead of dereferencing null.
                    break;
                }

                // Prefix every '+' and '-' with a backslash so that splitting on the backslash
                // yields parts that each start with their own sign character.
                string q = line.ToLower().Trim().Replace("+", "\\+").Replace("-", "\\-");
                if (q.Length == 0)
                {
                    continue;
                }

                Dictionary<string, double> results = new Dictionary<string, double>();
                double[] sourceEmbeddings          = new double[Models.NumOutputNode];

                // Note: a backslash typed by the user acts as a separator as well.
                string[] parts = q.Split(new char[] { '\\' }, StringSplitOptions.RemoveEmptyEntries);

                foreach (string part in parts)
                {
                    string subtext = part.Trim();
                    if (subtext.Length == 0)
                    {
                        // A whitespace-only part (e.g. between two backslashes) has no sign
                        // character to inspect; indexing it would throw, so skip it.
                        continue;
                    }

                    int sign = 1;
                    if (subtext[0] == '+')
                    {
                        subtext = subtext.Remove(0, 1).Trim();
                    }
                    else if (subtext[0] == '-')
                    {
                        subtext = subtext.Remove(0, 1).Trim();
                        sign    = -1;
                    }

                    double[] qEmbeddings = Models.GetSourceEmbeddings(subtext);

                    // Floor the norm so a zero vector does not cause a division by zero.
                    double norm = Math.Max(1e-20, NNModelUtils.Norm(qEmbeddings));

                    for (int i = 0; i < qEmbeddings.Length; i++)
                    {
                        sourceEmbeddings[i] += (sign * qEmbeddings[i] / norm);
                    }
                }

                foreach (KeyValuePair<string, double[]> pair in Queries)
                {
                    results[pair.Key] = NNModelUtils.CosineSim(sourceEmbeddings, pair.Value);
                }

                // Sort by similarity, highest first, and show the top ten.
                List<KeyValuePair<string, double>> resultsList = results.ToList();
                resultsList.Sort((firstPair, nextPair) => nextPair.Value.CompareTo(firstPair.Value));

                for (int i = 0; i < Math.Min(10, resultsList.Count); i++)
                {
                    Console.WriteLine(resultsList[i].Key + " (" + resultsList[i].Value.ToString() + ")");
                }
                Console.WriteLine();
                Console.WriteLine();
            }
        }
        /// <summary>
        /// Computes the cosine similarity between each source/target line pair and writes
        /// one score line per pair to <paramref name="outTSV"/>.
        /// When a labeled file is given, its first line is treated as a header and extended
        /// with the feature column name(s); every following line is copied in front of the
        /// corresponding score.
        /// </summary>
        /// <param name="inTSV">input labeled data file; null or empty means no labeled file</param>
        /// <param name="inSrc">source file, one input per line in the format accepted by <c>TextUtils.String2Matrix</c></param>
        /// <param name="inTgt">target file, one input per line in the format accepted by <c>TextUtils.String2Matrix</c></param>
        /// <param name="FeatName">feature name used for the added header column(s)</param>
        /// <param name="outTSV">output score file</param>
        /// <param name="bOutputVector">whether to also output the source and target model output vectors</param>
        public void PredictingV1(string inTSV, string inSrc, string inTgt, string FeatName, string outTSV, bool bOutputVector)
        {
            bool hasLabeledInput = !string.IsNullOrEmpty(inTSV);

            // 'using' guarantees both files are flushed and closed even if a model call or
            // a read throws part-way through. A null resource is permitted and is skipped on dispose.
            using (StreamWriter sw = new StreamWriter(outTSV))
            using (StreamReader sr = hasLabeledInput ? new StreamReader(inTSV) : null)
            {
                Console.WriteLine("computing sim...");

                if (sr != null)
                {
                    // Header row: original header plus the new feature column name(s).
                    string header = sr.ReadLine();

                    sw.Write("{0}\t{1}", header, FeatName);
                    if (bOutputVector)
                    {
                        for (int i = 0; i < m_SrcModel.NumOutputNode; ++i)
                        {
                            sw.Write("\t{0}_s{1}", FeatName, i);
                        }
                        for (int i = 0; i < m_TgtModel.NumOutputNode; ++i)
                        {
                            sw.Write("\t{0}_t{1}", FeatName, i);
                        }
                    }
                    sw.Write("\n");
                }

                int n = 0;
                foreach (Pair<string, string> p in PairEnum<string, string>.E(FileEnum.GetLines(inSrc), FileEnum.GetLines(inTgt)))
                {
                    // If the labeled file has fewer lines than there are pairs, ReadLine yields
                    // null, which the format call below renders as an empty field.
                    string sLine = (sr != null) ? sr.ReadLine() : "";

                    List<Dictionary<int, double>> srcMt = TextUtils.String2Matrix(p.First);
                    List<Dictionary<int, double>> tgtMt = TextUtils.String2Matrix(p.Second);
                    double[] srcVt = m_SrcModel.Fprop(srcMt);
                    double[] tgtVt = m_TgtModel.Fprop(tgtMt);
                    double   sim   = NNModelUtils.CosineSim(srcVt, tgtVt);

                    // Values are written at single precision.
                    if (sr != null)
                    {
                        sw.Write("{0}\t{1}", sLine, (float)sim);
                    }
                    else
                    {
                        sw.Write((float)sim);
                    }

                    if (bOutputVector)
                    {
                        for (int i = 0; i < m_SrcModel.NumOutputNode; ++i)
                        {
                            sw.Write("\t{0}", (float)srcVt[i]);
                        }
                        for (int i = 0; i < m_TgtModel.NumOutputNode; ++i)
                        {
                            sw.Write("\t{0}", (float)tgtVt[i]);
                        }
                    }
                    sw.Write("\n");

                    // Progress indicator on stderr every 1000 pairs.
                    n++;
                    if (n % 1000 == 0)
                    {
                        Console.Error.Write("{0}\r", n);
                    }
                }
                Console.WriteLine("{0} pairs.", n);
            }
        }