/// <summary>
/// Computes the cosine similarity between a source and a target text,
/// where both texts are supplied in matrix format.
/// </summary>
/// <param name="src">Source text; parsed with <c>TextUtils.String2Matrix</c>.</param>
/// <param name="tgt">Target text; parsed with <c>TextUtils.String2Matrix</c>.</param>
/// <returns>
/// Cosine similarity between the source model's and the target model's
/// forward-propagation outputs.
/// </returns>
public double CosineSim(string src, string tgt)
{
    // Parse both inputs first, then run each through its own model.
    List<Dictionary<int, double>> sourceMatrix = TextUtils.String2Matrix(src);
    List<Dictionary<int, double>> targetMatrix = TextUtils.String2Matrix(tgt);

    double[] sourceOutput = m_SrcModel.Fprop(sourceMatrix);
    double[] targetOutput = m_TgtModel.Fprop(targetMatrix);

    return NNModelUtils.CosineSim(sourceOutput, targetOutput);
}
/// <summary>
/// Forward propagation through the model: a convolutional layer over a sliding
/// window of input positions, max-pooling across positions, tanh, and then the
/// remaining (semantic) layers, each followed by tanh.
/// </summary>
/// <param name="rgFvs">
/// Sequence of sparse feature vectors (feature index to value), one per input position.
/// An empty sequence yields an all-zero pooled vector before the first tanh.
/// </param>
/// <returns>The activation vector produced by the last layer.</returns>
/// <exception cref="InvalidOperationException">The model has no layers.</exception>
/// <exception cref="ArgumentNullException"><paramref name="rgFvs"/> is null.</exception>
public double[] Fprop(List<Dictionary<int, double>> rgFvs)
{
    if (NumLayer <= 0)
    {
        throw new InvalidOperationException("Error: the model is invalid");
    }

    if (rgFvs == null)
    {
        throw new ArgumentNullException("rgFvs");
    }

    // Loop invariants, hoisted out of the convolution loops.
    int halfWindow = CWinSize / 2;
    int positionCount = rgFvs.Count;

    // Convolutional layer: for every position, concatenate the feature vectors of
    // the in-range neighbours inside the window (each shifted by its window slot
    // times NumInputNode) and project the result with the first weight matrix.
    List<double[]> rgY1 = new List<double[]>(positionCount);
    for (int i = 0; i < positionCount; i++)
    {
        Dictionary<int, double> concat_fea = new Dictionary<int, double>();
        for (int ws = -halfWindow; ws <= halfWindow; ws++)
        {
            int neighbour = i + ws;
            if (neighbour >= 0 && neighbour < positionCount)
            {
                TextUtils.FeatureConcate(concat_fea, rgFvs[neighbour], (ws + halfWindow) * NumInputNode);
            }
        }

        rgY1.Add(NNModelUtils.ProjectionByATxSparse(m_rgW[0], concat_fea));
    }

    // Max-pooling layer: per output node, keep the largest value over all positions.
    // The first position (k == 0) always seeds the maximum so that negative
    // activations are not masked by the array's initial zeros.
    double[] Y = new double[NumOutputLayerNode(0)];
    for (int i = 0; i < Y.Length; i++)
    {
        for (int k = 0; k < rgY1.Count; k++)
        {
            if (k == 0 || rgY1[k][i] > Y[i])
            {
                Y[i] = rgY1[k][i];
            }
        }
    }

    Y = NNModelUtils.Tanh(Y, 1.0);

    // Semantic layers: dense projection followed by tanh for each remaining layer.
    for (int n = 1; n < NumLayer; ++n)
    {
        Y = NNModelUtils.ProjectionByATx(m_rgW[n], Y);
        Y = NNModelUtils.Tanh(Y, 1.0);
    }

    return Y;
}
/// <summary>
/// Interactive console demo. Loads the "ACT" and "Bing CDSSM" models, calls
/// <c>LoadQueries</c>, then repeatedly reads a query from standard input and prints
/// the ten entries of <c>TypEmbeddings</c> and of <c>TopEmbeddings</c> whose
/// embeddings have the highest cosine similarity to the query's embedding.
/// The loop ends when standard input reaches end of input.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.WriteLine("Loading models...");
    TypModels = new DSSMModel("ACT Model", @"..\..\..\..\..\models\ACT.Prefix", @"..\..\..\..\..\models\ACT.Prefix", DSSMHelper.Vocabulary, 10, false);
    TopModels = new DSSMModel("Bing CDSSM Model", @"..\..\..\..\..\models\CONSKM_3LAYER_300_300_300_FILTER6_TITLE_source", @"..\..\..\..\..\models\CONSKM_3LAYER_300_300_300_FILTER6_TITLE_source", DSSMHelper.Vocabulary, 10, false);

    Console.WriteLine("Loading query embeddings...");
    LoadQueries();

    while (true)
    {
        Console.Write("Enter query: ");

        // Console.ReadLine returns null once input is exhausted (closed or
        // redirected stdin); leave the loop instead of dereferencing null.
        string line = Console.ReadLine();
        if (line == null)
        {
            break;
        }

        // NOTE(review): '+' and '-' are backslash-escaped here and the escaped text
        // is passed to GetSourceEmbeddings as-is; confirm this is what the models expect.
        string q = line.ToLower().Trim().Replace("+", "\\+").Replace("-", "\\-");
        if (q.Length == 0)
        {
            continue;
        }

        Dictionary<string, double> typResults = new Dictionary<string, double>();
        Dictionary<string, double> topResults = new Dictionary<string, double>();

        double[] qTypEmbeddings = TypModels.GetSourceEmbeddings(q);
        double[] qTopEmbeddings = TopModels.GetSourceEmbeddings(q);

        // Score every stored entry against the query under each model.
        foreach (KeyValuePair<string, double[]> pair in TypEmbeddings)
        {
            typResults[pair.Key] = NNModelUtils.CosineSim(qTypEmbeddings, pair.Value);
        }

        foreach (KeyValuePair<string, double[]> pair in TopEmbeddings)
        {
            topResults[pair.Key] = NNModelUtils.CosineSim(qTopEmbeddings, pair.Value);
        }

        // Rank by similarity, highest first.
        List<KeyValuePair<string, double>> typResultsList = typResults.ToList();
        List<KeyValuePair<string, double>> topResultsList = topResults.ToList();
        typResultsList.Sort((firstPair, nextPair) => nextPair.Value.CompareTo(firstPair.Value));
        topResultsList.Sort((firstPair, nextPair) => nextPair.Value.CompareTo(firstPair.Value));

        Console.WriteLine("==Typical==");
        for (int i = 0; i < Math.Min(10, typResultsList.Count); i++)
        {
            Console.WriteLine(typResultsList[i].Key + " (" + typResultsList[i].Value.ToString() + ")");
        }

        Console.WriteLine();
        Console.WriteLine("==Topical==");
        for (int i = 0; i < Math.Min(10, topResultsList.Count); i++)
        {
            Console.WriteLine(topResultsList[i].Key + " (" + topResultsList[i].Value.ToString() + ")");
        }

        Console.WriteLine();
        Console.WriteLine();
    }
}
/// <summary>
/// Interactive console demo with signed query terms. Loads the SearchTrails
/// symmetric model, calls <c>LoadQueries</c>, then repeatedly reads a line from
/// standard input. The line is split into terms at every '+' or '-'; each term's
/// embedding is length-normalised and added to (or, for '-' terms, subtracted from)
/// a combined vector. The ten entries of <c>Queries</c> with the highest cosine
/// similarity to that combined vector are printed.
/// The loop ends when standard input reaches end of input.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.WriteLine("Loading model...");
    Models = new DSSMModel("SearchTrails (20140608) Symmetric Model", @"..\..\..\..\..\models\SearchTrails.SIGIR2015.Bing.SessionPairs.Symmetric.Deep.20150106", @"..\..\..\..\..\models\SearchTrails.SIGIR2015.Bing.SessionPairs.Symmetric.Deep.20150106", DSSMHelper.Vocabulary, 10, false);

    Console.WriteLine("Loading query embeddings...");
    LoadQueries();

    while (true)
    {
        Console.Write("Enter query: ");

        // Console.ReadLine returns null once input is exhausted (closed or
        // redirected stdin); leave the loop instead of dereferencing null.
        string line = Console.ReadLine();
        if (line == null)
        {
            break;
        }

        // Prefix every '+' / '-' with a backslash so that splitting on the backslash
        // below yields terms that each still start with their own sign character.
        string q = line.ToLower().Trim().Replace("+", "\\+").Replace("-", "\\-");
        if (q.Length == 0)
        {
            continue;
        }

        Dictionary<string, double> results = new Dictionary<string, double>();
        double[] sourceEmbeddings = new double[Models.NumOutputNode];

        string[] parts = q.Split(new char[] { '\\' }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string part in parts)
        {
            string subtext = part.Trim();

            // RemoveEmptyEntries keeps whitespace-only pieces (e.g. input "\ \");
            // skip them, otherwise subtext[0] below would be out of range.
            if (subtext.Length == 0)
            {
                continue;
            }

            int sign = 1;
            if (subtext[0] == '+')
            {
                subtext = subtext.Remove(0, 1).Trim();
            }
            else if (subtext[0] == '-')
            {
                subtext = subtext.Remove(0, 1).Trim();
                sign = -1;
            }

            double[] qEmbeddings = Models.GetSourceEmbeddings(subtext);

            // Clamp the norm away from zero so an all-zero embedding cannot divide by zero.
            double norm = Math.Max(1e-20, NNModelUtils.Norm(qEmbeddings));
            for (int i = 0; i < qEmbeddings.Length; i++)
            {
                sourceEmbeddings[i] += (sign * qEmbeddings[i] / norm);
            }
        }

        // Score every stored query against the combined vector.
        foreach (KeyValuePair<string, double[]> pair in Queries)
        {
            results[pair.Key] = NNModelUtils.CosineSim(sourceEmbeddings, pair.Value);
        }

        // Rank by similarity, highest first, and show the top ten.
        List<KeyValuePair<string, double>> resultsList = results.ToList();
        resultsList.Sort((firstPair, nextPair) => nextPair.Value.CompareTo(firstPair.Value));

        for (int i = 0; i < Math.Min(10, resultsList.Count); i++)
        {
            Console.WriteLine(resultsList[i].Key + " (" + resultsList[i].Value.ToString() + ")");
        }

        Console.WriteLine();
        Console.WriteLine();
    }
}
/// <summary>
/// Computes the similarity between paired source and target inputs and writes one
/// score per pair to <paramref name="outTSV"/>. For each pair, both sides are parsed
/// with <c>TextUtils.String2Matrix</c>, run through the source/target model's
/// <c>Fprop</c>, and compared with <c>NNModelUtils.CosineSim</c>.
/// </summary>
/// <param name="inTSV">
/// Input labeled data file, or an empty string for none. When given, its first line
/// is copied as the header (with the feature column names appended) and each
/// following line is copied in front of the corresponding score.
/// </param>
/// <param name="inSrc">File with the source-side inputs; paired with <paramref name="inTgt"/>.</param>
/// <param name="inTgt">File with the target-side inputs; paired with <paramref name="inSrc"/>.</param>
/// <param name="FeatName">Feature name used for the score column (and vector columns) in the header.</param>
/// <param name="outTSV">Output score file.</param>
/// <param name="bOutputVector">Whether to also output the source and target output vectors after the score.</param>
public void PredictingV1(string inTSV, string inSrc, string inTgt, string FeatName, string outTSV, bool bOutputVector)
{
    // 'using' guarantees both files are flushed and closed even if a model call or
    // a read fails part-way through. A null reader is permitted by 'using'.
    using (StreamWriter sw = new StreamWriter(outTSV))
    using (StreamReader sr = inTSV != "" ? new StreamReader(inTSV) : null)
    {
        Console.WriteLine("computing sim...");
        string sLine = "";
        int n = 0;

        if (sr != null)
        {
            // Header: original header line, the score column, then optional vector columns.
            sLine = sr.ReadLine();
            sw.Write("{0}\t{1}", sLine, FeatName);
            if (bOutputVector)
            {
                for (int i = 0; i < m_SrcModel.NumOutputNode; ++i)
                {
                    sw.Write("\t{0}_s{1}", FeatName, i);
                }

                for (int i = 0; i < m_TgtModel.NumOutputNode; ++i)
                {
                    sw.Write("\t{0}_t{1}", FeatName, i);
                }
            }

            sw.Write("\n");
        }

        sLine = "";
        foreach (Pair<string, string> p in PairEnum<string, string>.E(FileEnum.GetLines(inSrc), FileEnum.GetLines(inTgt)))
        {
            if (sr != null)
            {
                // Keep the labeled file in step with the source/target pairs.
                sLine = sr.ReadLine();
            }

            List<Dictionary<int, double>> srcMt = TextUtils.String2Matrix(p.First);
            List<Dictionary<int, double>> tgtMt = TextUtils.String2Matrix(p.Second);
            double[] srcVt = m_SrcModel.Fprop(srcMt);
            double[] tgtVt = m_TgtModel.Fprop(tgtMt);
            double sim = NNModelUtils.CosineSim(srcVt, tgtVt);

            // Scores and vector components are written at single precision.
            if (sr != null)
            {
                sw.Write("{0}\t{1}", sLine, (float)sim);
            }
            else
            {
                sw.Write((float)sim);
            }

            if (bOutputVector)
            {
                for (int i = 0; i < m_SrcModel.NumOutputNode; ++i)
                {
                    sw.Write("\t{0}", (float)srcVt[i]);
                }

                for (int i = 0; i < m_TgtModel.NumOutputNode; ++i)
                {
                    sw.Write("\t{0}", (float)tgtVt[i]);
                }
            }

            sw.Write("\n");

            n++;
            if (n % 1000 == 0)
            {
                // Progress indicator, overwritten in place on stderr.
                Console.Error.Write("{0}\r", n);
            }
        }

        Console.WriteLine("{0} pairs.", n);
    }
}