/// <summary>
/// Computes the cosine similarity between a source string and a target string.
/// Each string is converted to its matrix form with <c>TextUtils.String2Matrix</c>,
/// passed through the <c>Fprop</c> of its own model (<c>m_SrcModel</c> for the source,
/// <c>m_TgtModel</c> for the target), and the two outputs are compared with
/// <c>NNModelUtils.CosineSim</c>.
/// </summary>
/// <param name="src">Source text; handled by <c>m_SrcModel</c>.</param>
/// <param name="tgt">Target text; handled by <c>m_TgtModel</c>.</param>
/// <returns>The value returned by <c>NNModelUtils.CosineSim</c> for the two model outputs.</returns>
public double CosineSim(string src, string tgt)
{
    // Build both matrix representations first, then run the models, so the
    // sequence of calls stays: String2Matrix(src), String2Matrix(tgt), Fprop, Fprop.
    List<Dictionary<int, double>> sourceMatrix = TextUtils.String2Matrix(src);
    List<Dictionary<int, double>> targetMatrix = TextUtils.String2Matrix(tgt);

    var sourceOutput = m_SrcModel.Fprop(sourceMatrix);
    var targetOutput = m_TgtModel.Fprop(targetMatrix);

    return NNModelUtils.CosineSim(sourceOutput, targetOutput);
}
/// <summary>
/// Runs <paramref name="model"/>'s <c>Fprop</c> over the features extracted from
/// <paramref name="text"/> and returns the result.
/// The text is normalized with <c>TextUtils.N1Normalize</c>, turned into a feature-string
/// sequence by <c>TextUtils.String2FeatStrSeq</c>, and mapped to id/frequency dictionaries by
/// <c>TextUtils.StrFreq2IdFreq</c> before being handed to the model.
/// </summary>
/// <param name="model">Model whose <c>Fprop</c> produces the returned array.</param>
/// <param name="text">Input text; replaced by <c>"#"</c> when it is empty after normalization.</param>
/// <param name="windowSize">Window size forwarded to <c>TextUtils.String2FeatStrSeq</c>.</param>
/// <param name="featureType">
/// Feature type forwarded to <c>TextUtils.String2FeatStrSeq</c>; also selects whether feature
/// strings are mapped using <paramref name="nHashCount"/> or <c>this.Vocabulary</c>.
/// </param>
/// <param name="nHashCount">Only used when <paramref name="featureType"/> is <c>FeatureType.wordhash</c>.</param>
/// <returns>The array returned by <c>model.Fprop</c>.</returns>
private double[] GetEmbeddings(DNN model, string text, int windowSize, FeatureType featureType, int nHashCount)
{
    // Second argument of String2FeatStrSeq; the original inline note on that call reads
    // "letter N-gram", so this is presumably the n-gram length -- confirm against TextUtils.
    const int LetterNgramSize = 3;

    text = TextUtils.N1Normalize(text);
    if (text.Length == 0)
    {
        // Text that is empty after normalization is replaced by the placeholder "#".
        text = "#";
    }

    var featStrFeq = TextUtils.String2FeatStrSeq(text, LetterNgramSize, windowSize, featureType);

    // Assigned in exactly one of the two branches below; no throw-away list is
    // allocated up front because it would be overwritten immediately.
    List<Dictionary<int, double>> rgSideWfs;
    if (featureType == FeatureType.wordhash)
    {
        rgSideWfs = TextUtils.StrFreq2IdFreq(featStrFeq, nHashCount);
    }
    else
    {
        rgSideWfs = TextUtils.StrFreq2IdFreq(featStrFeq, this.Vocabulary);
    }

    return model.Fprop(rgSideWfs);
}