/// <summary>
/// Determines the similarity of this message to the specified subsection of another one.
/// </summary>
/// <remarks>
/// This message is sampled across its whole range and <paramref name="other"/> across the
/// requested subsection, at 20 evenly spaced positions each. The per-sample similarities
/// (dot product divided by the product of both sample lengths) are multiplied together and
/// the product is raised to the power of 5 / sample count.
/// </remarks>
/// <param name="other">The message whose subsection is compared against this one.</param>
/// <param name="begin">
/// Position in <paramref name="other"/> where the subsection starts, in the units accepted by
/// <c>SampleVector</c>.
/// </param>
/// <param name="length">
/// Extent of the subsection, in the same units as <paramref name="begin"/>; the last sample is
/// taken at <c>begin + length</c>.
/// </param>
/// <returns>
/// The combined similarity. It is 0 as soon as one sample pair has a non-positive similarity.
/// </returns>
private float GetSimilarity(Message other, float begin, float length)
{
    const int sampleCount = 20;

    // Similarity assumed for a sample pair that carries no usable direction
    // (one of the samples is empty or has zero magnitude).
    const float fallbackSimilarity = 0.1f;

    const double resultExponent = 5.0d / sampleCount;

    double totalSimilarity = 1.0d;
    for (int i = 0; i < sampleCount; i++)
    {
        float fullPos = (float)i / (float)(sampleCount - 1);
        float otherPos = begin + length * fullPos;

        LargeVector localSample = this.SampleVector(fullPos);
        LargeVector otherSample = other.SampleVector(otherPos);

        float similarity = fallbackSimilarity;
        if (!localSample.IsEmpty && !otherSample.IsEmpty)
        {
            float magnitudeProduct = localSample.GetLength() * otherSample.GetLength();

            // A zero magnitude would turn the division into 0 / 0 = NaN and poison the product.
            if (magnitudeProduct > 0.0f)
            {
                float cosine = LargeVector.Dot(localSample, otherSample) / magnitudeProduct;

                // Negative values are clamped to zero: a negative running product makes
                // Math.Pow return NaN for the fractional exponent below, and two negative
                // samples would otherwise cancel out into a misleading positive score.
                // The comparison also maps a NaN cosine to zero.
                similarity = cosine > 0.0f ? cosine : 0.0f;
            }
        }

        totalSimilarity *= (double)similarity;
    }

    return (float)Math.Pow(totalSimilarity, resultExponent);
}
/// <summary>
/// Samples the vector sequence of this message at a relative position by linearly
/// interpolating between the two neighbouring vectors.
/// </summary>
/// <param name="position">
/// Relative position, where 0 maps to the first vector and 1 to the last one.
/// Values outside of that range are clamped to it.
/// </param>
/// <returns>
/// The interpolated vector, or <c>LargeVector.Empty</c> when this message has no vectors.
/// </returns>
private LargeVector SampleVector(float position)
{
    int lastIndex = this.vectors.Length - 1;
    if (lastIndex < 0)
    {
        // Nothing to sample from; indexing would throw.
        return LargeVector.Empty;
    }

    // Callers derive positions from float arithmetic (e.g. begin + length), which may land
    // marginally outside [0, 1] and would otherwise index past either end of the array.
    float clampedPosition = Math.Min(1.0f, Math.Max(0.0f, position));

    float targetIndex = lastIndex * clampedPosition;
    int firstIndex = Math.Min(lastIndex, (int)Math.Floor(targetIndex));
    int secondIndex = Math.Min(lastIndex, (int)Math.Ceiling(targetIndex));

    // Fractional distance from the lower neighbour towards the upper one.
    float alpha = targetIndex - firstIndex;

    return LargeVector.Lerp(this.vectors[firstIndex], this.vectors[secondIndex], alpha);
}
/// <summary>
/// Computes the dot product of two vectors.
/// </summary>
/// <remarks>
/// The number of components taken into account is dictated by <paramref name="left"/>;
/// <paramref name="right"/> is indexed with the same indices and therefore needs at least as
/// many components.
/// </remarks>
/// <param name="left">First operand; its component count drives the iteration.</param>
/// <param name="right">Second operand.</param>
/// <returns>The sum of the pairwise component products, accumulated in single precision.</returns>
public static float Dot(LargeVector left, LargeVector right)
{
    float[] leftComponents = left.data;
    float[] rightComponents = right.data;

    float sum = 0.0f;
    int index = 0;
    while (index < leftComponents.Length)
    {
        sum += leftComponents[index] * rightComponents[index];
        index++;
    }

    return sum;
}
/// <summary>
/// Linearly blends two vectors component by component into a new vector.
/// </summary>
/// <remarks>
/// The result has as many dimensions as the larger operand. An empty operand is replaced by
/// the result's own buffer, so the value read for it is whatever that buffer holds before the
/// component is written (presumably zero from the constructor; confirm against LargeVector).
/// Two non-empty operands are both indexed up to the larger dimension count.
/// </remarks>
/// <param name="left">Operand weighted by <c>1 - factor</c>.</param>
/// <param name="right">Operand weighted by <c>factor</c>.</param>
/// <param name="factor">Blend weight; it is not clamped.</param>
/// <returns>A new vector holding the blended components.</returns>
public static LargeVector Lerp(LargeVector left, LargeVector right, float factor)
{
    LargeVector blended = new LargeVector(Math.Max(left.Dimensions, right.Dimensions));
    float[] output = blended.data;

    // An empty operand deliberately aliases the output buffer.
    float[] leftSource;
    if (left.IsEmpty)
    {
        leftSource = output;
    }
    else
    {
        leftSource = left.data;
    }

    float[] rightSource;
    if (right.IsEmpty)
    {
        rightSource = output;
    }
    else
    {
        rightSource = right.data;
    }

    for (int component = 0; component < output.Length; component++)
    {
        output[component] = leftSource[component] * (1.0f - factor) + rightSource[component] * factor;
    }

    return blended;
}
/// <summary>
/// Determines the similarity of two words based on their vectors.
/// </summary>
/// <remarks>
/// The similarity is the dot product of both vectors divided by the product of their lengths
/// (cosine similarity, assuming <c>GetLength</c> returns the Euclidean norm).
/// </remarks>
/// <param name="wordA">The first word to look up.</param>
/// <param name="wordB">The second word to look up; only looked up when the first one has a vector.</param>
/// <returns>
/// The similarity of both words, or 0 when either word has no vector or a vector without
/// magnitude.
/// </returns>
public float GetSimilarity(string wordA, string wordB)
{
    LargeVector vecA = this.Get(wordA);
    if (vecA.IsEmpty)
    {
        return 0.0f;
    }

    LargeVector vecB = this.Get(wordB);
    if (vecB.IsEmpty)
    {
        return 0.0f;
    }

    float magnitudeProduct = vecA.GetLength() * vecB.GetLength();

    // A vector without magnitude has no direction; dividing by zero would yield NaN.
    // The negated comparison also catches a NaN magnitude.
    if (!(magnitudeProduct > 0.0f))
    {
        return 0.0f;
    }

    return LargeVector.Dot(vecA, vecB) / magnitudeProduct;
}
/// <summary>
/// Reads the vector stored for the specified word from the data reader.
/// </summary>
/// <remarks>
/// The position of the entry is obtained from <c>SearchIndex</c>, where -1 is treated as
/// "not found". This method moves the position of the reader's underlying stream.
/// </remarks>
/// <param name="word">The word whose vector is requested.</param>
/// <returns>The vector read for the word, or <c>LargeVector.Empty</c> when the index has no entry for it.</returns>
private LargeVector ReadVector(string word)
{
    long entryOffset = this.SearchIndex(word);
    if (entryOffset == -1)
    {
        return LargeVector.Empty;
    }

    var reader = this.dataReader;
    reader.BaseStream.Seek(entryOffset, SeekOrigin.Begin);

    // The entry begins with a string that is read and discarded
    // (presumably the word itself; confirm against the file format).
    reader.ReadString();

    float[] components = new float[this.vectorDimensions];
    int next = 0;
    while (next < components.Length)
    {
        components[next] = reader.ReadSingle();
        next++;
    }

    return new LargeVector(components);
}