public void NormalizeTest()
{
    // Arrange: a vector whose Euclidean length is sqrt(2), so normalization must rescale it.
    var vector = new SparseVector(new double[] { 1.0, -1.0 });

    // Act
    vector.Normalize();

    // Assert: the normalized vector's length must not differ from 1 by more than epsilon.
    double length = Math.Sqrt(vector[0] * vector[0] + vector[1] * vector[1]);
    Assert.IsTrue(Math.Abs(length - 1) < epsilon);
}
/// <summary>
/// Builds the TF-IDF feature vector for <paramref name="example"/> over the
/// vocabulary <paramref name="voc"/> and stores it in <c>example.X</c>.
/// </summary>
/// <param name="example">Document whose token counts are turned into features.</param>
/// <param name="voc">Vocabulary mapping words to vector positions and document frequencies.</param>
/// <param name="exampleCount">Total number of documents in the corpus.</param>
private static void BuildExample(TextExample example, Vocabulary voc, int exampleCount)
{
    int dimension = voc.Count;
    SparseVector vector = new SparseVector(dimension);

    // phi_i(x) = tf_i * log(idf_i) / k
    //   tf_i : number of occurrences of the term i in the document x
    //   idf_i: ratio between the total number of documents and the
    //          number of documents containing the term
    //   k    : normalisation constant ensuring that ||phi|| = 1
    //
    // Iterate key/value pairs to avoid a second dictionary lookup per token.
    foreach (var token in example.Tokens)
    {
        int pos = voc.GetWordPosition(token.Key);
        if (pos == Constants.KEY_NOT_FOUND)
        {
            continue;
        }

        // Cast to double so the idf ratio is not truncated by integer
        // division (assumes WordExampleOccurMap holds integer counts —
        // without the cast, e.g. 10 / 6 == 1 and log(1) == 0, silently
        // zeroing terms that occur in more than half the corpus).
        double phi = token.Value * Math.Log((double)exampleCount / voc.WordExampleOccurMap[token.Key]);
        vector.Components.Add(pos, phi);
    }

    // Applies the normalisation constant k: scales the vector so ||phi|| = 1.
    vector.Normalize();
    example.X = vector;
}
/// <summary>
/// Builds the TF-IDF feature vector for <paramref name="example"/> over the
/// vocabulary <paramref name="voc"/> and stores it in <c>example.X</c>.
/// </summary>
/// <param name="example">Document whose token counts are turned into features.</param>
/// <param name="voc">Vocabulary mapping words to vector positions and document frequencies.</param>
/// <param name="exampleCount">Total number of documents in the corpus.</param>
private static void BuildExample(TextExample example, Vocabulary voc, int exampleCount)
{
    int dimension = voc.Count;
    SparseVector vector = new SparseVector(dimension);

    // phi_i(x) = tf_i * log(idf_i) / k
    //   tf_i : number of occurrences of the term i in the document x
    //   idf_i: ratio between the total number of documents and the
    //          number of documents containing the term
    //   k    : normalisation constant ensuring that ||phi|| = 1
    //
    // Iterate key/value pairs to avoid a second dictionary lookup per token.
    foreach (var token in example.Tokens)
    {
        int pos = voc.GetWordPosition(token.Key);
        if (pos == Constants.KEY_NOT_FOUND)
        {
            continue;
        }

        // Cast to double so the idf ratio is not truncated by integer
        // division (assumes WordExampleOccurMap holds integer counts —
        // without the cast, e.g. 10 / 6 == 1 and log(1) == 0, silently
        // zeroing terms that occur in more than half the corpus).
        double phi = token.Value * Math.Log((double)exampleCount / voc.WordExampleOccurMap[token.Key]);
        vector.Components.Add(pos, phi);
    }

    // Applies the normalisation constant k: scales the vector so ||phi|| = 1.
    vector.Normalize();
    example.X = vector;
}
public void NormalizeThrowsExceptionWhenNullVector()
{
    // A freshly constructed sparse vector has no components, i.e. zero length,
    // so normalizing it is expected to throw. NOTE(review): the test name
    // implies an expected-exception attribute on this method's declaration —
    // it is not visible in this chunk; confirm it is present.
    var zeroVector = new SparseVector(5);

    zeroVector.Normalize();
}