/// <summary>Computes the dot product of this vector with <paramref name="other"/>.</summary>
/// <remarks>
/// Both vectors are treated as if they were infinitely padded with 0s, so no complaint is
/// raised on a dimension mismatch. There are no side effects.
/// </remarks>
/// <param name="other">the MV to dot product with</param>
/// <returns>the dot product of this and other</returns>
public virtual double DotProduct(Edu.Stanford.Nlp.Loglinear.Model.ConcatVector other)
{
    if (loadedNative)
    {
        return DotProductNative(other);
    }
    double total = 0.0;
    // Components beyond the shorter vector multiply against implicit zeros, so skip them.
    int overlap = Math.Min(pointers.Length, other.pointers.Length);
    for (int component = 0; component < overlap; component++)
    {
        double[] mine = pointers[component];
        double[] theirs = other.pointers[component];
        if (mine == null || theirs == null)
        {
            continue;
        }
        bool mineSparse = sparse[component];
        bool theirsSparse = other.sparse[component];
        if (mineSparse && theirsSparse)
        {
            // sparse x sparse: contributes only if the single stored indices coincide
            if ((int)mine[0] == (int)theirs[0])
            {
                total += mine[1] * theirs[1];
            }
        }
        else if (mineSparse)
        {
            // sparse x dense: look up our single index inside their dense array
            int idx = (int)mine[0];
            if (idx >= 0 && idx < theirs.Length)
            {
                total += theirs[idx] * mine[1];
            }
        }
        else if (theirsSparse)
        {
            // dense x sparse: look up their single index inside our dense array
            int idx = (int)theirs[0];
            if (idx >= 0 && idx < mine.Length)
            {
                total += mine[idx] * theirs[1];
            }
        }
        else
        {
            // dense x dense: elementwise product over the shared prefix
            int len = Math.Min(mine.Length, theirs.Length);
            for (int j = 0; j < len; j++)
            {
                total += mine[j] * theirs[j];
            }
        }
    }
    return total;
}
/// <summary>
/// Creates a ConcatVector whose dimensions are the same as this one for all dense components, but is otherwise
/// completely empty.
/// </summary>
/// <remarks>
/// Useful to prevent resizing during optimizations where lots of sparse vectors are added in,
/// since the dense components are pre-allocated at their final sizes.
/// </remarks>
/// <returns>an empty vector suitable for use as a gradient</returns>
public virtual Edu.Stanford.Nlp.Loglinear.Model.ConcatVector NewEmptyClone()
{
    Edu.Stanford.Nlp.Loglinear.Model.ConcatVector result =
        new Edu.Stanford.Nlp.Loglinear.Model.ConcatVector(GetNumberOfComponents());
    for (int component = 0; component < pointers.Length; component++)
    {
        double[] segment = pointers[component];
        // Only dense segments are mirrored with zeroed arrays; sparse/null segments stay empty.
        if (segment != null && !sparse[component])
        {
            result.pointers[component] = new double[segment.Length];
            result.sparse[component] = false;
        }
    }
    return result;
}
/// <summary>Recreates an in-memory concat vector object from a Proto serialization.</summary>
/// <param name="m">the concat vector proto</param>
/// <returns>an in-memory concat vector object</returns>
public static Edu.Stanford.Nlp.Loglinear.Model.ConcatVector ReadFromProto(ConcatVectorProto.ConcatVector m)
{
    int numComponents = m.GetComponentCount();
    Edu.Stanford.Nlp.Loglinear.Model.ConcatVector result = new Edu.Stanford.Nlp.Loglinear.Model.ConcatVector();
    result.pointers = new double[numComponents][];
    result.sparse = new bool[numComponents];
    for (int i = 0; i < numComponents; i++)
    {
        ConcatVectorProto.ConcatVector.Component component = m.GetComponent(i);
        result.sparse[i] = component.GetSparse();
        // Copy the proto's data list into a fresh array for this component.
        int length = component.GetDataCount();
        double[] data = new double[length];
        for (int j = 0; j < length; j++)
        {
            data[j] = component.GetData(j);
        }
        result.pointers[i] = data;
    }
    return result;
}
/// <summary>Clone a concat vector constructor.</summary>
/// <remarks>Marks both vectors as copyOnWrite, but makes no immediate copies.</remarks>
/// <param name="clone">the concat vector to clone.</param>
private ConcatVector(Edu.Stanford.Nlp.Loglinear.Model.ConcatVector clone)
{
    int numComponents = clone.pointers.Length;
    pointers = new double[numComponents][];
    copyOnWrite = new bool[numComponents];
    for (int i = 0; i < numComponents; i++)
    {
        double[] segment = clone.pointers[i];
        if (segment == null)
        {
            continue;
        }
        // Share the backing array with the source; both sides must now copy before mutating.
        pointers[i] = segment;
        copyOnWrite[i] = true;
        clone.copyOnWrite[i] = true;
    }
    sparse = new bool[numComponents];
    if (numComponents > 0)
    {
        System.Array.Copy(clone.sparse, 0, sparse, 0, numComponents);
    }
}
// Right now I'm not loading the native library even if it's available, since the dot product "speedup" is actually
// 10x slower. First need to diagnose if a speedup is possible by going through the JNI, which is unlikely.
/*
* static {
* try {
* System.load(System.getProperty("user.dir")+"/src/main/c/libconcatvec.so");
* loadedNative = true;
* }
* catch (UnsatisfiedLinkError e) {
* log.info("Couldn't find the native acceleration library for ConcatVector");
* }
* }
*/
/// <summary>Native-accelerated dot product stub.</summary>
/// <remarks>
/// The native library is never loaded (see the comment above), so loadedNative stays false and
/// DotProduct never dispatches here. The original body was empty, which does not compile in C#
/// (CS0161: not all code paths return a value); throwing makes the unreachable stub explicit
/// while keeping the signature intact for the loadedNative code path in DotProduct.
/// </remarks>
/// <param name="other">the vector to dot product with</param>
/// <returns>never returns normally; always throws</returns>
/// <exception cref="System.NotSupportedException">always, since native acceleration is disabled</exception>
private double DotProductNative(Edu.Stanford.Nlp.Loglinear.Model.ConcatVector other)
{
    throw new System.NotSupportedException("Native ConcatVector acceleration is not available.");
}
/// <summary>Compares two concat vectors by value.</summary>
/// <remarks>
/// Compares two concat vectors by value. This means that we're 0 padding, so a dense and sparse component might
/// both be considered the same, if the dense array reflects the same value as the sparse array. This is pretty much
/// only useful for testing. Since it's primarily for testing, we went with the slower, more obviously correct design.
/// </remarks>
/// <param name="other">the vector we're comparing to</param>
/// <param name="tolerance">the amount any pair of values can differ before we say the two vectors are different.</param>
/// <returns>whether the two vectors are the same</returns>
public virtual bool ValueEquals(Edu.Stanford.Nlp.Loglinear.Model.ConcatVector other, double tolerance)
{
    // Walk every component index present in EITHER vector; missing components count as all zeros.
    for (int i = 0; i < Math.Max(pointers.Length, other.pointers.Length); i++)
    {
        int size = 0;
        // Find the maximum non-zero element in this component
        if (i < pointers.Length && i < other.pointers.Length && pointers[i] == null && other.pointers[i] == null)
        {
            // Both sides have an (empty) slot here: nothing to compare.
            size = 0;
        }
        else
        {
            if (i >= pointers.Length || (i < pointers.Length && pointers[i] == null))
            {
                // Our side is absent/null; size is driven entirely by 'other'.
                if (i >= other.pointers.Length)
                {
                    size = 0;
                }
                else
                {
                    if (other.sparse[i])
                    {
                        // Sparse component: only index GetSparseIndex(i) can be non-zero.
                        size = other.GetSparseIndex(i) + 1;
                    }
                    else
                    {
                        size = other.pointers[i].Length;
                    }
                }
            }
            else
            {
                if (i >= other.pointers.Length || (i < other.pointers.Length && other.pointers[i] == null))
                {
                    // Other side is absent/null; size is driven entirely by us.
                    if (i >= pointers.Length)
                    {
                        size = 0;
                    }
                    else
                    {
                        if (sparse[i])
                        {
                            size = GetSparseIndex(i) + 1;
                        }
                        else
                        {
                            size = pointers[i].Length;
                        }
                    }
                }
                else
                {
                    // Both sides present: take the max extent over both components.
                    // NOTE(review): each pair below is an if/else, so a sparse index smaller
                    // than the running 'size' leaves 'size' untouched — presumably intentional
                    // since smaller extents can't grow the comparison range.
                    if (sparse[i] && GetSparseIndex(i) >= size)
                    {
                        size = GetSparseIndex(i) + 1;
                    }
                    else
                    {
                        if (!sparse[i] && pointers[i].Length > size)
                        {
                            size = pointers[i].Length;
                        }
                    }
                    if (other.sparse[i] && other.GetSparseIndex(i) >= size)
                    {
                        size = other.GetSparseIndex(i) + 1;
                    }
                    else
                    {
                        if (!other.sparse[i] && other.pointers[i].Length > size)
                        {
                            size = other.pointers[i].Length;
                        }
                    }
                }
            }
        }
        // Compare the 0-padded values elementwise up to the computed extent.
        for (int j = 0; j < size; j++)
        {
            if (Math.Abs(GetValueAt(i, j) - other.GetValueAt(i, j)) > tolerance)
            {
                return(false);
            }
        }
    }
    return(true);
}
/// <summary>This will multiply the vector "other" to this vector.</summary>
/// <remarks>
/// This will multiply the vector "other" to this vector. It's the equivalent of the Matlab
/// <p>
/// this = this .* other
/// <p>
/// The function assumes that both vectors are padded infinitely with 0s, so will result in lots of 0s in this
/// vector if it is longer than 'other'.
/// </remarks>
/// <param name="other">the vector to multiply into this one</param>
public virtual void ElementwiseProductInPlace(Edu.Stanford.Nlp.Loglinear.Model.ConcatVector other)
{
    for (int i = 0; i < pointers.Length; i++)
    {
        if (pointers[i] == null)
        {
            continue;
        }
        // Detach from any shared backing array before mutating in place.
        if (copyOnWrite[i])
        {
            copyOnWrite[i] = false;
            pointers[i] = pointers[i].MemberwiseClone();
        }
        if (i >= other.pointers.Length)
        {
            // 'other' is implicitly all zeros past its end, so this whole segment zeroes out.
            if (sparse[i])
            {
                pointers[i][1] = 0;
            }
            else
            {
                for (int j = 0; j < pointers[i].Length; j++)
                {
                    pointers[i][j] = 0;
                }
            }
        }
        else if (other.pointers[i] == null)
        {
            // A null segment in 'other' is an all-zero segment; ours becomes null too.
            pointers[i] = null;
        }
        else if (sparse[i] && other.sparse[i])
        {
            // sparse x sparse: non-zero only if the stored indices match.
            if ((int)pointers[i][0] == (int)other.pointers[i][0])
            {
                pointers[i][1] *= other.pointers[i][1];
            }
            else
            {
                pointers[i][1] = 0.0f;
            }
        }
        else if (sparse[i])
        {
            // sparse x dense: scale our single value by the matching dense entry (or zero it).
            int sparseIndex = (int)pointers[i][0];
            if (sparseIndex >= 0 && sparseIndex < other.pointers[i].Length)
            {
                pointers[i][1] *= other.pointers[i][sparseIndex];
            }
            else
            {
                pointers[i][1] = 0.0f;
            }
        }
        else if (other.sparse[i])
        {
            // dense x sparse: everything off the sparse index becomes zero, so this
            // segment collapses down to a sparse {index, value} pair.
            int sparseIndex = (int)other.pointers[i][0];
            double sparseValue = 0.0f;
            if (sparseIndex >= 0 && sparseIndex < pointers[i].Length)
            {
                sparseValue = pointers[i][sparseIndex] * other.pointers[i][1];
            }
            sparse[i] = true;
            pointers[i] = new double[] { sparseIndex, sparseValue };
        }
        else
        {
            // dense x dense: elementwise multiply the shared prefix, zero the tail
            // (which is multiplied by implicit zeros in 'other').
            int overlap = Math.Min(pointers[i].Length, other.pointers[i].Length);
            for (int j = 0; j < overlap; j++)
            {
                pointers[i][j] *= other.pointers[i][j];
            }
            for (int j = other.pointers[i].Length; j < pointers[i].Length; j++)
            {
                pointers[i][j] = 0.0f;
            }
        }
    }
}
/// <summary>This will add the vector "other" to this vector, scaling other by multiple.</summary>
/// <remarks>
/// This will add the vector "other" to this vector, scaling other by multiple. In algebra,
/// <p>
/// this = this + (other * multiple)
/// <p>
/// The function assumes that both vectors are padded infinitely with 0s, so will scale this vector by adding components
/// and changing component sizes (dense to bigger dense) and shapes (sparse to dense) in order to accommodate the result.
/// </remarks>
/// <param name="other">the vector to add to this one</param>
/// <param name="multiple">the multiple to use</param>
public virtual void AddVectorInPlace(Edu.Stanford.Nlp.Loglinear.Model.ConcatVector other, double multiple)
{
    // Resize if necessary
    if (pointers == null)
    {
        pointers = new double[other.pointers.Length][];
        sparse = new bool[other.pointers.Length];
        copyOnWrite = new bool[other.pointers.Length];
    }
    else
    {
        if (pointers.Length < other.pointers.Length)
        {
            IncreaseSizeTo(other.pointers.Length);
        }
    }
    // Do the addition piece by piece
    for (int i = 0; i < other.pointers.Length; i++)
    {
        // If the other vector has no segment here, then skip
        if (other.pointers[i] == null)
        {
            continue;
        }
        // If we previously had no element here, fill it in accordingly
        if (pointers[i] == null || pointers[i].Length == 0)
        {
            sparse[i] = other.sparse[i];
            // If the multiple is one, just follow the copying procedure
            if (multiple == 1.0)
            {
                // Alias 'other''s array and flag copy-on-write on BOTH vectors,
                // so whichever side mutates first makes its own private copy.
                pointers[i] = other.pointers[i];
                copyOnWrite[i] = true;
                other.copyOnWrite[i] = true;
            }
            else
            {
                // Otherwise do the standard thing
                if (other.sparse[i])
                {
                    // Sparse pair layout: [0] = index, [1] = value.
                    pointers[i] = new double[2];
                    copyOnWrite[i] = false;
                    pointers[i][0] = other.pointers[i][0];
                    pointers[i][1] = other.pointers[i][1] * multiple;
                }
                else
                {
                    pointers[i] = new double[other.pointers[i].Length];
                    copyOnWrite[i] = false;
                    for (int j = 0; j < other.pointers[i].Length; j++)
                    {
                        pointers[i][j] = other.pointers[i][j] * multiple;
                    }
                }
            }
        }
        else
        {
            // Handle rescaling on a component-by-component basis
            if (sparse[i] && !other.sparse[i])
            {
                // sparse += dense: this component must densify to hold the result.
                int sparseIndex = (int)pointers[i][0];
                double sparseValue = pointers[i][1];
                sparse[i] = false;
                pointers[i] = new double[Math.Max(sparseIndex + 1, other.pointers[i].Length)];
                copyOnWrite[i] = false;
                if (sparseIndex >= 0)
                {
                    pointers[i][sparseIndex] = sparseValue;
                }
                for (int j = 0; j < other.pointers[i].Length; j++)
                {
                    pointers[i][j] += other.pointers[i][j] * multiple;
                }
            }
            else
            {
                if (sparse[i] && other.sparse[i])
                {
                    int mySparseIndex = (int)pointers[i][0];
                    int otherSparseIndex = (int)other.pointers[i][0];
                    if (mySparseIndex == otherSparseIndex)
                    {
                        // Same index: stay sparse, but unshare our array before mutating it.
                        if (copyOnWrite[i])
                        {
                            pointers[i] = pointers[i].MemberwiseClone();
                            copyOnWrite[i] = false;
                        }
                        pointers[i][1] += other.pointers[i][1] * multiple;
                    }
                    else
                    {
                        // Different indices: result has two non-zeros, so densify.
                        sparse[i] = false;
                        double mySparseValue = pointers[i][1];
                        pointers[i] = new double[Math.Max(mySparseIndex + 1, otherSparseIndex + 1)];
                        copyOnWrite[i] = false;
                        if (mySparseIndex >= 0)
                        {
                            pointers[i][mySparseIndex] = mySparseValue;
                        }
                        if (otherSparseIndex >= 0)
                        {
                            pointers[i][otherSparseIndex] = other.pointers[i][1] * multiple;
                        }
                    }
                }
                else
                {
                    if (!sparse[i] && other.sparse[i])
                    {
                        // dense += sparse: grow our dense array (doubling) if the sparse
                        // index falls past our current end.
                        int sparseIndex = (int)other.pointers[i][0];
                        if (sparseIndex >= pointers[i].Length)
                        {
                            int newSize = pointers[i].Length;
                            while (newSize <= sparseIndex)
                            {
                                newSize *= 2;
                            }
                            double[] denseBuf = new double[newSize];
                            System.Array.Copy(pointers[i], 0, denseBuf, 0, pointers[i].Length);
                            copyOnWrite[i] = false;
                            pointers[i] = denseBuf;
                        }
                        if (sparseIndex >= 0)
                        {
                            // Unshare before the in-place add (growing above already unshared).
                            if (copyOnWrite[i])
                            {
                                pointers[i] = pointers[i].MemberwiseClone();
                                copyOnWrite[i] = false;
                            }
                            pointers[i][sparseIndex] += other.pointers[i][1] * multiple;
                        }
                    }
                    else
                    {
                        // dense += dense is the only case left.
                        System.Diagnostics.Debug.Assert((!sparse[i] && !other.sparse[i]));
                        if (pointers[i].Length < other.pointers[i].Length)
                        {
                            // Grow exactly to 'other''s length; the copy also unshares us.
                            double[] denseBuf = new double[other.pointers[i].Length];
                            System.Array.Copy(pointers[i], 0, denseBuf, 0, pointers[i].Length);
                            copyOnWrite[i] = false;
                            pointers[i] = denseBuf;
                        }
                        if (copyOnWrite[i])
                        {
                            pointers[i] = pointers[i].MemberwiseClone();
                            copyOnWrite[i] = false;
                        }
                        for (int j = 0; j < other.pointers[i].Length; j++)
                        {
                            pointers[i][j] += other.pointers[i][j] * multiple;
                        }
                    }
                }
            }
        }
    }
}