/// <summary>Read word vectors from an input stream.</summary>
/// <remarks>
/// Expected layout: three 32-bit header integers (the value used to pick the key-length
/// integer type, the vector dimensionality, and the number of entries), followed by one
/// record per entry made of the key's byte length, the key bytes, and one 16-bit value per
/// vector component. The stream is not closed on finishing the function.
/// </remarks>
/// <param name="in">The stream to read from. This is not closed.</param>
/// <returns>The word vectors encoded on the stream.</returns>
/// <exception cref="System.IO.IOException">
/// Thrown if we could not read from the stream, or if the stream ends before a key has been
/// read completely.
/// </exception>
public static VectorMap Deserialize(InputStream @in)
{
    DataInputStream dataIn = new DataInputStream(@in);

    // The first header int selects the integer width used to encode each key's length.
    VectorMap.Itype keyIntType = VectorMap.Itype.GetType(dataIn.ReadInt());
    // Read the vector dimensionality
    int dim = dataIn.ReadInt();
    // Read the size of the dataset
    int size = dataIn.ReadInt();

    // Read the vectors
    VectorMap vectors = new VectorMap();
    for (int i = 0; i < size; ++i)
    {
        // Read the key
        int strlen = keyIntType.Read(dataIn);
        byte[] buffer = new byte[strlen];

        // A single Read call may legitimately return fewer bytes than requested, so keep
        // reading until the buffer is full. Only a non-positive count (end of stream)
        // before the buffer is filled is treated as a truncated input.
        int filled = 0;
        while (filled < strlen)
        {
            int count = dataIn.Read(buffer, filled, strlen - filled);
            if (count <= 0)
            {
                throw new IOException("Could not read string buffer fully!");
            }
            filled += count;
        }
        string key = Sharpen.Runtime.GetStringForBytes(buffer);

        // Read the vector: each component is stored as a 16-bit value and expanded by ToFloat.
        float[] vector = new float[dim];
        for (int k = 0; k < vector.Length; ++k)
        {
            vector[k] = ToFloat(dataIn.ReadShort());
        }

        // Add the key/value
        vectors[key] = vector;
    }
    return vectors;
}
/// <summary>Write the word vectors to an output stream.</summary>
/// <remarks>
/// Emits three 32-bit header integers (longest key length in bytes, vector length, entry
/// count), then for every entry the key's byte length, the key bytes, and one 16-bit value
/// per vector component. The vector length written in the header is taken from the last
/// entry enumerated. The stream is not closed on finishing the function.
/// </remarks>
/// <param name="out">The stream to write to.</param>
/// <exception cref="System.IO.IOException">Thrown if the stream could not be written to.</exception>
public virtual void Serialize(OutputStream @out)
{
    DataOutputStream writer = new DataOutputStream(@out);

    // First pass: gather the statistics that make up the header.
    int longestKeyBytes = 0;
    int dimension = 0;
    foreach (KeyValuePair<string, float[]> pair in this)
    {
        int encodedLength = Sharpen.Runtime.GetBytesForString(pair.Key).Length;
        if (encodedLength > longestKeyBytes)
        {
            longestKeyBytes = encodedLength;
        }
        dimension = pair.Value.Length;
    }

    // The longest key decides which integer width encodes every key length.
    VectorMap.Itype lengthEncoding = VectorMap.Itype.GetType(longestKeyBytes);

    // Header: max key length, vector dimension, number of entries.
    writer.WriteInt(longestKeyBytes);
    writer.WriteInt(dimension);
    writer.WriteInt(this.Count);

    // Second pass: one record per entry.
    foreach (KeyValuePair<string, float[]> pair in this)
    {
        // Key length followed by the key bytes themselves.
        byte[] keyBytes = Sharpen.Runtime.GetBytesForString(pair.Key);
        lengthEncoding.Write(writer, keyBytes.Length);
        writer.Write(keyBytes);

        // Vector components, each packed into a 16-bit value by FromFloat.
        float[] components = pair.Value;
        for (int idx = 0; idx < components.Length; ++idx)
        {
            writer.WriteShort(FromFloat(components[idx]));
        }
    }
}
/// <summary>Get the minimum integer type that will fit this number.</summary>
/// <param name="num">The number that must be representable.</param>
/// <returns>
/// Int8 when <paramref name="num"/> is below <c>byte.MaxValue</c>, Int16 when it is below
/// <c>short.MaxValue</c>, and Int32 otherwise.
/// </returns>
internal static VectorMap.Itype GetType(int num)
{
    // NOTE(review): byte.MaxValue is 255 in C#, whereas Java's Byte.MAX_VALUE is 127.
    // If this was ported from Java and Int8 is read/written as a signed byte, values in
    // 127..254 may not round-trip - confirm against Itype.Read/Write.
    if (num < byte.MaxValue)
    {
        return VectorMap.Itype.Int8;
    }
    if (num < short.MaxValue)
    {
        return VectorMap.Itype.Int16;
    }
    return VectorMap.Itype.Int32;
}