/// <summary>Initializes a new instance of the <see cref="ID3v2Frame" /> class.</summary>
/// <param name="header">The tag header; its version selects how the frame content is post-processed.</param>
/// <param name="reader">The reader the frame header, content and raw data are taken from.</param>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> or <paramref name="header"/> is null.</exception>
/// <exception cref="NotSupportedException">The tag version is not 2, 3 or 4.</exception>
public ID3v2Frame(ID3v2Header header, DataFrameReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException(nameof(reader));
    }

    if (header == null)
    {
        throw new ArgumentNullException(nameof(header));
    }

    m_Header = new ID3v2FrameHeader(header, reader);

    // prepare content (has to be decoded, decrypted, decompressed, ...)
    m_Content = reader.Read(m_Header.HeaderSize, m_Header.ContentSize);
    switch (header.Version)
    {
        case 2:
            // nothing to do, raw plain content data
            break;
        case 3:
            ParseVersion3(reader);
            break;
        case 4:
            ParseVersion4(reader);
            break;
        default:
            throw new NotSupportedException(string.Format("ID3v2.{0} is not supported!", header.Version));
    }

    // copy raw data (header + content) and remove it from the reader
    m_Data = reader.GetBuffer(m_Header.HeaderSize + m_Header.ContentSize);
}
// Checks that the partitioned Jdbc overload (column, bounds, partition count)
// hands every argument to the proxy unchanged, exactly once.
public void TestJdbc2()
{
    // Arrange
    mockDataFrameReaderProxy.Setup(m => m.Jdbc(
        It.IsAny<string>(),
        It.IsAny<string>(),
        It.IsAny<string>(),
        It.IsAny<string>(),
        It.IsAny<string>(),
        It.IsAny<int>(),
        It.IsAny<Dictionary<string, string>>()));
    var sut = new DataFrameReader(mockDataFrameReaderProxy.Object, sparkContext);

    const string jdbcUrl = "url";
    const string tableName = "table_name";
    const string partitionColumn = "col1";
    const string lower = "a";
    const string upper = "z";
    const int partitions = 5;
    var props = new Dictionary<string, string>();
    props.Add("prop1", "value1");
    props.Add("prop2", "value2");

    // Act
    sut.Jdbc(jdbcUrl, tableName, partitionColumn, lower, upper, partitions, props);

    // Assert
    mockDataFrameReaderProxy.Verify(
        m => m.Jdbc(jdbcUrl, tableName, partitionColumn, lower, upper, partitions, props),
        Times.Once);
}
// Checks that Options(...) reaches the proxy once with a dictionary holding
// exactly the two supplied key/value pairs.
public void TestOptions()
{
    // Arrange
    mockDataFrameReaderProxy.Setup(m => m.Options(It.IsAny<Dictionary<string, string>>()));
    var sut = new DataFrameReader(mockDataFrameReaderProxy.Object, sparkContext);

    const string firstKey = "key1";
    const string firstValue = "value1";
    const string secondKey = "key2";
    const string secondValue = "value2";
    var options = new Dictionary<string, string>();
    options.Add(firstKey, firstValue);
    options.Add(secondKey, secondValue);

    // Act
    sut.Options(options);

    // Assert
    mockDataFrameReaderProxy.Verify(
        m => m.Options(It.Is<Dictionary<string, string>>(
            dict => dict[firstKey] == firstValue && dict[secondKey] == secondValue && dict.Count == 2)),
        Times.Once);
}
// Loads the extended header for an ID3v2.3 tag.
// The first four bytes are combined big-endian into a size; four is added to
// that value (presumably to account for the size field itself - confirm
// against the spec) and that many bytes are taken from the reader.
// Returns false when the reader cannot supply the required bytes.
bool ParseVersion3(DataFrameReader reader)
{
    if (!reader.EnsureBuffer(4))
    {
        return false;
    }

    // big-endian 32 bit value plus the four bytes of the size field
    byte[] raw = reader.Read(0, 4);
    int total = ((raw[0] << 24) | (raw[1] << 16) | (raw[2] << 8) | raw[3]) + 4;

    if (!reader.EnsureBuffer(total))
    {
        return false;
    }

    m_Data = reader.GetBuffer(total);
    m_Flags = ID3v2ExtendedHeaderFlags.FromID3v23(m_Data);
    return true;
}
// Signature smoke test: every configuration call must hand back a
// DataFrameReader and every load-style call must hand back a DataFrame.
public void TestSignaturesV2_3_X()
{
    DataFrameReader reader = _spark.Read();

    // Fluent configuration methods.
    Assert.IsType<DataFrameReader>(reader.Format("json"));

    var explicitSchema = new StructType(new[]
    {
        new StructField("age", new IntegerType()),
        new StructField("name", new StringType())
    });
    Assert.IsType<DataFrameReader>(reader.Schema(explicitSchema));
    Assert.IsType<DataFrameReader>(reader.Schema("age INT, name STRING"));

    Assert.IsType<DataFrameReader>(reader.Option("stringOption", "value"));
    Assert.IsType<DataFrameReader>(reader.Option("boolOption", true));
    Assert.IsType<DataFrameReader>(reader.Option("longOption", 1L));
    Assert.IsType<DataFrameReader>(reader.Option("doubleOption", 3D));

    var bulkOptions = new Dictionary<string, string>
    {
        { "option1", "value1" },
        { "option2", "value2" }
    };
    Assert.IsType<DataFrameReader>(reader.Options(bulkOptions));

    // Load / Json.
    string jsonPath = $"{TestEnvironment.ResourceDirectory}people.json";
    Assert.IsType<DataFrame>(reader.Load());
    Assert.IsType<DataFrame>(reader.Load(jsonPath));
    Assert.IsType<DataFrame>(reader.Load(jsonPath, jsonPath));
    Assert.IsType<DataFrame>(reader.Json(jsonPath));
    Assert.IsType<DataFrame>(reader.Json(jsonPath, jsonPath));

    // Csv.
    string csvPath = $"{TestEnvironment.ResourceDirectory}people.csv";
    Assert.IsType<DataFrame>(reader.Csv(csvPath));
    Assert.IsType<DataFrame>(reader.Csv(csvPath, csvPath));

    // Parquet.
    string parquetPath = $"{TestEnvironment.ResourceDirectory}users.parquet";
    Assert.IsType<DataFrame>(reader.Parquet(parquetPath));
    Assert.IsType<DataFrame>(reader.Parquet(parquetPath, parquetPath));

    // Orc.
    string orcPath = $"{TestEnvironment.ResourceDirectory}users.orc";
    Assert.IsType<DataFrame>(reader.Orc(orcPath));
    Assert.IsType<DataFrame>(reader.Orc(orcPath, orcPath));

    // Text is exercised on a freshly obtained reader.
    reader = _spark.Read();
    string textPath = $"{TestEnvironment.ResourceDirectory}people.txt";
    Assert.IsType<DataFrame>(reader.Text(textPath));
    Assert.IsType<DataFrame>(reader.Text(textPath, textPath));
}
// Checks that Schema(...) forwards the StructType to the proxy once and
// returns the same reader instance so calls can be chained.
public void TestSchema()
{
    // Arrange
    mockDataFrameReaderProxy.Setup(m => m.Schema(It.IsAny<StructType>()));
    var sut = new DataFrameReader(mockDataFrameReaderProxy.Object, sparkContext);

    // Nested schema: address { city, state }, age, id, name.
    const string schemaJson = @" { ""type"" : ""struct"", ""fields"" : [ { ""name"" : ""address"", ""type"" : { ""type"" : ""struct"", ""fields"" : [ { ""name"" : ""city"", ""type"" : ""string"", ""nullable"" : true, ""metadata"" : { } }, { ""name"" : ""state"", ""type"" : ""string"", ""nullable"" : true, ""metadata"" : { } } ] }, ""nullable"" : true, ""metadata"" : { } }, { ""name"" : ""age"", ""type"" : ""long"", ""nullable"" : true, ""metadata"" : { } }, { ""name"" : ""id"", ""type"" : ""string"", ""nullable"" : true, ""metadata"" : { } }, { ""name"" : ""name"", ""type"" : ""string"", ""nullable"" : true, ""metadata"" : { } } ] }";
    var structType = new StructType(new MockStructTypeProxy(schemaJson));

    // Act
    var returned = sut.Schema(structType);

    // Assert
    Assert.IsNotNull(returned);
    Assert.AreSame(returned, sut);
    mockDataFrameReaderProxy.Verify(m => m.Schema(structType), Times.Once);
}
// Sample entry point: loads ./csv_file.csv as comma-separated data with a
// header row and prints the resulting DataFrame. The args parameter is unused.
static void Main(string[] args)
{
    SparkSession session = SparkSession.Builder().GetOrCreate();

    DataFrameReader csvReader = session.Read();
    csvReader = csvReader.Format("csv");
    csvReader = csvReader.Option("header", true);
    csvReader = csvReader.Option("sep", ",");

    csvReader.Load("./csv_file.csv").Show();
}
// Post-processes the already loaded frame content for an ID3v2.3 frame:
// decompresses it when the frame's Compression flag is set, then decrypts it
// when the Encryption flag is set. The reader argument is not used here.
// NOTE(review): writers normally compress before encrypting, which would make
// decrypt-then-decompress the natural read order - confirm what Decompress and
// Decrypt expect before relying on frames that set both flags.
void ParseVersion3(DataFrameReader reader)
{
    bool compressed = m_Header.Flags.Compression;
    if (compressed)
    {
        m_Content = Decompress(m_Content);
    }

    bool encrypted = m_Header.Flags.Encryption;
    if (encrypted)
    {
        m_Content = Decrypt(m_Content);
    }
}
// Checks that the parameterless Load() is forwarded to the proxy exactly once.
public void TestLoad()
{
    // Arrange
    mockDataFrameReaderProxy.Setup(m => m.Load());
    var sut = new DataFrameReader(mockDataFrameReaderProxy.Object, sparkContext);

    // Act
    sut.Load();

    // Assert
    mockDataFrameReaderProxy.Verify(m => m.Load(), Times.Once);
}
/// <summary>Initializes a new instance of the <see cref="ID3v2FrameHeader"/> class.</summary>
/// <param name="header">The tag header; its version selects the frame header layout.</param>
/// <param name="reader">The reader the frame header bytes are read from (6 bytes for version 2, 10 bytes for versions 3 and 4).</param>
/// <exception cref="ArgumentNullException"><paramref name="header"/> or <paramref name="reader"/> is null.</exception>
/// <exception cref="NotSupportedException">The tag version is not 2, 3 or 4.</exception>
public ID3v2FrameHeader(ID3v2Header header, DataFrameReader reader)
{
    // Both arguments are dereferenced below; fail with a descriptive exception
    // instead of a NullReferenceException.
    if (header == null)
    {
        throw new ArgumentNullException(nameof(header));
    }

    if (reader == null)
    {
        throw new ArgumentNullException(nameof(reader));
    }

    TagHeader = header;
    switch (header.Version)
    {
        case 2:
            ParseVersion2(reader.Read(0, 6));
            break;
        case 3:
            ParseVersion3(reader.Read(0, 10));
            break;
        case 4:
            ParseVersion4(reader.Read(0, 10));
            break;
        default:
            throw new NotSupportedException(string.Format("Unsupported ID3v2 Version {0}", header.Version));
    }
}
// Checks that Format("json") is forwarded to the proxy once and that the
// same reader instance is returned for chaining.
public void TestFormat()
{
    // Arrange
    mockDataFrameReaderProxy.Setup(m => m.Format(It.IsAny<string>()));
    var sut = new DataFrameReader(mockDataFrameReaderProxy.Object, sparkContext);

    // Act
    var returned = sut.Format("json");

    // Assert
    Assert.IsNotNull(returned);
    Assert.AreSame(returned, sut);
    mockDataFrameReaderProxy.Verify(m => m.Format("json"), Times.Once);
}
// Checks that a single Option(key, value) call reaches the proxy as one
// Options(...) call carrying a one-entry dictionary.
public void TestOption()
{
    // Arrange
    mockDataFrameReaderProxy.Setup(m => m.Options(It.IsAny<Dictionary<string, string>>()));
    var sut = new DataFrameReader(mockDataFrameReaderProxy.Object, sparkContext);
    const string optionKey = "path";
    const string optionValue = "path_value";

    // Act
    sut.Option(optionKey, optionValue);

    // Assert
    mockDataFrameReaderProxy.Verify(
        m => m.Options(It.Is<Dictionary<string, string>>(
            dict => dict[optionKey] == optionValue && dict.Count == 1)),
        Times.Once);
}
// Reads every row of tblUser through Spark's JDBC data source and prints it.
private static void LoginSql()
{
    // Spark places the "dbtable" value in the FROM clause of the statement it
    // generates, so a bare SELECT is not valid there; it has to be supplied as
    // a parenthesized subquery with an alias.
    const string query = "(select * from tblUser) AS tblUser_query";

    var spark = SparkSession.Builder().GetOrCreate();
    try
    {
        DataFrame dataFrame = spark.Read()
            .Format("jdbc")
            .Option("Driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
            .Option("url", "jdbc:sqlserver://*****:*****@@Error15;")
            .Option("dbtable", query)
            .Load();

        dataFrame.Show();
    }
    finally
    {
        // Stop the session even when loading or showing the data fails.
        spark.Stop();
    }
}
/// <summary>
/// Reads the first 10 bytes available from the reader into this instance and parses them.
/// The bytes are read at offset 0; this method does not remove them from the reader.
/// </summary>
/// <param name="reader">FrameReader to read from.</param>
/// <returns>False when the reader cannot supply 10 bytes; otherwise true.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public override bool Parse(DataFrameReader reader)
{
    if (reader == null)
    {
        // paramName must match the actual parameter so tooling and callers can identify it
        throw new ArgumentNullException(nameof(reader));
    }

    if (!reader.EnsureBuffer(10))
    {
        return false;
    }

    m_Data = reader.Read(0, 10);
    ParseData();
    return true;
}
// Checks that Parquet(path1, path2) forwards both paths to the proxy, in
// order, as a two-element array. The two paths are deliberately different so
// that a swapped or duplicated element makes the verification fail.
public void TestParquet()
{
    // Arrange
    mockDataFrameReaderProxy.Setup(m => m.Parquet(It.IsAny<string[]>()));
    var dataFrameReader = new DataFrameReader(mockDataFrameReaderProxy.Object, sparkContext);
    const string path1 = "path/to/parquet1";
    const string path2 = "path/to/parquet2";

    // Act
    var dataFrame = dataFrameReader.Parquet(path1, path2);

    // Assert
    Assert.IsNotNull(dataFrame);
    mockDataFrameReaderProxy.Verify(
        m => m.Parquet(It.Is<string[]>(
            strArray => strArray.Length == 2 && strArray[0] == path1 && strArray[1] == path2)),
        Times.Once);
}
/// <summary>
/// Resets the flags and dispatches to the version specific parser selected by the tag header version.
/// </summary>
/// <param name="reader">FrameReader to read from.</param>
/// <returns>The result of the version specific parser, or false for versions other than 2, 3 and 4.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public override bool Parse(DataFrameReader reader)
{
    if (reader == null)
    {
        // the parameter is called "reader"; the previous name "Stream" matched no parameter
        throw new ArgumentNullException(nameof(reader));
    }

    m_Flags = new ID3v2ExtendedHeaderFlags();
    switch (m_Header.Version)
    {
        case 2:
            return ParseVersion2(reader);
        case 3:
            return ParseVersion3(reader);
        case 4:
            return ParseVersion4(reader);
        default:
            return false;
    }
}
/// <summary>
/// Reads a complete tag through an <see cref="ID3v2Reader"/>: header, extended header,
/// frames (version 2 uses its own frame parser) and, if the reader signals one, the footer.
/// </summary>
/// <param name="frameReader">FrameReader to read from.</param>
/// <returns>
/// False when the header is missing, the version is not 2, 3 or 4, or any stage reports failure;
/// otherwise true.
/// </returns>
public override bool Parse(DataFrameReader frameReader)
{
    var tagReader = new ID3v2Reader(frameReader);

    // header
    Header = tagReader.ReadHeader(out data);
    if (Header == null)
    {
        return false;
    }

    // extended header (may be null)
    bool extendedOk = tagReader.ReadExtendedHeader(out extendedHeader);
    if (!extendedOk)
    {
        return false;
    }

    // frames
    bool framesOk;
    switch (Header.Version)
    {
        case 2:
            framesOk = ParseFramesV2(tagReader);
            break;
        case 3:
        case 4:
            framesOk = ParseFrames(tagReader);
            break;
        default:
            return false;
    }

    if (!framesOk)
    {
        return false;
    }

    // footer, only when the reader is positioned at one
    if (tagReader.State == ID3v2ReaderState.ReadFooter && !tagReader.ReadFooter(out footer))
    {
        return false;
    }

    return true;
}
// Checks that Json(path) results in Format("json"), an Options call carrying
// only the "path" entry, and a single Load() on the proxy.
public void TestJson()
{
    // Arrange
    mockDataFrameReaderProxy.Setup(m => m.Format(It.IsAny<string>()));
    var sut = new DataFrameReader(mockDataFrameReaderProxy.Object, sparkContext);
    const string jsonPath = "path/to/json";

    // Act
    var dataFrame = sut.Json(jsonPath);

    // Assert
    Assert.IsNotNull(dataFrame);
    mockDataFrameReaderProxy.Verify(m => m.Format("json"), Times.Once);
    mockDataFrameReaderProxy.Verify(
        m => m.Options(It.Is<Dictionary<string, string>>(
            dict => dict["path"] == jsonPath && dict.Count == 1)),
        Times.Once);
    mockDataFrameReaderProxy.Verify(m => m.Load(), Times.Once);
}
/// <summary>
/// Reverses ID3v2 unsynchronisation while reading from a frame reader.
/// </summary>
/// <param name="reader">FrameReader to read from.</param>
/// <param name="start">Index of the first byte to read.</param>
/// <param name="unsyncedLength">Number of bytes to produce in the result.</param>
/// <returns>A buffer of <paramref name="unsyncedLength"/> bytes with the inserted zero bytes removed.</returns>
public static byte[] Reader(DataFrameReader reader, int start, int unsyncedLength)
{
    var result = new byte[unsyncedLength];
    var n = start;
    for (var i = 0; i < result.Length; i++)
    {
        var b = reader.ReadByte(n++);

        // The data byte itself is always kept, including 0xFF.
        result[i] = b;

        // Unsynchronisation inserts a 0x00 after every 0xFF; that inserted
        // byte is the one to drop, not the 0xFF.
        if (b == 0xFF && reader.ReadByte(n) == 0x00)
        {
            n++;
        }
    }

    return result;
}
/// <summary>
/// Post-processes the already loaded frame content for an ID3v2.4 frame: reverses frame level
/// unsynchronisation (only when the whole tag was not unsynchronised), then decompresses and
/// then decrypts the content according to the frame flags.
/// </summary>
/// <param name="reader">FrameReader the frame was read from; not used by this method.</param>
void ParseVersion4(DataFrameReader reader)
{
    // A tag level unsync flag means the whole tag is handled elsewhere, so the
    // frame level flag is only honoured when the tag level flag is clear.
    bool tagUnsynced = (m_Header.TagHeader.Flags & ID3v2HeaderFlags.Unsynchronisation) != 0;
    if (!tagUnsynced && m_Header.Flags.Unsynchronisation)
    {
        m_Content = ID3v2DeUnsync.Buffer(m_Content);
    }

    bool compressed = m_Header.Flags.Compression;
    if (compressed)
    {
        m_Content = Decompress(m_Content);
    }

    // NOTE(review): the ID3v2.4 spec asks writers to encrypt after compressing;
    // confirm that decompress-before-decrypt is what Decompress/Decrypt expect.
    bool encrypted = m_Header.Flags.Encryption;
    if (encrypted)
    {
        m_Content = Decrypt(m_Content);
    }
}
// Checks that the (url, table, properties) Jdbc overload hands its arguments
// to the proxy unchanged, exactly once.
public void TestJdbc1()
{
    // Arrange
    mockDataFrameReaderProxy.Setup(m => m.Jdbc(
        It.IsAny<string>(),
        It.IsAny<string>(),
        It.IsAny<Dictionary<string, string>>()));
    var sut = new DataFrameReader(mockDataFrameReaderProxy.Object, sparkContext);

    const string jdbcUrl = "url";
    const string tableName = "table_name";
    var props = new Dictionary<string, string>();
    props.Add("prop1", "value1");
    props.Add("prop2", "value2");

    // Act
    sut.Jdbc(jdbcUrl, tableName, props);

    // Assert
    mockDataFrameReaderProxy.Verify(m => m.Jdbc(jdbcUrl, tableName, props), Times.Once);
}
/// <summary>
/// Reads the tag header (check <see cref="State"/> before usage), extracts the whole tag from the
/// underlying reader and switches this instance over to a reader on the tag body.
/// </summary>
/// <param name="tagData">
/// Receives the raw tag bytes (header plus body as taken from the reader), or null when no
/// header could be parsed or the header version is below 2.
/// </param>
/// <returns>The parsed header, or null when parsing failed or the version is below 2.</returns>
/// <exception cref="InvalidOperationException">The reader is not in the ReadHeader state.</exception>
public ID3v2Header ReadHeader(out byte[] tagData)
{
    if (State != ID3v2ReaderState.ReadHeader)
    {
        throw new InvalidOperationException(string.Format("Cannot read header at state {0}", State));
    }

    var parsed = new ID3v2Header();
    bool headerOk = parsed.Parse(m_Reader);
    if (!headerOk)
    {
        tagData = null;
        return null;
    }

    // a present footer is subtracted from the number of body bytes
    bool hasFooter = (parsed.Flags & ID3v2HeaderFlags.Footer) != 0;
    m_BodyBytes = hasFooter ? parsed.BodySize - 10 : parsed.BodySize;

    // the state advances before the version check, so it moves on even for rejected versions
    State++;

    if (parsed.Version < 2)
    {
        tagData = null;
        return null;
    }

    tagData = m_Reader.GetBuffer(parsed.HeaderSize + parsed.BodySize);
    var body = tagData.GetRange(parsed.HeaderSize);

    // tag level unsynchronisation: reverse it for the whole body and use the resulting length
    bool unsyncWholeTag = (parsed.Flags & ID3v2HeaderFlags.Unsynchronisation) != 0;
    if (unsyncWholeTag)
    {
        body = ID3v2DeUnsync.Buffer(body);
        m_BodyBytes = body.Length;
    }

    // from here on read from the cached body data
    m_Header = parsed;
    m_Reader = new DataFrameReader(body);
    return parsed;
}
// Loads the extended header for an ID3v2.4 tag.
// The size is decoded from the first four bytes via ID3v2DeUnsync.Int32 and
// exactly that many bytes are then taken from the reader.
// Returns false when the reader cannot supply the required bytes.
bool ParseVersion4(DataFrameReader reader)
{
    bool hasSizeField = reader.EnsureBuffer(4);
    if (!hasSizeField)
    {
        return false;
    }

    byte[] sizeField = reader.Read(0, 4);
    int length = ID3v2DeUnsync.Int32(sizeField, 0);

    bool hasAllData = reader.EnsureBuffer(length);
    if (!hasAllData)
    {
        return false;
    }

    m_Data = reader.GetBuffer(length);
    m_Flags = ID3v2ExtendedHeaderFlags.FromID3v24(m_Data);
    return true;
}
/// <summary>
/// Creates a new MP3Reader for the specified stream.
/// When the stream is seekable, positioned at its start and longer than 128 bytes, the last
/// 128 bytes are probed for an ID3v1 tag ("TAG" marker) before the stream is rewound.
/// </summary>
/// <param name="stream">The stream to load.</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public MP3Reader(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    Name = stream.ToString();
    long endOfStream = 0;
    if (stream.CanSeek && (stream.Position == 0) && (stream.Length > 128))
    {
        // try loading ID3v1 first
        try
        {
            stream.Seek(-128, SeekOrigin.End);
            var buffer = new byte[128];

            // Stream.Read may return fewer bytes than requested; keep reading
            // until the buffer is full or the stream ends.
            int filled = 0;
            while (filled < buffer.Length)
            {
                int got = stream.Read(buffer, filled, buffer.Length - filled);
                if (got <= 0)
                {
                    break;
                }

                filled += got;
            }

            bool hasTagMarker = (buffer[0] == (byte)'T') && (buffer[1] == (byte)'A') && (buffer[2] == (byte)'G');
            if (filled == buffer.Length && hasTagMarker)
            {
                m_ID3v1 = new ID3v1(buffer);
                endOfStream = stream.Length - 128;
            }
        }
        catch
        {
            // ID3v1 detection is best effort: any failure simply means the
            // stream is treated as having no ID3v1 tag.
        }

        stream.Seek(0, SeekOrigin.Begin);
    }

    m_Reader = new DataFrameReader(stream, endOfStream);
}
/// <summary>
/// Switches the "header" option on for the given reader.
/// </summary>
/// <param name="reader">The <see cref="DataFrameReader"/> to configure.</param>
/// <returns>The <see cref="DataFrameReader"/> returned by the option call.</returns>
public static DataFrameReader HasHeader(this DataFrameReader reader)
    => reader.Option("header", true);
// Handles a chunk of received bytes.
// While handshaking, the bytes are treated as the complete handshake response:
// a frame reader is created and the client either opens or disconnects.
// Otherwise the bytes are appended to the frame reader and every complete
// frame is passed to the user context.
private void ProcessData(Context context)
{
    if (state != WebSocketClientState.Handshaking)
    {
        Debug.Assert(state == WebSocketClientState.Open);
        frameReader.Append(context.Buffer, context.ReceivedByteCount);
        foreach (var frameData in frameReader.ReadFrames())
        {
            context.UserContext.OnReceive(frameData);
        }

        return;
    }

    // Assumes the whole response has been read and that it carries no payload.
    // This is definitely not guaranteed to be true.
    var response = new byte[context.ReceivedByteCount];
    Array.Copy(context.Buffer, response, context.ReceivedByteCount);
    var authenticated = CheckAuthenticationResponse(response);

    // The frame reader is created regardless of the authentication outcome.
    frameReader = new DataFrameReaderRfc6455();

    if (authenticated)
    {
        state = WebSocketClientState.Open;
        context.UserContext.OnConnected();
    }
    else
    {
        Disconnect();
    }
}
// Creates a reader that takes its data from the given frame reader.
// The reference is stored as is; no validation happens here.
internal ID3v2Reader(DataFrameReader reader) => m_Reader = reader;
/// <summary>
/// Parses the data for this frame from the specified reader.
/// </summary>
/// <param name="reader">FrameReader to read from.</param>
/// <returns>
/// A flag reporting the outcome. The overrides visible next to this declaration return
/// false when the reader cannot supply the required data and true after a successful parse.
/// </returns>
public abstract bool Parse(DataFrameReader reader);
/// <summary>
/// Not supported by this type; always throws.
/// </summary>
/// <param name="reader">FrameReader to read from (ignored).</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public override bool Parse(DataFrameReader reader)
{
    throw new NotSupportedException();
}
/// <summary>
/// Parses one MP3 audio frame: reads and validates the 4 byte header, loads the frame data and,
/// when what follows is neither a valid frame header nor an ID3 tag marker, checks whether the
/// next frame starts one byte early or late (wrong padding bit) and corrects the frame length.
/// The frame's bytes are removed from the reader on success.
/// </summary>
/// <param name="reader">FrameReader to read from.</param>
/// <returns>
/// False when the reader cannot supply the header or the frame data, the header is invalid or
/// reports a length of zero; otherwise true.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public override bool Parse(DataFrameReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException(nameof(reader));
    }

    if (!reader.EnsureBuffer(4))
    {
        return false;
    }

    var headerData = reader.Read(0, 4);
    Header = new MP3AudioFrameHeader(headerData);
    if (Header.Validation != MP3AudioFrameHeadervalidation.Valid)
    {
        return false;
    }

    var dataLength = Header.Length;
    if (dataLength == 0)
    {
        return false;
    }

    if (!reader.EnsureBuffer(dataLength))
    {
        return false;
    }

    m_Data = reader.Read(0, dataLength);

    // check next header
    if (reader.EnsureBuffer(dataLength + 4))
    {
        var nextHeaderBuffer = reader.Read(dataLength, 4);
        var next = new MP3AudioFrameHeader(nextHeaderBuffer);
        if (next.Validation != MP3AudioFrameHeadervalidation.Valid)
        {
            // an incoming ID3 v2 ("ID3") or ID3 v1 ("TAG") tag is fine and needs no correction
            bool id3v2Incoming = (nextHeaderBuffer[0] == 'I') && (nextHeaderBuffer[1] == 'D') && (nextHeaderBuffer[2] == '3');
            bool id3v1Incoming = (nextHeaderBuffer[0] == 'T') && (nextHeaderBuffer[1] == 'A') && (nextHeaderBuffer[2] == 'G');
            if (!id3v2Incoming && !id3v1Incoming)
            {
                // next header is invalid, check if the padding bit is set incorrectly.
                // there is a high probability that the padding bit is invalid if
                // the frame start is not directly after our buffer but one byte off
                var newStart = dataLength + (Header.Padding ? -1 : 1);

                // Only dataLength + 4 bytes were ensured above; probing one byte later
                // needs one more byte, so make sure it is available before reading.
                if (reader.EnsureBuffer(newStart + 4))
                {
                    nextHeaderBuffer = reader.Read(newStart, 4);
                    next = new MP3AudioFrameHeader(nextHeaderBuffer);
                    if (next.Validation == MP3AudioFrameHeadervalidation.Valid)
                    {
                        // either the frame has a padding byte the header does not announce,
                        // or it announces one that is missing; both cases resize the same way
                        m_Data = reader.Read(0, newStart);
                        InvalidPaddingCorrected = true;
                    }
                }
            }
        }
    }

    reader.Remove(m_Data.Length);
    return true;
}
// Version 2 handling is not implemented yet: every call throws
// NotImplementedException with the message "TODO".
bool ParseVersion2(DataFrameReader reader) => throw new NotImplementedException("TODO");