/// <summary>
/// Infers a Parquet schema from a single JSON object.
/// </summary>
/// <param name="jObject">The JSON object handed to the schema inferrer.</param>
/// <returns>
/// The schema returned by <c>JsonSchemaInferring.InferSchema</c> when it is given
/// a one-element array containing <paramref name="jObject"/>.
/// </returns>
public static PSchema InferParquetSchema(this JObject jObject)
{
    // The inferrer works on a sequence of documents, so wrap the single object.
    JObject[] singleDocument = { jObject };

    return new JsonSchemaInferring().InferSchema(singleDocument);
}
/// <summary>
/// Runs schema inference over the two JSON fixtures <c>infer00.json</c> and
/// <c>infer01.json</c> together and checks that the resulting schema contains
/// <c>id</c>, <c>country</c>, a <c>population</c> struct (<c>year</c>,
/// <c>amount</c>, <c>diff</c>) and <c>comment</c>.
/// </summary>
public void Infer_different_schemas()
{
    // Arrange
    var schemaInferrer = new JsonSchemaInferring();
    JObject firstDocument = JObject.Parse(ReadJson("infer00.json"));
    JObject secondDocument = JObject.Parse(ReadJson("infer01.json"));

    // Act
    Schema actual = schemaInferrer.InferSchema(new[] { firstDocument, secondDocument });

    // Assert
    var expected = new Schema(
        new DataField<int?>("id"),
        new DataField<string>("country"),
        new StructField(
            "population",
            new DataField<int?>("year"),
            new DataField<int?>("amount"),
            new DataField<int?>("diff")),
        new DataField<string>("comment"));

    Assert.Equal(expected, actual);
}
// TempTest: ad-hoc, disabled scratch test (the [Fact] attribute is commented out).
// What the code below does, step by step:
//   1. Lists the files in a hard-coded local directory under C:\Users\ivang\Downloads.
//   2. Reads each file as text and parses it with JObject.Parse, keeping at most 1000 objects.
//   3. Infers a single Schema from those objects via JsonSchemaInferring.InferSchema.
//   4. Creates a DataSet for that schema and adds one row per JSON object using JsonDataExtractor.AddRow.
//   5. Writes the DataSet to the hard-coded path c:\tmp\com.parquet via ParquetWriter.WriteFile.
// NOTE(review): both paths are machine-specific, so this cannot run elsewhere as-is.
// NOTE(review): on this single line the leading "//" comments out the whole method, not just
// the attribute - confirm whether that is intended before re-enabling or deleting it.
//[Fact] public void TempTest() { var dir = new DirectoryInfo(@"C:\Users\ivang\Downloads\Fullfeed-20170330004044"); FileInfo[] files = dir.GetFiles(); JObject[] jos = files .Select(fi => JObject.Parse(System.IO.File.ReadAllText(fi.FullName))) .Take(1000) .ToArray(); var inferrer = new JsonSchemaInferring(); Schema schema = inferrer.InferSchema(jos); var extractor = new JsonDataExtractor(schema); var ds = new DataSet(schema); for (int i = 0; i < jos.Length; i++) { extractor.AddRow(ds, jos[i]); } ParquetWriter.WriteFile(ds, "c:\\tmp\\com.parquet"); }