示例#1
0
        /// <summary>
        /// Infers a Parquet schema from a single JSON object.
        /// </summary>
        /// <param name="jObject">The JSON object, passed to the inferrer as a one-element array.</param>
        /// <returns>The schema produced by <c>JsonSchemaInferring.InferSchema</c>.</returns>
        public static PSchema InferParquetSchema(this JObject jObject)
        {
            var inferrer = new JsonSchemaInferring();
            JObject[] singleDocument = { jObject };

            return inferrer.InferSchema(singleDocument);
        }
        /// <summary>
        /// Infers one schema from the documents parsed out of "infer00.json" and
        /// "infer01.json" and checks it against the expected field layout.
        /// </summary>
        public void Infer_different_schemas()
        {
            var schemaInferrer = new JsonSchemaInferring();

            // Both documents are fed to the inferrer in a single call.
            JObject[] documents =
            {
                JObject.Parse(ReadJson("infer00.json")),
                JObject.Parse(ReadJson("infer01.json")),
            };

            Schema actual = schemaInferrer.InferSchema(documents);

            var expected = new Schema(
                new DataField<int?>("id"),
                new DataField<string>("country"),
                new StructField(
                    "population",
                    new DataField<int?>("year"),
                    new DataField<int?>("amount"),
                    new DataField<int?>("diff")),
                new DataField<string>("comment"));

            Assert.Equal(expected, actual);
        }
示例#3
0
        /// <summary>
        /// Ad-hoc run: parses up to 1000 JSON files from a local directory, infers a
        /// schema across them, extracts every document into a data set and writes the
        /// result to a Parquet file.
        /// </summary>
        //[Fact]
        public void TempTest()
        {
            var sourceDirectory = new DirectoryInfo(@"C:\Users\ivang\Downloads\Fullfeed-20170330004044");

            // Only the first 1000 files are read and parsed.
            JObject[] documents = sourceDirectory
                .GetFiles()
                .Take(1000)
                .Select(file => JObject.Parse(System.IO.File.ReadAllText(file.FullName)))
                .ToArray();

            Schema schema = new JsonSchemaInferring().InferSchema(documents);

            var rowExtractor = new JsonDataExtractor(schema);
            var dataSet = new DataSet(schema);

            foreach (JObject document in documents)
            {
                rowExtractor.AddRow(dataSet, document);
            }

            ParquetWriter.WriteFile(dataSet, "c:\\tmp\\com.parquet");
        }