Exemplo n.º 1
0
        // Extracts two JSON documents into a single data set that uses a hand-written schema,
        // then checks the row count and the string rendering of each row. The first expected
        // row renders two of its values as <null>.
        public void Read_multiple_with_missing_data()
        {
            // Nested structure describing the "population" element.
            var populationField = new StructField(
                "population",
                new DataField<int?>("year"),
                new DataField<int?>("amount"),
                new DataField<int?>("diff"));

            var expectedShape = new Schema(
                new DataField<int?>("id"),
                new DataField<string>("country"),
                populationField,
                new DataField<string>("comment"));

            var jsonExtractor = new JsonDataExtractor(expectedShape);

            // Both documents are parsed up front, before any row is added.
            JObject firstDocument  = JObject.Parse(ReadJson("infer00.json"));
            JObject secondDocument = JObject.Parse(ReadJson("infer01.json"));

            var rows = new DataSet(expectedShape);

            foreach (JObject document in new[] { firstDocument, secondDocument })
            {
                jsonExtractor.AddRow(rows, document);
            }

            Assert.Equal(2, rows.RowCount);
            Assert.Equal("{123;UK;{2016;111;<null>};<null>}", rows[0].ToString());
            Assert.Equal("{123;UK;{2017;222;111};no comments}", rows[1].ToString());
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates a <see cref="DataSet"/> for the supplied schema and lets a
        /// <see cref="JsonDataExtractor"/> add the JSON object to it as a row.
        /// </summary>
        /// <param name="jObject">JSON object passed to the extractor.</param>
        /// <param name="schema">Schema used to construct both the extractor and the data set.</param>
        /// <returns>The data set after the extractor has processed <paramref name="jObject"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="schema"/> is null.</exception>
        public static DataSet ToParquetDataSet(this JObject jObject, PSchema schema)
        {
            if (schema == null)
            {
                throw new ArgumentNullException(nameof(schema));
            }

            // The extractor and the target data set are both built from the same schema.
            JsonDataExtractor rowExtractor = new JsonDataExtractor(schema);
            DataSet           result       = new DataSet(schema);

            rowExtractor.AddRow(result, jObject);

            return result;
        }
        /// <summary>
        /// Obtains a schema by running a <see cref="JsonSchemaExtractor"/> over the JSON object,
        /// then creates a <see cref="DataSet"/> for that schema and adds the same object to it as a row.
        /// </summary>
        /// <param name="jObject">JSON object that is analyzed for its schema and then extracted.</param>
        /// <returns>The data set after the extractor has processed <paramref name="jObject"/>.</returns>
        public static DataSet ToParquetDataSet(this JObject jObject)
        {
            // Step 1: analyze the object and ask the extractor for the resulting schema.
            JsonSchemaExtractor schemaAnalyzer = new JsonSchemaExtractor();
            schemaAnalyzer.Analyze(jObject);

            PSchema extractedSchema = schemaAnalyzer.GetSchema();

            // Step 2: feed the same object through a data extractor built on that schema.
            JsonDataExtractor rowExtractor = new JsonDataExtractor(extractedSchema);
            DataSet           result       = new DataSet(extractedSchema);

            rowExtractor.AddRow(result, jObject);

            return result;
        }
Exemplo n.º 4
0
        // Scratch routine: parses at most 1000 JSON files from a hard-coded local directory,
        // infers a schema from them, extracts every document into a data set and writes the
        // result to a hard-coded parquet file path.
        // NOTE(review): the [Fact] attribute below is commented out, so this is presumably
        // not picked up by the test runner - confirm before relying on it.
        //[Fact]
        public void TempTest()
        {
            var sourceDirectory = new DirectoryInfo(@"C:\Users\ivang\Downloads\Fullfeed-20170330004044");

            // Only the first 1000 files are read and parsed.
            JObject[] documents = sourceDirectory
                                  .GetFiles()
                                  .Take(1000)
                                  .Select(file => System.IO.File.ReadAllText(file.FullName))
                                  .Select(json => JObject.Parse(json))
                                  .ToArray();

            Schema inferredSchema = new JsonSchemaInferring().InferSchema(documents);

            var rowExtractor = new JsonDataExtractor(inferredSchema);
            var rows         = new DataSet(inferredSchema);

            foreach (JObject document in documents)
            {
                rowExtractor.AddRow(rows, document);
            }

            ParquetWriter.WriteFile(rows, "c:\\tmp\\com.parquet");
        }