        public void TestImportFromCloudStorage()
        {
            string datasetId         = "datasetForTestImportFromCloudStorage";
            string newTableId        = "tableForTestImportFromCloudStorage";
            string jsonGcsSampleFile = "sample.json";
            string gcsFolder         = "test";
            string gcsUploadTestWord = "exampleJsonFromGCS";

            CreateDataset(datasetId, _client);
            CreateTable(datasetId, newTableId, _client);
            // Import data.
            ImportDataFromCloudStorage(_projectId, datasetId, newTableId, _client,
                                       jsonGcsSampleFile, gcsFolder);
            // Run query to get table data.
            var             newTable = _client.GetTable(datasetId, newTableId);
            string          query    = $"SELECT title, unique_words FROM {newTable}";
            BigQueryResults results  = AsyncQuery(_projectId, datasetId, newTableId,
                                                  query, _client);
            // Get first row and confirm it contains the expected value.
            var row = results.GetRows().First();

            Assert.Equal(gcsUploadTestWord, row["title"]);
            DeleteTable(datasetId, newTableId, _client);
            DeleteDataset(datasetId, _client);
        }
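        // ImportDataFromCloudStorage is a helper defined outside this listing. A minimal
        // sketch is given below for recent Google.Cloud.BigQuery.V2 versions, assuming the
        // sample JSON file has already been uploaded to a Cloud Storage bucket; the bucket
        // name is an illustrative placeholder, not a value from the sample.
        public void ImportDataFromCloudStorage(string projectId, string datasetId,
            string newTableId, BigQueryClient client, string fileName, string folder = null)
        {
            string bucket = $"{projectId}-test-bucket";  // hypothetical bucket name
            string gcsUri = folder == null
                ? $"gs://{bucket}/{fileName}"
                : $"gs://{bucket}/{folder}/{fileName}";
            // Load newline-delimited JSON from GCS into the existing table; a null schema
            // tells the load job to use the destination table's schema.
            BigQueryJob job = client.CreateLoadJob(gcsUri,
                client.GetTableReference(datasetId, newTableId), null,
                new CreateLoadJobOptions { SourceFormat = FileFormat.NewlineDelimitedJson });
            job.PollUntilCompleted();
        }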
        public void TestAsyncQuery()
        {
            string          projectId = "bigquery-public-data";
            string          datasetId = "samples";
            string          tableId   = "shakespeare";
            var             table     = _client.GetTable(projectId, datasetId, tableId);
            string          query     = $@"SELECT corpus AS title, COUNT(*) AS unique_words FROM {table} 
                GROUP BY title ORDER BY unique_words DESC LIMIT 42";
            BigQueryResults results   = AsyncQuery(projectId, datasetId, tableId, query, _client);

            Assert.True(results.GetRows().Count() > 0);
        }
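        // AsyncQuery is the shared query helper these tests call. A minimal sketch,
        // assuming it simply runs the query as a job and returns the finished results;
        // option type names differ in older library versions (e.g. the ExecuteQueryOptions
        // used further below), so treat this as illustrative rather than the original helper.
        public BigQueryResults AsyncQuery(string projectId, string datasetId, string tableId,
            string query, BigQueryClient client)
        {
            // Start the query as a job rather than a blocking ExecuteQuery call.
            BigQueryJob job = client.CreateQueryJob(query, null,
                new QueryOptions { UseQueryCache = false });
            // Block until the job completes, then fetch the result rows.
            job = job.PollUntilCompleted();
            return job.GetQueryResults();
        }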
        public void TestLegacySqlSyncQuery()
        {
            string          projectId = "bigquery-public-data";
            string          datasetId = "samples";
            string          tableId   = "shakespeare";
            var             table     = _client.GetTable(projectId, datasetId, tableId);
            string          query     = $"SELECT TOP(corpus, 42) as title, COUNT(*) as unique_words FROM [{table.FullyQualifiedId}]";
            BigQueryResults results   = LegacySqlSyncQuery(
                projectId, datasetId, tableId, query, 10000, _client);

            Assert.True(results.GetRows().Count() > 0);
        }
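        // LegacySqlSyncQuery is likewise defined elsewhere; the sketch below guesses at its
        // shape from the call site, reading the 10000 argument as a result timeout in
        // milliseconds (an assumption).
        public BigQueryResults LegacySqlSyncQuery(string projectId, string datasetId,
            string tableId, string query, int timeoutMs, BigQueryClient client)
        {
            // UseLegacySql lets the [project:dataset.table] syntax in the query parse.
            BigQueryJob job = client.CreateQueryJob(query, null,
                new QueryOptions { UseLegacySql = true });
            return job.GetQueryResults(new GetQueryResultsOptions
            {
                Timeout = TimeSpan.FromMilliseconds(timeoutMs)
            });
        }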
        public void TestImportDataFromStream()
        {
            string datasetId         = "datasetForTestImportDataFromStream";
            string newTableId        = "tableForTestImportDataFromStream";
            string gcsUploadTestWord = "exampleJsonFromStream";

            CreateDataset(datasetId, _client);
            CreateTable(datasetId, newTableId, _client);
            // Import data.
            UploadJson(datasetId, newTableId, _client);
            // Query table to get first row and confirm it contains the expected value.
            var             newTable = _client.GetTable(datasetId, newTableId);
            string          query    = $"SELECT title, unique_words FROM {newTable}";
            BigQueryResults results  = AsyncQuery(_projectId, datasetId, newTableId, query, _client);
            var             row      = results.GetRows().First();

            Assert.Equal(gcsUploadTestWord, row["title"]);
            DeleteTable(datasetId, newTableId, _client);
            DeleteDataset(datasetId, _client);
        }
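        // UploadJson (stream variant) is not shown in this listing either. A minimal sketch,
        // assuming it streams a single newline-delimited JSON row whose title matches the
        // value asserted above; needs System.IO and System.Text.
        public void UploadJson(string datasetId, string tableId, BigQueryClient client)
        {
            string json = "{\"title\": \"exampleJsonFromStream\", \"unique_words\": 1}";
            using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
            {
                // A null schema reuses the existing table's schema for the load job.
                BigQueryJob job = client.UploadJson(datasetId, tableId, null, stream);
                job.PollUntilCompleted();
            }
        }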
        public void TestImportDataFromFile()
        {
            string datasetId           = "datasetForTestImportDataFromFile";
            string newTableId          = "tableForTestImportDataFromFile";
            string uploadTestWord      = "additionalExampleJsonFromFile";
            long   uploadTestWordValue = 9814072356;
            string filePath            = "..\\..\\..\\test\\data\\sample.json";

            CreateDataset(datasetId, _client);
            CreateTable(datasetId, newTableId, _client);
            // Import data.
            UploadJsonFromFile(_projectId, datasetId, newTableId, filePath, _client);
            // Query table to get first row and confirm it contains the expected value
            var             newTable = _client.GetTable(datasetId, newTableId);
            string          query    = $"SELECT title, unique_words FROM {newTable} WHERE title = '{uploadTestWord}'";
            BigQueryResults results  = AsyncQuery(_projectId, datasetId, newTableId, query, _client);
            var             row      = results.GetRows().Last();

            Assert.Equal(uploadTestWordValue, row["unique_words"]);
            DeleteTable(datasetId, newTableId, _client);
            DeleteDataset(datasetId, _client);
        }
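        // UploadJsonFromFile is the file-based counterpart; a minimal sketch, assuming it
        // loads a local newline-delimited JSON file (such as sample.json above) into the
        // table. Needs System.IO.
        public void UploadJsonFromFile(string projectId, string datasetId, string tableId,
            string fileName, BigQueryClient client)
        {
            using (FileStream stream = File.OpenRead(fileName))
            {
                // Again, a null schema means the destination table's schema is used.
                BigQueryJob job = client.UploadJson(datasetId, tableId, null, stream);
                job.PollUntilCompleted();
            }
        }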
Example #6
        protected override void ProcessRecord()
        {
            // Set Project for the lazy instantiation of a BQ Client object.
            Project = InputObject.ProjectId;

            // Clamp the result timeout to a minimum of 10 seconds.
            var options = new GetQueryResultsOptions {
                Timeout = new TimeSpan(0, 0, (Timeout < 10) ? 10 : Timeout)
            };

            try
            {
                BigQueryResults result = Client.GetQueryResults(InputObject, options);
                if (result == null)
                {
                    throw new Exception("Server response came back as null.");
                }
                WriteObject(result.GetRows(), true);
            }
            catch (Exception ex)
            {
                ThrowTerminatingError(new ErrorRecord(ex, "Failed to receive results.",
                                                      ErrorCategory.InvalidOperation, this));
            }
        }
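        // For comparison, a sketch of the same GetQueryResults call outside the cmdlet;
        // the project id, job id and column name are illustrative placeholders only.
        public static void PrintQueryResults()
        {
            BigQueryClient client = BigQueryClient.Create("my-project");   // hypothetical
            BigQueryJob job = client.GetJob("my-query-job-id");            // hypothetical
            var options = new GetQueryResultsOptions { Timeout = TimeSpan.FromSeconds(10) };
            foreach (BigQueryRow row in job.GetQueryResults(options).GetRows())
            {
                Console.WriteLine(row["title"]);
            }
        }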
Example #7
        public static void Main(string[] args)
        {
            // Dates to export; output file names follow the pattern "YYMM-{dataset}.json".
            Stack<string> dbDates = new Stack<string>(new List<string>()
            {
                "16_07",
                "16_08",
                "16_09",
                "16_10",
                "16_11",
                "16_12",
                "17_01",
                "17_02",
                "17_03",
                "17_04",
                "17_05",
                "17_06",
            });

            string dbDate    = "17_06";
            string subreddit = "Seattle";
            string dataset   = "posts";
            //string dataset = "comments";

            BigQueryClient   client = BigQueryClient.Create("aaaa-153204");
            ProjectReference pr     = client.GetProjectReference("fh-bigquery");


            DatasetReference dr = new DatasetReference()
            {
                DatasetId = $"reddit_{dataset}", ProjectId = pr.ProjectId
            };

            JsonSerializerSettings serializerSettings = new JsonSerializerSettings()
            {
                NullValueHandling = NullValueHandling.Include
            };

            while (dbDates.Count > 0)
            {
                dbDate = dbDates.Pop();


                string          fileName = $"{dbDate.Replace("_", "")}-{dataset}.json";
                string          query    = $@"SELECT * FROM `fh-bigquery.reddit_{dataset}.20{dbDate}` WHERE subreddit = '{subreddit}'";
                BigQueryResults result   = client.ExecuteQuery(query, new ExecuteQueryOptions()
                {
                    DefaultDataset = new DatasetReference()
                    {
                        DatasetId = $"reddit_{dataset}",
                        ProjectId = "fh-bigquery"
                    }
                });

                LinkedList<string> rows = new LinkedList<string>();
                // PollUntilCompleted blocks until the query job finishes, so this loop
                // runs at most once when the initial result is still incomplete.
                while (!result.Completed)
                {
                    Console.WriteLine("Polling for completed query");
                    result = result.PollUntilCompleted();
                }

                foreach (BigQueryRow row in result.GetRows(new Google.Api.Gax.PollSettings(
                                                               Google.Api.Gax.Expiration.None, new TimeSpan(0, 0, 15))))
                {
                    if (dataset == "comments")
                    {
                        object rowObj = new
                        {
                            body              = row["body"],
                            score_hidden      = (bool?)row["score_hidden"],
                            archived          = (bool?)row["archived"],
                            name              = row["name"],
                            author            = row["author"],
                            author_flair_text = row["author_flair_text"],
                            downs             = (long?)row["downs"],
                            created_utc       = (long?)row["created_utc"],
                            subreddit_id      = row["subreddit_id"],
                            link_id           = row["link_id"],
                            parent_id         = row["parent_id"],
                            score             = (long?)row["score"],
                            retrieved_on      = (long?)row["retrieved_on"],
                            controversiality  = (long?)row["controversiality"],
                            gilded            = (long?)row["gilded"],
                            id                     = row["id"],
                            subreddit              = row["subreddit"],
                            ups                    = (long?)row["ups"],
                            distinguished          = row["distinguished"],
                            author_flair_css_class = row["author_flair_css_class"]
                        };
                        rows.AddLast(
                            JsonConvert.SerializeObject(rowObj,
                                                        Formatting.None,
                                                        serializerSettings
                                                        ));
                    }
                    else
                    {
                        object rowPostObj = new
                        {
                            created_utc            = (long?)row["created_utc"],
                            subreddit              = row["subreddit"],
                            author                 = row["author"],
                            domain                 = row["domain"],
                            url                    = row["url"],
                            num_comments           = (long?)row["num_comments"],
                            score                  = (long?)row["score"],
                            ups                    = (long?)row["ups"],
                            downs                  = (long?)row["downs"],
                            title                  = row["title"],
                            selftext               = row["selftext"],
                            saved                  = (bool?)row["saved"],
                            id                     = row["id"],
                            from_kind              = row["from_kind"],
                            gilded                 = (long?)row["gilded"],
                            from                   = row["from"],
                            stickied               = (bool?)row["stickied"],
                            retrieved_on           = (long?)row["retrieved_on"],
                            over_18                = (bool?)row["over_18"],
                            thumbnail              = row["thumbnail"],
                            subreddit_id           = row["subreddit_id"],
                            hide_score             = (bool?)row["hide_score"],
                            link_flair_css_class   = row["link_flair_css_class"],
                            author_flair_css_class = row["author_flair_css_class"],
                            archived               = (bool?)row["archived"],
                            is_self                = (bool?)row["is_self"],
                            from_id                = row["from_id"],
                            permalink              = row["permalink"],
                            name                   = row["name"],
                            author_flair_text      = row["author_flair_text"],
                            quarantine             = (bool?)row["quarantine"],
                            link_flair_text        = row["link_flair_text"],
                            distinguished          = row["distinguished"]
                        };
                        rows.AddLast(
                            JsonConvert.SerializeObject(rowPostObj,
                                                        Formatting.None,
                                                        serializerSettings
                                                        ));
                    }

                }
                System.IO.File.WriteAllLines(@"D:\dev\data\" + dataset + "\\" + subreddit + "\\" + fileName, rows);
            }
            Console.WriteLine("Complete");

            Console.ReadLine();
        }