hene

hene.dev

BigQuery の課金されるバイト数をモニタリングするクエリの作成

BigQuery の課金されるバイト数をモニタリングするクエリの作成

BigQuery のコストが増えてきたため、下記のディメンション別で課金されるバイト数をモニタリングすることにした。

  • プロジェクト別課金されるバイト数
  • ユーザー別課金されるバイト数
  • クエリ実行日とユーザー別課金されるバイト数
  • クエリ別課金されるバイト数

弊社の環境

GCP の複数のプロジェクトで BigQuery を利用している。

  • project_a
  • project_b
  • project_c
  • ...

region は、すべて asig-northeast1 で統一している。

データの参照元

INFORMATION_SCHEMA.JOBS_BY_PROJECT を参照する。 課金されるバイト数を含む、BigQuery ジョブに関するリアルタイムのメタデータを取得できる。

クエリ

プロジェクト別課金されるバイト数

with project_a as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_a`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

, project_b as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_b`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

, project_c as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_c`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

-- , project_d as (
--   select
--     cast(
--       format_timestamp(
--         '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
--       ) as datetime
--     ) as creation_time
--     , project_id
--     , user_email
--     , job_id
--     , job_type
--     , statement_type
--     , priority
--     , query
--     , referenced_tables
--     , state
--     , total_bytes_billed
--   from `project_d`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
-- )

, unioned as (
  select * from project_a
  union all
  select * from project_b
  union all
  select * from project_c
  -- union all
  -- select * from project_d
)

, calculated as (
  select
    cast(
      datetime_trunc(creation_time, month) as date
    ) as creation_month
    , project_id
    , job_type
    , count(*) as job_count
    , max(total_bytes_billed) as max_total_bytes_billed
    , sum(total_bytes_billed) as sum_total_bytes_billed
  from unioned
  group by
    creation_month
    , project_id
    , job_type
)

, final as (
  select
    creation_month
    , project_id
    , job_type
    , job_count
    , max_total_bytes_billed
    , sum_total_bytes_billed
    , safe_divide(max_total_bytes_billed, pow(1024, 3)) as max_total_gigabytes_billed
    , safe_divide(sum_total_bytes_billed, pow(1024, 3)) as sum_total_gigabytes_billed
  from calculated
  where
    sum_total_bytes_billed is not null
    and sum_total_bytes_billed != 0
)

select
  *
from final
order by
  creation_month desc
  , sum_total_gigabytes_billed desc

ユーザー別課金されるバイト数

with project_a as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_a`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

, project_b as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_b`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

, project_c as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_c`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

-- , project_d as (
--   select
--     cast(
--       format_timestamp(
--         '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
--       ) as datetime
--     ) as creation_time
--     , project_id
--     , user_email
--     , job_id
--     , job_type
--     , statement_type
--     , priority
--     , query
--     , referenced_tables
--     , state
--     , total_bytes_billed
--   from `project_d`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
-- )

, unioned as (
  select * from project_a
  union all
  select * from project_b
  union all
  select * from project_c
  -- union all
  -- select * from project_d
)

, calculated as (
  select
    cast(
      datetime_trunc(creation_time, month) as date
    ) as creation_month
    , project_id
    , user_email
    , job_type
    , count(*) as job_count
    , max(total_bytes_billed) as max_total_bytes_billed
    , sum(total_bytes_billed) as sum_total_bytes_billed
  from unioned
  group by
    creation_month
    , project_id
    , user_email
    , job_type
)

, final as (
  select
    creation_month
    , project_id
    , user_email
    , job_type
    , job_count
    , max_total_bytes_billed
    , sum_total_bytes_billed
    , safe_divide(max_total_bytes_billed, pow(1024, 3)) as max_total_gigabytes_billed
    , safe_divide(sum_total_bytes_billed, pow(1024, 3)) as sum_total_gigabytes_billed
  from calculated
  where
    sum_total_bytes_billed is not null
    and sum_total_bytes_billed != 0
)

select
  *
from final
order by
  creation_month desc
  , sum_total_gigabytes_billed desc

クエリ実行日とユーザー別課金されるバイト数

with project_a as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_a`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

, project_b as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_b`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

, project_c as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_c`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

-- , project_d as (
--   select
--     cast(
--       format_timestamp(
--         '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
--       ) as datetime
--     ) as creation_time
--     , project_id
--     , user_email
--     , job_id
--     , job_type
--     , statement_type
--     , priority
--     , query
--     , referenced_tables
--     , state
--     , total_bytes_billed
--   from `project_d`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
-- )

, unioned as (
  select * from project_a
  union all
  select * from project_b
  union all
  select * from project_c
  -- union all
  -- select * from project_d
)

, calculated as (
  select
    cast(
      datetime_trunc(creation_time, day) as date
    ) as creation_day
    , project_id
    , user_email
    , job_type
    , count(*) as job_count
    , max(total_bytes_billed) as max_total_bytes_billed
    , sum(total_bytes_billed) as sum_total_bytes_billed
  from unioned
  group by
    creation_day
    , project_id
    , user_email
    , job_type
)

, final as (
  select
    creation_day
    , project_id
    , user_email
    , job_type
    , job_count
    , max_total_bytes_billed
    , sum_total_bytes_billed
    , safe_divide(max_total_bytes_billed, pow(1024, 3)) as max_total_gigabytes_billed
    , safe_divide(sum_total_bytes_billed, pow(1024, 3)) as sum_total_gigabytes_billed
  from calculated
  where
    sum_total_bytes_billed is not null
    and sum_total_bytes_billed != 0
)

select
  *
from final
order by
  creation_day desc
  , sum_total_gigabytes_billed desc

クエリ別課金されるバイト数

with project_a as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_a`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

, project_b as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_b`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

, project_c as (
  select
    cast(
      format_timestamp(
        '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
      ) as datetime
    ) as creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
  from `project_c`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
)

-- , project_d as (
--   select
--     cast(
--       format_timestamp(
--         '%Y-%m-%d %H:%M:%S', creation_time, 'Asia/Tokyo'
--       ) as datetime
--     ) as creation_time
--     , project_id
--     , user_email
--     , job_id
--     , job_type
--     , statement_type
--     , priority
--     , query
--     , referenced_tables
--     , state
--     , total_bytes_billed
--   from `project_d`.`region-asia-northeast1.INFORMATION_SCHEMA`.`JOBS_BY_PROJECT`
-- )

, unioned as (
  select * from project_a
  union all
  select * from project_b
  union all
  select * from project_c
  -- union all
  -- select * from project_d
)

, final as (
  select
    cast(
      datetime_trunc(creation_time, day) as date
    ) as creation_day
    , creation_time
    , project_id
    , user_email
    , job_id
    , job_type
    , statement_type
    , priority
    , query
    , referenced_tables
    , state
    , total_bytes_billed
    , safe_divide(total_bytes_billed, pow(1024, 3)) as total_gigabytes_billed
  from unioned
  where
    total_bytes_billed is not null
    and total_bytes_billed != 0
)

select
  *
from final
order by
  creation_day desc
  , total_bytes_billed desc

今後やりたいこと

  • Slack 通知
  • 課金されるバイト数の上限値の設定

参考

関連記事