Skip to content

[Bug] MATERIALIZED VIEW Partition Compensation Rewriting FailSummary #59225

@ScalaFirst

Description

@ScalaFirst

Search before asking

  • I had searched in the issues and found no similar issues.

Version

3.1.3

What's Wrong?

Image

select base table group by xxx, can not use MaterializedViewRewrite.

FailSummary: View struct info is invalid, need compensate union all, but can not, because the query structInfo.

show create table "base table" is :

CREATE TABLE stat_engine (
  event_hour datetime NOT NULL,
  channel_placement_name varchar(255) NULL,
  host varchar(255) NULL,
  bucket varchar(255) NULL,
  mid varchar(255) NULL,
  user_id varchar(255) NULL,
  user_name varchar(255) NULL,
  app_id varchar(255) NULL,
  app_name varchar(255) NULL,
  app_placement_id varchar(255) NULL,
  app_placement_name varchar(255) NULL,
  app_bundle varchar(255) NULL,
  app_version varchar(255) NULL,
  channel_id varchar(255) NULL,
  channel_name varchar(255) NULL,
  channel_placement_id varchar(255) NULL,
  ad_type varchar(255) NULL,
  os varchar(50) NULL,
  dlp varchar(255) NULL,
  lpp varchar(255) NULL,
  bct varchar(255) NULL,
  campaign_id varchar(255) NULL,
  make varchar(255) NULL,
  model varchar(255) NULL,
  pub_bid_floor_range varchar(255) NULL,
  adv_bid_floor_range varchar(255) NULL,
  pub_bid_price_range varchar(255) NULL,
  adv_bid_price_range varchar(255) NULL,
  request_num bigint NULL DEFAULT "0",
  fill_num bigint NULL DEFAULT "0",
  pub_bid_floor decimal(38,4) NULL DEFAULT "0.0",
  adv_bid_floor decimal(38,4) NULL DEFAULT "0.0",
  adv_bid_price decimal(38,4) NULL DEFAULT "0.0",
  pub_bid_price decimal(38,4) NULL DEFAULT "0.0",
  ssp_request_num bigint NULL DEFAULT "0",
  ssp_forward_num bigint NULL DEFAULT "0",
  ssp_forbidden_num bigint NULL DEFAULT "0",
  ssp_timeout_num bigint NULL DEFAULT "0",
  ssp_fill_num bigint NULL DEFAULT "0",
  impr_num bigint NULL DEFAULT "0",
  raw_impr_num bigint NULL DEFAULT "0",
  click_num bigint NULL DEFAULT "0",
  raw_click_num bigint NULL DEFAULT "0",
  drop_click_num bigint NULL DEFAULT "0",
  raw_drop_click_num bigint NULL DEFAULT "0",
  invoke_num bigint NULL DEFAULT "0",
  raw_invoke_num bigint NULL DEFAULT "0",
  income decimal(38,4) NULL DEFAULT "0.0",
  upstream_income decimal(38,4) NULL DEFAULT "0.0",
  earnings decimal(38,4) NULL DEFAULT "0.0",
  upstream_ratio decimal(38,4) NULL DEFAULT "0.0",
  qh_invoke_num bigint NULL DEFAULT "0",
  qh_dau_invoke_num bigint NULL DEFAULT "0",
  qh_no_client_cvr_num bigint NULL DEFAULT "0",
  qh_delayed_attributed_purchase_num bigint NULL DEFAULT "0",
  qh_delayed_unattributed_purchase_num bigint NULL DEFAULT "0",
  qh_no_delay_attributed_purchase_num bigint NULL DEFAULT "0",
  qh_no_delay_invoke_num bigint NULL DEFAULT "0",
  qh_client_cvr_attributed_purchase_num bigint NULL DEFAULT "0" COMMENT "",
  qh_flash_mac_num bigint NULL DEFAULT "0" COMMENT "",
  br_callback bigint NULL DEFAULT "0" COMMENT "",
  cvr79_num bigint NULL DEFAULT "0" COMMENT "",
  qh_flash110_num bigint NULL DEFAULT "0" COMMENT "",
  INDEX idx_user_name (user_name) USING INVERTED,
  INDEX idx_channel_name (channel_name) USING INVERTED,
  INDEX idx_channel_placement_name (channel_placement_name) USING INVERTED,
  INDEX idx_app_placement_id (app_placement_id) USING INVERTED,
  INDEX idx_app_bundle (app_bundle) USING INVERTED,
  INDEX idx_channel_placement_id (channel_placement_id) USING INVERTED,
  INDEX idx_app_placement_name (app_placement_name) USING INVERTED
) ENGINE=OLAP
DUPLICATE KEY(event_hour, channel_placement_name)
AUTO PARTITION BY RANGE (date_trunc(event_hour, 'hour'))
(...,PARTITION p20251222080000 VALUES [('2025-12-22 08:00:00'), ('2025-12-22 09:00:00')),...)
DISTRIBUTED BY HASH(app_placement_id, app_bundle, channel_placement_id) BUCKETS 16
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728"
;

show MATERIALIZED VIEW is :

CREATE MATERIALIZED VIEW IF NOT EXISTS `dwd_view_stat_engine_agg_hour_demension_normal` 
BUILD IMMEDIATE REFRESH AUTO ON SCHEDULE EVERY 1 hour starts '2025-12-22 19:10:00'
DUPLICATE KEY(`event_hour`,`channel_placement_name`,`host`)
PARTITION BY (event_hour)
DISTRIBUTED BY HASH(`channel_placement_name`, `app_placement_name`) BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"grace_period" = "300",
"use_for_rewrite" = "true",
"workload_group" = "scheduler_group"
) as 
SELECT
    date_trunc(event_hour,'hour') as event_hour,                      -- 1  
    channel_placement_name,          -- 2
    any_value(host) as host,                            -- 3
    any_value(bucket) as bucket,                          -- 4
    mid,                             -- 5
    user_id,  -- 6
    user_name,-- 7
    app_id,                          -- 8
    app_name,                        -- 9
    app_placement_id,                --10
    app_placement_name,              --11
    app_bundle,                      --12
    app_version,                     --13
    channel_id,                      --14
    channel_name,                    --15
    channel_placement_id,            --16
    ad_type,                         --17
    os,                              --18
    dlp,                             --19
    lpp,                             --20
    bct,                             --21
    campaign_id,                     --22
    any_value(make) as make,                            --23
    any_value(model) as model,                           --24
    pub_bid_floor_range,             --25
    adv_bid_floor_range,             --26
    pub_bid_price_range,             --27
    adv_bid_price_range,             --28
    SUM(request_num)              AS request_num,
    SUM(fill_num)                 AS fill_num,
    SUM(pub_bid_floor)            AS pub_bid_floor,
    SUM(adv_bid_floor)            AS adv_bid_floor,
    SUM(adv_bid_price)            AS adv_bid_price,
    SUM(pub_bid_price)            AS pub_bid_price,
    SUM(ssp_request_num)          AS ssp_request_num,
    SUM(ssp_forward_num)          AS ssp_forward_num,
    SUM(ssp_forbidden_num)        AS ssp_forbidden_num,
    SUM(ssp_timeout_num)          AS ssp_timeout_num,
    SUM(ssp_fill_num)             AS ssp_fill_num,
    SUM(impr_num)                 AS impr_num,
    SUM(raw_impr_num)             AS raw_impr_num,
    SUM(click_num)                AS click_num,
    SUM(raw_click_num)            AS raw_click_num,
    SUM(drop_click_num)           AS drop_click_num,
    SUM(raw_drop_click_num)       AS raw_drop_click_num,
    SUM(invoke_num)               AS invoke_num,
    SUM(raw_invoke_num)           AS raw_invoke_num,
    SUM(income)                   AS income,
    SUM(upstream_income)          AS upstream_income,
    SUM(earnings)                 AS earnings,
    SUM(upstream_ratio)           AS upstream_ratio,
    SUM(qh_invoke_num)            AS qh_invoke_num,
    SUM(qh_dau_invoke_num)        AS qh_dau_invoke_num,
    SUM(qh_no_client_cvr_num)     AS qh_no_client_cvr_num,
    SUM(qh_delayed_attributed_purchase_num)    AS qh_delayed_attributed_purchase_num,
    SUM(qh_delayed_unattributed_purchase_num)  AS qh_delayed_unattributed_purchase_num,
    SUM(qh_no_delay_attributed_purchase_num)   AS qh_no_delay_attributed_purchase_num,
    SUM(qh_no_delay_invoke_num)   AS qh_no_delay_invoke_num,
    SUM(qh_client_cvr_attributed_purchase_num) AS qh_client_cvr_attributed_purchase_num,
    SUM(qh_flash_mac_num)         AS qh_flash_mac_num,
    SUM(br_callback)              AS br_callback,
    SUM(cvr79_num)                AS cvr79_num,
    SUM(qh_flash110_num)          AS qh_flash110_num
FROM stat_engine
GROUP BY
    date_trunc(event_hour,'hour'),
    channel_placement_name,
    mid,
    user_id,  -- 6
    user_name,-- 7
    app_id,
    app_name,
    app_placement_id,
    app_placement_name,
    app_bundle,
    app_version,
    channel_id,
    channel_name,
    channel_placement_id,
    ad_type,
    os,
    dlp,
    lpp,
    bct,
    campaign_id,
    pub_bid_floor_range,
    adv_bid_floor_range,
    pub_bid_price_range,
    adv_bid_price_range
;

select SQL is:

explain SELECT
((SUM(income)/SUM(ssp_forward_num))*1000000) AS sumrequ-83f,
SUM(income) AS sum_income,
((SUM(income)/SUM(impr_num))*1000) AS suminco-4f4,
((SUM(earnings)/SUM(impr_num))*1000) AS suminco-5f8,
SUM(earnings) AS sum_earnings,
SUM(request_num) AS sumimpr-604,
SUM(fill_num) AS sumimpr-850,
((SUM(fill_num)/SUM(request_num))*100) AS sumfill-233,
SUM(impr_num) AS count,
((SUM(impr_num)/SUM(fill_num))*100) AS sumimpr-52a,
SUM(click_num) AS sumimpr-c22,
((SUM(adv_bid_price)/SUM(ssp_fill_num))/100) AS sum_adv_bid_price,
(SUM(income)-SUM(upstream_income)) AS sumearn-78f,
((SUM(income)-SUM(earnings))-SUM(upstream_income)) AS suminco-19e,
(1-(SUM(earnings)/SUM(income))) AS suminco-1e0
FROM stat_engine
GROUP BY ''
;

What You Expected?

query will use Partition Compensation Rewrite.

How to Reproduce?

No response

Anything Else?

by the way, when query filter refreshed partition, the query rewrite will take successed . such as "where event_hour < date_trunc(now() - interval 2 hour) -- rewrite success because view has refreshed at one hours ago"

Are you willing to submit PR?

  • Yes I am willing to submit a PR!

Code of Conduct

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions