diff --git a/src/backend/optimizer/plan/aqumv.c b/src/backend/optimizer/plan/aqumv.c index 4a576061780..2084bcbc181 100644 --- a/src/backend/optimizer/plan/aqumv.c +++ b/src/backend/optimizer/plan/aqumv.c @@ -996,3 +996,348 @@ groupby_query_rewrite(PlannerInfo *subroot, subroot->append_rel_list = NIL; return true; } + +/* + * aqumv_query_is_exact_match + * + * Compare two Query trees for semantic identity. Both should be at the + * same preprocessing stage (raw parser output). Returns true only if + * they are structurally identical in all query-semantics fields. + */ +static bool +aqumv_query_is_exact_match(Query *raw_parse, Query *viewQuery) +{ + /* Both must be CMD_SELECT */ + if (raw_parse->commandType != CMD_SELECT || + viewQuery->commandType != CMD_SELECT) + return false; + + /* Same number of range table entries */ + if (list_length(raw_parse->rtable) != list_length(viewQuery->rtable)) + return false; + + /* Compare range tables (table OIDs, join types, aliases structure) */ + if (!equal(raw_parse->rtable, viewQuery->rtable)) + return false; + + /* Compare join tree (FROM clause + WHERE quals) */ + if (!equal(raw_parse->jointree, viewQuery->jointree)) + return false; + + /* Compare target list entries: expressions and sort/group refs */ + if (list_length(raw_parse->targetList) != list_length(viewQuery->targetList)) + return false; + { + ListCell *lc1, *lc2; + forboth(lc1, raw_parse->targetList, lc2, viewQuery->targetList) + { + TargetEntry *tle1 = lfirst_node(TargetEntry, lc1); + TargetEntry *tle2 = lfirst_node(TargetEntry, lc2); + if (!equal(tle1->expr, tle2->expr)) + return false; + if (tle1->resjunk != tle2->resjunk) + return false; + if (tle1->ressortgroupref != tle2->ressortgroupref) + return false; + } + } + + /* Compare GROUP BY, HAVING, ORDER BY, DISTINCT, LIMIT */ + if (!equal(raw_parse->groupClause, viewQuery->groupClause)) + return false; + if (!equal(raw_parse->havingQual, viewQuery->havingQual)) + return false; + if (!equal(raw_parse->sortClause, viewQuery->sortClause)) + return false; + if (!equal(raw_parse->distinctClause, viewQuery->distinctClause)) + return false; + if (!equal(raw_parse->limitCount, viewQuery->limitCount)) + return false; + if (!equal(raw_parse->limitOffset, viewQuery->limitOffset)) + return false; + + /* Compare boolean flags */ + if (raw_parse->hasAggs != viewQuery->hasAggs) + return false; + if (raw_parse->hasWindowFuncs != viewQuery->hasWindowFuncs) + return false; + if (raw_parse->hasDistinctOn != viewQuery->hasDistinctOn) + return false; + + return true; +} + +/* + * answer_query_using_materialized_views_for_join + * + * Handle multi-table JOIN queries via exact-match comparison. + * This is completely independent from the single-table AQUMV code path. + * + * We compare the saved raw parse tree (before any planner preprocessing) + * against the stored viewQuery from gp_matview_aux. On exact match, + * rewrite the query to a simple SELECT FROM mv. + */ +RelOptInfo * +answer_query_using_materialized_views_for_join(PlannerInfo *root, AqumvContext aqumv_context) +{ + RelOptInfo *current_rel = aqumv_context->current_rel; + query_pathkeys_callback qp_callback = aqumv_context->qp_callback; + Query *parse = root->parse; + Query *raw_parse = root->aqumv_raw_parse; + RelOptInfo *mv_final_rel = current_rel; + Relation matviewRel; + Relation mvauxDesc; + TupleDesc mvaux_tupdesc; + SysScanDesc mvscan; + HeapTuple tup; + Form_gp_matview_aux mvaux_tup; + bool need_close = false; + + /* Must have the saved raw parse tree. */ + if (raw_parse == NULL) + return mv_final_rel; + + /* Must be a join query (more than one table in FROM). */ + if (list_length(raw_parse->rtable) <= 1) + return mv_final_rel; + + /* Basic eligibility checks (same as single-table AQUMV). */ + if (parse->commandType != CMD_SELECT || + parse->rowMarks != NIL || + parse->scatterClause != NIL || + parse->cteList != NIL || + parse->setOperations != NULL || + parse->hasModifyingCTE || + parse->parentStmtType == PARENTSTMTTYPE_REFRESH_MATVIEW || + parse->parentStmtType == PARENTSTMTTYPE_CTAS || + contain_mutable_functions((Node *) raw_parse) || + parse->hasSubLinks) + return mv_final_rel; + + mvauxDesc = table_open(GpMatviewAuxId, AccessShareLock); + mvaux_tupdesc = RelationGetDescr(mvauxDesc); + + mvscan = systable_beginscan(mvauxDesc, InvalidOid, false, + NULL, 0, NULL); + + while (HeapTupleIsValid(tup = systable_getnext(mvscan))) + { + Datum view_query_datum; + char *view_query_str; + bool is_null; + Query *viewQuery; + RangeTblEntry *mvrte; + PlannerInfo *subroot; + TupleDesc mv_tupdesc; + + CHECK_FOR_INTERRUPTS(); + if (need_close) + table_close(matviewRel, AccessShareLock); + + mvaux_tup = (Form_gp_matview_aux) GETSTRUCT(tup); + matviewRel = table_open(mvaux_tup->mvoid, AccessShareLock); + need_close = true; + + if (!RelationIsPopulated(matviewRel)) + continue; + + /* MV must be up-to-date (IVM is always current). */ + if (!RelationIsIVM(matviewRel) && + !MatviewIsGeneralyUpToDate(RelationGetRelid(matviewRel))) + continue; + + /* Get a copy of view query. */ + view_query_datum = heap_getattr(tup, + Anum_gp_matview_aux_view_query, + mvaux_tupdesc, + &is_null); + + view_query_str = TextDatumGetCString(view_query_datum); + viewQuery = copyObject(stringToNode(view_query_str)); + pfree(view_query_str); + Assert(IsA(viewQuery, Query)); + + /* Skip single-table viewQueries (handled by existing AQUMV). */ + if (list_length(viewQuery->rtable) <= 1) + continue; + + /* Exact match comparison between raw parse and view query. */ + if (!aqumv_query_is_exact_match(raw_parse, viewQuery)) + continue; + + /* + * We have an exact match. Rewrite viewQuery to: + * SELECT mv.col1, mv.col2, ... FROM mv + */ + mv_tupdesc = RelationGetDescr(matviewRel); + + /* Build new target list referencing MV columns. */ + { + List *new_tlist = NIL; + ListCell *lc; + int attnum = 0; + + foreach(lc, viewQuery->targetList) + { + TargetEntry *old_tle = lfirst_node(TargetEntry, lc); + TargetEntry *new_tle; + Var *newVar; + Form_pg_attribute attr; + + if (old_tle->resjunk) + continue; + + attnum++; + attr = TupleDescAttr(mv_tupdesc, attnum - 1); + + newVar = makeVar(1, + attr->attnum, + attr->atttypid, + attr->atttypmod, + attr->attcollation, + 0); + newVar->location = -1; + + new_tle = makeTargetEntry((Expr *) newVar, + (AttrNumber) attnum, + old_tle->resname, + false); + new_tlist = lappend(new_tlist, new_tle); + } + + viewQuery->targetList = new_tlist; + } + + /* Create new RTE for the MV. */ + mvrte = makeNode(RangeTblEntry); + mvrte->rtekind = RTE_RELATION; + mvrte->relid = RelationGetRelid(matviewRel); + mvrte->relkind = RELKIND_MATVIEW; + mvrte->rellockmode = AccessShareLock; + mvrte->inh = false; + mvrte->inFromCl = true; + + /* Build eref with column names from the MV's TupleDesc. */ + { + Alias *eref = makeAlias(RelationGetRelationName(matviewRel), NIL); + int i; + for (i = 0; i < mv_tupdesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(mv_tupdesc, i); + if (!attr->attisdropped) + eref->colnames = lappend(eref->colnames, + makeString(pstrdup(NameStr(attr->attname)))); + else + eref->colnames = lappend(eref->colnames, + makeString(pstrdup(""))); + } + mvrte->eref = eref; + mvrte->alias = makeAlias(RelationGetRelationName(matviewRel), NIL); + } + + viewQuery->rtable = list_make1(mvrte); + viewQuery->jointree = makeFromExpr(list_make1(makeNode(RangeTblRef)), NULL); + ((RangeTblRef *) linitial(viewQuery->jointree->fromlist))->rtindex = 1; + + /* Clear aggregation/grouping/sorting state — all materialized. */ + viewQuery->hasAggs = false; + viewQuery->groupClause = NIL; + viewQuery->havingQual = NULL; + viewQuery->sortClause = NIL; + viewQuery->distinctClause = NIL; + viewQuery->hasDistinctOn = false; + viewQuery->hasWindowFuncs = false; + viewQuery->hasTargetSRFs = false; + viewQuery->limitCount = parse->limitCount; + viewQuery->limitOffset = parse->limitOffset; + viewQuery->limitOption = parse->limitOption; + + /* Create subroot for planning the MV scan. */ + subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo)); + memcpy(subroot, root, sizeof(PlannerInfo)); + subroot->parent_root = root; + subroot->eq_classes = NIL; + subroot->plan_params = NIL; + subroot->outer_params = NULL; + subroot->init_plans = NIL; + subroot->agginfos = NIL; + subroot->aggtransinfos = NIL; + subroot->parse = viewQuery; + subroot->tuple_fraction = root->tuple_fraction; + subroot->limit_tuples = root->limit_tuples; + subroot->append_rel_list = NIL; + subroot->hasHavingQual = false; + subroot->hasNonPartialAggs = false; + subroot->hasNonSerialAggs = false; + subroot->numOrderedAggs = 0; + subroot->hasNonCombine = false; + subroot->numPureOrderedAggs = 0; + + subroot->processed_tlist = NIL; + preprocess_targetlist(subroot); + + /* Compute final locus for the MV scan. */ + { + PathTarget *newtarget = make_pathtarget_from_tlist(subroot->processed_tlist); + subroot->final_locus = cdbllize_get_final_locus(subroot, newtarget); + } + + /* + * Plan the MV scan. + * + * We need a clean qp_extra with no groupClause or activeWindows, + * because the rewritten viewQuery is a simple SELECT from the MV + * with no GROUP BY, windowing, etc. The standard_qp_callback uses + * qp_extra->groupClause to compute group_pathkeys, which would fail + * if it still contained the original query's GROUP BY expressions. + * + * standard_qp_extra is { List *activeWindows; List *groupClause; }, + * so a zeroed struct of that size works correctly (both fields NIL). + */ + { + char clean_qp_extra[2 * sizeof(List *)]; + memset(clean_qp_extra, 0, sizeof(clean_qp_extra)); + mv_final_rel = query_planner(subroot, qp_callback, clean_qp_extra); + } + + /* Cost-based decision: use MV only if cheaper. */ + if (mv_final_rel->cheapest_total_path->total_cost < current_rel->cheapest_total_path->total_cost) + { + root->parse = viewQuery; + root->processed_tlist = subroot->processed_tlist; + root->agginfos = subroot->agginfos; + root->aggtransinfos = subroot->aggtransinfos; + root->simple_rte_array = subroot->simple_rte_array; + root->simple_rel_array = subroot->simple_rel_array; + root->simple_rel_array_size = subroot->simple_rel_array_size; + root->hasNonPartialAggs = subroot->hasNonPartialAggs; + root->hasNonSerialAggs = subroot->hasNonSerialAggs; + root->numOrderedAggs = subroot->numOrderedAggs; + root->hasNonCombine = subroot->hasNonCombine; + root->numPureOrderedAggs = subroot->numPureOrderedAggs; + root->hasHavingQual = subroot->hasHavingQual; + root->group_pathkeys = subroot->group_pathkeys; + root->sort_pathkeys = subroot->sort_pathkeys; + root->query_pathkeys = subroot->query_pathkeys; + root->distinct_pathkeys = subroot->distinct_pathkeys; + root->eq_classes = subroot->eq_classes; + root->append_rel_list = subroot->append_rel_list; + current_rel = mv_final_rel; + table_close(matviewRel, NoLock); + need_close = false; + break; + } + else + { + /* MV is not cheaper, reset and try next. */ + mv_final_rel = current_rel; + } + } + + if (need_close) + table_close(matviewRel, AccessShareLock); + systable_endscan(mvscan); + table_close(mvauxDesc, AccessShareLock); + + return current_rel; +} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index da5acb23ebf..89e3611c3a5 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -952,6 +952,17 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->partColsUpdated = false; root->is_correlated_subplan = false; + /* + * Save a copy of the raw parse tree for AQUMV join exact-match. + * This must be done before any preprocessing modifies the parse tree. + */ + if (Gp_role == GP_ROLE_DISPATCH && + enable_answer_query_using_materialized_views && + parent_root == NULL) + root->aqumv_raw_parse = copyObject(parse); + else + root->aqumv_raw_parse = NULL; + /* * If there is a WITH list, process each WITH query and either convert it * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it. @@ -1935,6 +1946,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Do the real work. */ current_rel = answer_query_using_materialized_views(root, aqumv_context); + + /* Try join AQUMV if single-table didn't rewrite. */ + if (current_rel == aqumv_context->current_rel) + { + current_rel = answer_query_using_materialized_views_for_join(root, aqumv_context); + } + /* parse tree may be rewriten. */ parse = root->parse; } diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index b02a2ccf0c1..3cd21590ac9 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -505,6 +505,8 @@ struct PlannerInfo int numPureOrderedAggs; /* CDB: number that use ORDER BY/WITHIN GROUP, not counting DISTINCT */ bool hasNonCombine; /* CDB: any agg func w/o a combine func? */ bool is_from_orca; /* true if this PlannerInfo was created from Orca*/ + + Query *aqumv_raw_parse; /* Raw parse tree for AQUMV join exact-match */ }; /* diff --git a/src/include/optimizer/aqumv.h b/src/include/optimizer/aqumv.h index 6e51d4dbc92..2bb4122cf11 100644 --- a/src/include/optimizer/aqumv.h +++ b/src/include/optimizer/aqumv.h @@ -44,5 +44,6 @@ typedef struct AqumvContextData { typedef AqumvContextData *AqumvContext; extern RelOptInfo* answer_query_using_materialized_views(PlannerInfo *root, AqumvContextData *aqumv_context); +extern RelOptInfo* answer_query_using_materialized_views_for_join(PlannerInfo *root, AqumvContextData *aqumv_context); #endif /* AQUMV_H */ diff --git a/src/test/regress/expected/matview_data.out b/src/test/regress/expected/matview_data.out index 85697e90072..e415ceaf363 100644 --- a/src/test/regress/expected/matview_data.out +++ b/src/test/regress/expected/matview_data.out @@ -791,6 +791,1274 @@ drop materialized view mv_join2; drop table jt3; drop table jt2; drop table jt1; +-- +-- Test AQUMV (Answer Query Using Materialized Views) with join queries. +-- Each matching test shows EXPLAIN + SELECT with GUC off (original plan), +-- then EXPLAIN + SELECT with GUC on (MV rewrite). Results must match. +-- +create table aqj_t1(a int, b int) distributed by (a); +create table aqj_t2(a int, b int) distributed by (a); +create table aqj_t3(a int, b int) distributed by (a); +insert into aqj_t1 select i, i*10 from generate_series(1, 100) i; +insert into aqj_t2 select i, i*100 from generate_series(1, 100) i; +insert into aqj_t3 select i, i*1000 from generate_series(1, 100) i; +analyze aqj_t1; +analyze aqj_t2; +analyze aqj_t3; +-- 1. Two-table INNER JOIN exact match +create materialized view mv_aqj_join2 as + select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_join2; +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + -> Hash + -> Seq Scan on aqj_t2 + Optimizer: Postgres query optimizer +(7 rows) + +select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order by 1 limit 5; + a | b +---+----- + 1 | 100 + 2 | 200 + 3 | 300 + 4 | 400 + 5 | 500 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_join2 + Optimizer: Postgres query optimizer +(3 rows) + +select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order by 1 limit 5; + a | b +---+----- + 1 | 100 + 2 | 200 + 3 | 300 + 4 | 400 + 5 | 500 +(5 rows) + +-- 2. Join with WHERE clause +create materialized view mv_aqj_where as + select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_where; +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + Filter: (a > 5) + -> Hash + -> Seq Scan on aqj_t2 + Filter: (a > 5) + Optimizer: Postgres query optimizer +(9 rows) + +select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5 order by 1 limit 5; + a | b +----+------ + 6 | 600 + 7 | 700 + 8 | 800 + 9 | 900 + 10 | 1000 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_where + Optimizer: Postgres query optimizer +(3 rows) + +select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5 order by 1 limit 5; + a | b +----+------ + 6 | 600 + 7 | 700 + 8 | 800 + 9 | 900 + 10 | 1000 +(5 rows) + +-- 3. Join with GROUP BY + aggregate +create materialized view mv_aqj_agg as + select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_agg; +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a; + QUERY PLAN +------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> HashAggregate + Group Key: aqj_t1.a + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + -> Hash + -> Seq Scan on aqj_t2 + Optimizer: Postgres query optimizer +(9 rows) + +select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a order by 1 limit 5; + a | cnt +---+----- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_agg + Optimizer: Postgres query optimizer +(3 rows) + +select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a order by 1 limit 5; + a | cnt +---+----- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +-- 4. Non-match: different WHERE clause (should show Hash Join, not MV) +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 10; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + Filter: (a > 10) + -> Hash + -> Seq Scan on aqj_t2 + Filter: (a > 10) + Optimizer: Postgres query optimizer +(9 rows) + +-- 5. Non-match: different target list +explain(costs off) select aqj_t1.b, aqj_t2.a from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + -> Hash + -> Seq Scan on aqj_t2 + Optimizer: Postgres query optimizer +(7 rows) + +-- 6. Non-match: different join type (INNER vs LEFT) +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 left join aqj_t2 on aqj_t1.a = aqj_t2.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + -> Hash + -> Seq Scan on aqj_t2 + Optimizer: Postgres query optimizer +(7 rows) + +-- 7. Three-table join +create materialized view mv_aqj_join3 as + select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_join3; +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a; + QUERY PLAN +------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t3.a) + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + -> Hash + -> Seq Scan on aqj_t2 + -> Hash + -> Seq Scan on aqj_t3 + Optimizer: Postgres query optimizer +(11 rows) + +select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a + order by 1 limit 5; + a | b | c +---+-----+------ + 1 | 100 | 1000 + 2 | 200 | 2000 + 3 | 300 | 3000 + 4 | 400 | 4000 + 5 | 500 | 5000 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_join3 + Optimizer: Postgres query optimizer +(3 rows) + +select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a + order by 1 limit 5; + a | b | c +---+-----+------ + 1 | 100 | 1000 + 2 | 200 | 2000 + 3 | 300 | 3000 + 4 | 400 | 4000 + 5 | 500 | 5000 +(5 rows) + +-- 8. Implicit join (FROM t1, t2 WHERE ...) +create materialized view mv_aqj_implicit as + select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_implicit; +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + -> Hash + -> Seq Scan on aqj_t2 + Optimizer: Postgres query optimizer +(7 rows) + +select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order by 1 limit 5; + a | b +---+----- + 1 | 100 + 2 | 200 + 3 | 300 + 4 | 400 + 5 | 500 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_implicit + Optimizer: Postgres query optimizer +(3 rows) + +select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order by 1 limit 5; + a | b +---+----- + 1 | 100 + 2 | 200 + 3 | 300 + 4 | 400 + 5 | 500 +(5 rows) + +-- 9. MV not up-to-date: after INSERT on base table +insert into aqj_t1 values(999, 9990); +set enable_answer_query_using_materialized_views = on; +-- Should NOT use mv_aqj_join2 (status is 'i') +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + -> Hash + -> Seq Scan on aqj_t2 + Optimizer: Postgres query optimizer +(7 rows) + +-- 10. After REFRESH: should use MV again +refresh materialized view mv_aqj_join2; +analyze mv_aqj_join2; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_join2 + Optimizer: Postgres query optimizer +(3 rows) + +-- 11. GUC off: should NOT use MV +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (aqj_t1.a = aqj_t2.a) + -> Seq Scan on aqj_t1 + -> Hash + -> Seq Scan on aqj_t2 + Optimizer: Postgres query optimizer +(7 rows) + +-- +-- More complex join AQUMV test cases with richer schemas +-- +create table aqj_orders( + order_id int, + customer_id int, + amount numeric(10,2), + status text, + order_date date +) distributed by (order_id); +create table aqj_customers( + customer_id int, + name text, + region text, + credit_limit numeric(10,2) +) distributed by (customer_id); +create table aqj_products( + product_id int, + name text, + category text, + price numeric(10,2) +) distributed by (product_id); +create table aqj_order_items( + item_id int, + order_id int, + product_id int, + quantity int +) distributed by (item_id); +insert into aqj_customers select i, 'cust_' || i, case when i % 3 = 0 then 'east' when i % 3 = 1 then 'west' else 'north' end, (i * 100)::numeric(10,2) from generate_series(1, 50) i; +insert into aqj_orders select i, (i % 50) + 1, (i * 10.5)::numeric(10,2), case when i % 4 = 0 then 'shipped' when i % 4 = 1 then 'pending' when i % 4 = 2 then 'delivered' else 'cancelled' end, '2024-01-01'::date + (i % 365) from generate_series(1, 200) i; +insert into aqj_products select i, 'prod_' || i, case when i % 5 = 0 then 'electronics' when i % 5 = 1 then 'books' when i % 5 = 2 then 'clothing' when i % 5 = 3 then 'food' else 'toys' end, (i * 5.99)::numeric(10,2) from generate_series(1, 30) i; +insert into aqj_order_items select i, (i % 200) + 1, (i % 30) + 1, (i % 10) + 1 from generate_series(1, 500) i; +analyze aqj_customers; +analyze aqj_orders; +analyze aqj_products; +analyze aqj_order_items; +-- 12. Join with multiple columns + WHERE on text column +create materialized view mv_aqj_orders_cust as + select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_orders_cust; +set enable_answer_query_using_materialized_views = off; +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: o.customer_id + -> Seq Scan on aqj_orders o + Filter: (status = 'shipped'::text) + -> Hash + -> Seq Scan on aqj_customers c + Optimizer: Postgres query optimizer +(10 rows) + +select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped' + order by o.order_id limit 5; + order_id | amount | name | region +----------+--------+---------+-------- + 4 | 42.00 | cust_5 | north + 8 | 84.00 | cust_9 | east + 12 | 126.00 | cust_13 | west + 16 | 168.00 | cust_17 | north + 20 | 210.00 | cust_21 | east +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_orders_cust + Optimizer: Postgres query optimizer +(3 rows) + +select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped' + order by o.order_id limit 5; + order_id | amount | name | region +----------+--------+---------+-------- + 4 | 42.00 | cust_5 | north + 8 | 84.00 | cust_9 | east + 12 | 126.00 | cust_13 | west + 16 | 168.00 | cust_17 | north + 20 | 210.00 | cust_21 | east +(5 rows) + +-- 13. Four-table join +create materialized view mv_aqj_order_details as + select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_order_details; +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (oi.product_id = p.product_id) + -> Hash Join + Hash Cond: (o.order_id = oi.order_id) + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: o.customer_id + -> Seq Scan on aqj_orders o + -> Hash + -> Seq Scan on aqj_customers c + -> Hash + -> Seq Scan on aqj_order_items oi + -> Hash + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on aqj_products p + Optimizer: Postgres query optimizer +(19 rows) + +select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + order by o.order_id, p.name limit 5; + order_id | customer_name | product_name | quantity | price +----------+---------------+--------------+----------+-------- + 1 | cust_2 | prod_11 | 1 | 65.89 + 1 | cust_2 | prod_21 | 1 | 125.79 + 2 | cust_3 | prod_12 | 2 | 71.88 + 2 | cust_3 | prod_2 | 2 | 11.98 + 2 | cust_3 | prod_22 | 2 | 131.78 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_order_details + Optimizer: Postgres query optimizer +(3 rows) + +select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + order by o.order_id, p.name limit 5; + order_id | customer_name | product_name | quantity | price +----------+---------------+--------------+----------+-------- + 1 | cust_2 | prod_11 | 1 | 65.89 + 1 | cust_2 | prod_21 | 1 | 125.79 + 2 | cust_3 | prod_12 | 2 | 71.88 + 2 | cust_3 | prod_2 | 2 | 11.98 + 2 | cust_3 | prod_22 | 2 | 131.78 +(5 rows) + +-- 14. GROUP BY on join with multiple aggregates: sum, count, avg +create materialized view mv_aqj_cust_summary as + select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'region' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_cust_summary; +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Finalize HashAggregate + Group Key: c.region + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: c.region + -> Streaming Partial HashAggregate + Group Key: c.region + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Seq Scan on aqj_orders o + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on aqj_customers c + Optimizer: Postgres query optimizer +(14 rows) + +select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region + order by c.region; + region | order_count | total_amount | avg_amount +--------+-------------+--------------+----------------------- + east | 64 | 66864.00 | 1044.7500000000000000 + north | 68 | 71400.00 | 1050.0000000000000000 + west | 68 | 72786.00 | 1070.3823529411764706 +(3 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_cust_summary + Optimizer: Postgres query optimizer +(3 rows) + +select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region + order by c.region; + region | order_count | total_amount | avg_amount +--------+-------------+--------------+----------------------- + east | 64 | 66864.00 | 1044.7500000000000000 + north | 68 | 71400.00 | 1050.0000000000000000 + west | 68 | 72786.00 | 1070.3823529411764706 +(3 rows) + +-- 15. Join with expression in target list (arithmetic + function) +create materialized view mv_aqj_expr as + select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_expr; +set enable_answer_query_using_materialized_views = off; +explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id; + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Seq Scan on aqj_orders o + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on aqj_customers c + Optimizer: Postgres query optimizer +(8 rows) + +select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + order by o.order_id limit 5; + order_id | amount_with_tax | name | region_upper +----------+-----------------+--------+-------------- + 1 | 11.550 | cust_2 | NORTH + 2 | 23.100 | cust_3 | EAST + 3 | 34.650 | cust_4 | WEST + 4 | 46.200 | cust_5 | NORTH + 5 | 57.750 | cust_6 | EAST +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_expr + Optimizer: Postgres query optimizer +(3 rows) + +select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + order by o.order_id limit 5; + order_id | amount_with_tax | name | region_upper +----------+-----------------+--------+-------------- + 1 | 11.550 | cust_2 | NORTH + 2 | 23.100 | cust_3 | EAST + 3 | 34.650 | cust_4 | WEST + 4 | 46.200 | cust_5 | NORTH + 5 | 57.750 | cust_6 | EAST +(5 rows) + +-- 16. Non-match: same tables + expressions, but extra WHERE (should NOT match mv_aqj_expr) +set enable_answer_query_using_materialized_views = on; +explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where c.region = 'east'; + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Seq Scan on aqj_orders o + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on aqj_customers c + Filter: (region = 'east'::text) + Optimizer: Postgres query optimizer +(9 rows) + +-- 17. Non-match: same tables but different aggregate target list +explain(costs off) + select c.region, sum(o.amount) as total_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Finalize HashAggregate + Group Key: c.region + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: c.region + -> Streaming Partial HashAggregate + Group Key: c.region + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Seq Scan on aqj_orders o + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on aqj_customers c + Optimizer: Postgres query optimizer +(14 rows) + +-- 18. Non-match: different join order (o JOIN c vs c JOIN o) +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_customers c join aqj_orders o on o.customer_id = c.customer_id + where o.status = 'shipped'; + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: o.customer_id + -> Seq Scan on aqj_orders o + Filter: (status = 'shipped'::text) + -> Hash + -> Seq Scan on aqj_customers c + Optimizer: Postgres query optimizer +(10 rows) + +-- 19. Join with compound WHERE (multiple AND conditions) +create materialized view mv_aqj_compound_where as + select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_compound_where; +set enable_answer_query_using_materialized_views = off; +explain(costs off) select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Seq Scan on aqj_orders o + Filter: ((amount > '50'::numeric) AND (status = 'pending'::text)) + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on aqj_customers c + Filter: (region = 'west'::text) + Optimizer: Postgres query optimizer +(10 rows) + +select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50 + order by o.order_id limit 5; + order_id | amount | name +----------+--------+--------- + 9 | 94.50 | cust_10 + 21 | 220.50 | cust_22 + 33 | 346.50 | cust_34 + 45 | 472.50 | cust_46 + 53 | 556.50 | cust_4 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_compound_where + Optimizer: Postgres query optimizer +(3 rows) + +select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50 + order by o.order_id limit 5; + order_id | amount | name +----------+--------+--------- + 9 | 94.50 | cust_10 + 21 | 220.50 | cust_22 + 33 | 346.50 | cust_34 + 45 | 472.50 | cust_46 + 53 | 556.50 | cust_4 +(5 rows) + +-- 20. Self-join +create materialized view mv_aqj_selfjoin as + select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_selfjoin; +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (o1.customer_id = o2.customer_id) + Join Filter: (o1.order_id < o2.order_id) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: o1.customer_id + -> Seq Scan on aqj_orders o1 + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: o2.customer_id + -> Seq Scan on aqj_orders o2 + Optimizer: Postgres query optimizer +(12 rows) + +select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id + order by o1.order_id, o2.order_id limit 5; + id1 | id2 | amt1 | amt2 +-----+-----+-------+--------- + 1 | 51 | 10.50 | 535.50 + 1 | 101 | 10.50 | 1060.50 + 1 | 151 | 10.50 | 1585.50 + 2 | 52 | 21.00 | 546.00 + 2 | 102 | 21.00 | 1071.00 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_selfjoin + Optimizer: Postgres query optimizer +(3 rows) + +select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id + order by o1.order_id, o2.order_id limit 5; + id1 | id2 | amt1 | amt2 +-----+-----+-------+--------- + 1 | 51 | 10.50 | 535.50 + 1 | 101 | 10.50 | 1060.50 + 1 | 151 | 10.50 | 1585.50 + 2 | 52 | 21.00 | 546.00 + 2 | 102 | 21.00 | 1071.00 +(5 rows) + +-- 21. GROUP BY with multi-column key on join +create materialized view mv_aqj_grp_multi as + select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'region, status' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_grp_multi; +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Finalize HashAggregate + Group Key: c.region, o.status + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: c.region, o.status + -> Streaming Partial HashAggregate + Group Key: c.region, o.status + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Seq Scan on aqj_orders o + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on aqj_customers c + Optimizer: Postgres query optimizer +(14 rows) + +select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status + order by c.region, o.status limit 6; + region | status | cnt | total +--------+-----------+-----+---------- + east | cancelled | 16 | 16968.00 + east | delivered | 16 | 16464.00 + east | pending | 16 | 16968.00 + east | shipped | 16 | 16464.00 + north | cancelled | 18 | 19425.00 + north | delivered | 16 | 16800.00 +(6 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_grp_multi + Optimizer: Postgres query optimizer +(3 rows) + +select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status + order by c.region, o.status limit 6; + region | status | cnt | total +--------+-----------+-----+---------- + east | cancelled | 16 | 16968.00 + east | delivered | 16 | 16464.00 + east | pending | 16 | 16968.00 + east | shipped | 16 | 16464.00 + north | cancelled | 18 | 19425.00 + north | delivered | 16 | 16800.00 +(6 rows) + +-- 22. Four-table join with WHERE and aggregate +create materialized view mv_aqj_3way_agg as + select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'region, category' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_3way_agg; +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Finalize HashAggregate + Group Key: c.region, p.category + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: c.region, p.category + -> Streaming Partial HashAggregate + Group Key: c.region, p.category + -> Hash Join + Hash Cond: (oi.product_id = p.product_id) + -> Hash Join + Hash Cond: (oi.order_id = o.order_id) + -> Seq Scan on aqj_order_items oi + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: o.customer_id + -> Seq Scan on aqj_orders o + Filter: (status = 'delivered'::text) + -> Hash + -> Seq Scan on aqj_customers c + -> Hash + -> Broadcast Motion 3:3 (slice5; segments: 3) + -> Seq Scan on aqj_products p + Optimizer: Postgres query optimizer +(26 rows) + +select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category + order by c.region, p.category limit 6; + region | category | total_qty | line_count +--------+-------------+-----------+------------ + east | books | 30 | 5 + east | clothing | 20 | 10 + east | electronics | 50 | 5 + east | food | 80 | 10 + east | toys | 40 | 10 + north | books | 60 | 10 +(6 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_3way_agg + Optimizer: Postgres query optimizer +(3 rows) + +select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category + order by c.region, p.category limit 6; + region | category | total_qty | line_count +--------+-------------+-----------+------------ + east | books | 30 | 5 + east | clothing | 20 | 10 + east | electronics | 50 | 5 + east | food | 80 | 10 + east | toys | 40 | 10 + north | books | 60 | 10 +(6 rows) + +-- 23. Implicit four-table join (comma style) +create materialized view mv_aqj_implicit3 as + select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending'; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze mv_aqj_implicit3; +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending'; + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (oi.product_id = p.product_id) + -> Hash Join + Hash Cond: (oi.order_id = o.order_id) + -> Seq Scan on aqj_order_items oi + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: o.customer_id + -> Seq Scan on aqj_orders o + Filter: (status = 'pending'::text) + -> Hash + -> Seq Scan on aqj_customers c + -> Hash + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on aqj_products p + Optimizer: Postgres query optimizer +(20 rows) + +select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending' + order by o.order_id, p.name limit 5; + order_id | name | product_name +----------+--------+-------------- + 1 | cust_2 | prod_11 + 1 | cust_2 | prod_21 + 5 | cust_6 | prod_15 + 5 | cust_6 | prod_25 + 5 | cust_6 | prod_5 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending'; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_implicit3 + Optimizer: Postgres query optimizer +(3 rows) + +select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending' + order by o.order_id, p.name limit 5; + order_id | name | product_name +----------+--------+-------------- + 1 | cust_2 | prod_11 + 1 | cust_2 | prod_21 + 5 | cust_6 | prod_15 + 5 | cust_6 | prod_25 + 5 | cust_6 | prod_5 +(5 rows) + +-- 24. Result correctness across DML + REFRESH cycle +insert into aqj_orders values(201, 1, 9999.99, 'shipped', '2025-12-31'); +set enable_answer_query_using_materialized_views = on; +-- Stale: should NOT use MV +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: o.customer_id + -> Seq Scan on aqj_orders o + Filter: (status = 'shipped'::text) + -> Hash + -> Seq Scan on aqj_customers c + Optimizer: Postgres query optimizer +(10 rows) + +-- Refresh and verify MV is used again +refresh materialized view mv_aqj_orders_cust; +analyze mv_aqj_orders_cust; +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on mv_aqj_orders_cust + Optimizer: Postgres query optimizer +(3 rows) + +-- The new row should appear in results via MV scan +select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped' and o.order_id = 201; + order_id | amount | name | region +----------+---------+--------+-------- + 201 | 9999.99 | cust_1 | west +(1 row) + +-- 25. Post-DML comprehensive: refresh all, then verify GUC off vs on results +refresh materialized view mv_aqj_order_details; +refresh materialized view mv_aqj_expr; +refresh materialized view mv_aqj_selfjoin; +refresh materialized view mv_aqj_grp_multi; +refresh materialized view mv_aqj_3way_agg; +refresh materialized view mv_aqj_implicit3; +analyze mv_aqj_order_details; +analyze mv_aqj_expr; +analyze mv_aqj_selfjoin; +analyze mv_aqj_grp_multi; +analyze mv_aqj_3way_agg; +analyze mv_aqj_implicit3; +-- Verify four-table join results after DML+refresh +set enable_answer_query_using_materialized_views = off; +select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + order by o.order_id, p.name limit 5; + order_id | customer_name | product_name | quantity | price +----------+---------------+--------------+----------+-------- + 1 | cust_2 | prod_11 | 1 | 65.89 + 1 | cust_2 | prod_21 | 1 | 125.79 + 2 | cust_3 | prod_12 | 2 | 71.88 + 2 | cust_3 | prod_2 | 2 | 11.98 + 2 | cust_3 | prod_22 | 2 | 131.78 +(5 rows) + +set enable_answer_query_using_materialized_views = on; +select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + order by o.order_id, p.name limit 5; + order_id | customer_name | product_name | quantity | price +----------+---------------+--------------+----------+-------- + 1 | cust_2 | prod_11 | 1 | 65.89 + 1 | cust_2 | prod_21 | 1 | 125.79 + 2 | cust_3 | prod_12 | 2 | 71.88 + 2 | cust_3 | prod_2 | 2 | 11.98 + 2 | cust_3 | prod_22 | 2 | 131.78 +(5 rows) + +-- Verify expression MV results after DML+refresh +set enable_answer_query_using_materialized_views = off; +select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + order by o.order_id limit 5; + order_id | amount_with_tax | name | region_upper +----------+-----------------+--------+-------------- + 1 | 11.550 | cust_2 | NORTH + 2 | 23.100 | cust_3 | EAST + 3 | 34.650 | cust_4 | WEST + 4 | 46.200 | cust_5 | NORTH + 5 | 57.750 | cust_6 | EAST +(5 rows) + +set enable_answer_query_using_materialized_views = on; +select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + order by o.order_id limit 5; + order_id | amount_with_tax | name | region_upper +----------+-----------------+--------+-------------- + 1 | 11.550 | cust_2 | NORTH + 2 | 23.100 | cust_3 | EAST + 3 | 34.650 | cust_4 | WEST + 4 | 46.200 | cust_5 | NORTH + 5 | 57.750 | cust_6 | EAST +(5 rows) + +-- Verify multi-key GROUP BY results after DML+refresh +set enable_answer_query_using_materialized_views = off; +select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status + order by c.region, o.status limit 6; + region | status | cnt | total +--------+-----------+-----+---------- + east | cancelled | 16 | 16968.00 + east | delivered | 16 | 16464.00 + east | pending | 16 | 16968.00 + east | shipped | 16 | 16464.00 + north | cancelled | 18 | 19425.00 + north | delivered | 16 | 16800.00 +(6 rows) + +set enable_answer_query_using_materialized_views = on; +select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status + order by c.region, o.status limit 6; + region | status | cnt | total +--------+-----------+-----+---------- + east | cancelled | 16 | 16968.00 + east | delivered | 16 | 16464.00 + east | pending | 16 | 16968.00 + east | shipped | 16 | 16464.00 + north | cancelled | 18 | 19425.00 + north | delivered | 16 | 16800.00 +(6 rows) + +-- Clean up AQUMV join test objects +drop materialized view mv_aqj_implicit3; +drop materialized view mv_aqj_3way_agg; +drop materialized view mv_aqj_grp_multi; +drop materialized view mv_aqj_selfjoin; +drop materialized view mv_aqj_compound_where; +drop materialized view mv_aqj_expr; +drop materialized view mv_aqj_cust_summary; +drop materialized view mv_aqj_order_details; +drop materialized view mv_aqj_orders_cust; +drop materialized view mv_aqj_implicit; +drop materialized view mv_aqj_join3; +drop materialized view mv_aqj_agg; +drop materialized view mv_aqj_where; +drop materialized view mv_aqj_join2; +drop table aqj_order_items; +drop table aqj_products; +drop table aqj_customers; +drop table aqj_orders; +drop table aqj_t3; +drop table aqj_t2; +drop table aqj_t1; -- test drop table select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 'mv2', 'mv3'); mvname | datastatus @@ -925,12 +2193,12 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u (6 rows) insert into par_1_prt_1 values (1, 1, 1); @@ -938,9 +2206,9 @@ insert into par_1_prt_1 values (1, 1, 1); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par1_2 | u mv_par2 | u mv_par2_1 | u - mv_par1_2 | u mv_par1_1 | i mv_par1 | i mv_par | i @@ -951,12 +2219,12 @@ insert into par values (1, 2, 2); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | i - mv_par2_1 | u mv_par1_2 | u + mv_par2_1 | u mv_par1_1 | i mv_par1 | i mv_par | i + mv_par2 | i (6 rows) refresh materialized view mv_par; @@ -971,11 +2239,11 @@ insert into par_1_prt_2_2_prt_1 values (1, 2, 1); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | i - mv_par2_1 | i mv_par1_2 | u mv_par1 | u mv_par1_1 | u + mv_par2_1 | i + mv_par2 | i mv_par | i (6 rows) @@ -986,11 +2254,11 @@ truncate par_1_prt_2; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | e - mv_par2_1 | e mv_par1_2 | u mv_par1 | u mv_par1_1 | u + mv_par2_1 | e + mv_par2 | e mv_par | e (6 rows) @@ -1000,11 +2268,11 @@ truncate par_1_prt_2; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | e - mv_par2_1 | e mv_par1_2 | u mv_par1 | u mv_par1_1 | u + mv_par2_1 | e + mv_par2 | e mv_par | e (6 rows) @@ -1018,9 +2286,9 @@ vacuum full par_1_prt_1_2_prt_1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par1_2 | u mv_par2 | u mv_par2_1 | u - mv_par1_2 | u mv_par1_1 | r mv_par1 | r mv_par | r @@ -1038,8 +2306,8 @@ select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ mv_par2 | r - mv_par2_1 | r mv_par | r + mv_par2_1 | r mv_par1_2 | r mv_par1 | r mv_par1_1 | r @@ -1058,10 +2326,10 @@ NOTICE: table has parent, setting distribution columns to match parent table select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u mv_par1 | e mv_par | e (6 rows) @@ -1089,10 +2357,10 @@ alter table par_1_prt_1 detach partition par_1_prt_1_2_prt_1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u mv_par1 | e mv_par | e (6 rows) @@ -1107,10 +2375,10 @@ alter table par_1_prt_1 attach partition new_par for values from (4) to (5); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u mv_par1 | e mv_par | e (6 rows) @@ -1130,12 +2398,12 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u (6 rows) insert into par values(1, 1, 1), (1, 1, 2); @@ -1155,23 +2423,23 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u (6 rows) insert into par_1_prt_2_2_prt_1 values(2, 2, 1); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | i - mv_par2_1 | i mv_par1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2_1 | i + mv_par2 | i mv_par | i (6 rows) @@ -1180,23 +2448,23 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u (6 rows) delete from par where b = 2 and c = 1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | e - mv_par2_1 | e mv_par1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2_1 | e + mv_par2 | e mv_par | e (6 rows) @@ -1205,21 +2473,21 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u (6 rows) delete from par_1_prt_1_2_prt_2; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par1_1 | u mv_par2 | u mv_par2_1 | u - mv_par1_1 | u mv_par1_2 | e mv_par1 | e mv_par | e @@ -1231,12 +2499,12 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u (6 rows) update par set c = 2 where b = 1 and c = 1; @@ -1257,12 +2525,12 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | u - mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u + mv_par2 | u + mv_par2_1 | u (6 rows) update par set c = 2, a = 2 where b = 1 and c = 1; @@ -1281,6 +2549,7 @@ abort; -- Test report warning if extend protocol data is not consumed. --start_ignore drop extension gp_inject_fault; +ERROR: extension "gp_inject_fault" does not exist create extension gp_inject_fault; --end_ignore select gp_inject_fault_infinite('consume_extend_protocol_data', 'skip', dbid) diff --git a/src/test/regress/sql/matview_data.sql b/src/test/regress/sql/matview_data.sql index 059a5a97bf4..8b5a56986e0 100644 --- a/src/test/regress/sql/matview_data.sql +++ b/src/test/regress/sql/matview_data.sql @@ -343,6 +343,541 @@ drop table jt3; drop table jt2; drop table jt1; +-- +-- Test AQUMV (Answer Query Using Materialized Views) with join queries. +-- Each matching test shows EXPLAIN + SELECT with GUC off (original plan), +-- then EXPLAIN + SELECT with GUC on (MV rewrite). Results must match. +-- +create table aqj_t1(a int, b int) distributed by (a); +create table aqj_t2(a int, b int) distributed by (a); +create table aqj_t3(a int, b int) distributed by (a); +insert into aqj_t1 select i, i*10 from generate_series(1, 100) i; +insert into aqj_t2 select i, i*100 from generate_series(1, 100) i; +insert into aqj_t3 select i, i*1000 from generate_series(1, 100) i; +analyze aqj_t1; +analyze aqj_t2; +analyze aqj_t3; + +-- 1. Two-table INNER JOIN exact match +create materialized view mv_aqj_join2 as + select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; +analyze mv_aqj_join2; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; +select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order by 1 limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; +select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order by 1 limit 5; + +-- 2. Join with WHERE clause +create materialized view mv_aqj_where as + select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5; +analyze mv_aqj_where; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5; +select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5 order by 1 limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5; +select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5 order by 1 limit 5; + +-- 3. Join with GROUP BY + aggregate +create materialized view mv_aqj_agg as + select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a; +analyze mv_aqj_agg; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a; +select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a order by 1 limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a; +select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a order by 1 limit 5; + +-- 4. Non-match: different WHERE clause (should show Hash Join, not MV) +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 10; + +-- 5. Non-match: different target list +explain(costs off) select aqj_t1.b, aqj_t2.a from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + +-- 6. Non-match: different join type (INNER vs LEFT) +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 left join aqj_t2 on aqj_t1.a = aqj_t2.a; + +-- 7. Three-table join +create materialized view mv_aqj_join3 as + select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a; +analyze mv_aqj_join3; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a; +select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a + order by 1 limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a; +select aqj_t1.a, aqj_t2.b, aqj_t3.b as c + from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a + order by 1 limit 5; + +-- 8. Implicit join (FROM t1, t2 WHERE ...) +create materialized view mv_aqj_implicit as + select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a; +analyze mv_aqj_implicit; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a; +select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order by 1 limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a; +select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order by 1 limit 5; + +-- 9. MV not up-to-date: after INSERT on base table +insert into aqj_t1 values(999, 9990); +set enable_answer_query_using_materialized_views = on; +-- Should NOT use mv_aqj_join2 (status is 'i') +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + +-- 10. After REFRESH: should use MV again +refresh materialized view mv_aqj_join2; +analyze mv_aqj_join2; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + +-- 11. GUC off: should NOT use MV +set enable_answer_query_using_materialized_views = off; +explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; + +-- +-- More complex join AQUMV test cases with richer schemas +-- + +create table aqj_orders( + order_id int, + customer_id int, + amount numeric(10,2), + status text, + order_date date +) distributed by (order_id); + +create table aqj_customers( + customer_id int, + name text, + region text, + credit_limit numeric(10,2) +) distributed by (customer_id); + +create table aqj_products( + product_id int, + name text, + category text, + price numeric(10,2) +) distributed by (product_id); + +create table aqj_order_items( + item_id int, + order_id int, + product_id int, + quantity int +) distributed by (item_id); + +insert into aqj_customers select i, 'cust_' || i, case when i % 3 = 0 then 'east' when i % 3 = 1 then 'west' else 'north' end, (i * 100)::numeric(10,2) from generate_series(1, 50) i; +insert into aqj_orders select i, (i % 50) + 1, (i * 10.5)::numeric(10,2), case when i % 4 = 0 then 'shipped' when i % 4 = 1 then 'pending' when i % 4 = 2 then 'delivered' else 'cancelled' end, '2024-01-01'::date + (i % 365) from generate_series(1, 200) i; +insert into aqj_products select i, 'prod_' || i, case when i % 5 = 0 then 'electronics' when i % 5 = 1 then 'books' when i % 5 = 2 then 'clothing' when i % 5 = 3 then 'food' else 'toys' end, (i * 5.99)::numeric(10,2) from generate_series(1, 30) i; +insert into aqj_order_items select i, (i % 200) + 1, (i % 30) + 1, (i % 10) + 1 from generate_series(1, 500) i; + +analyze aqj_customers; +analyze aqj_orders; +analyze aqj_products; +analyze aqj_order_items; + +-- 12. Join with multiple columns + WHERE on text column +create materialized view mv_aqj_orders_cust as + select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; +analyze mv_aqj_orders_cust; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; +select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped' + order by o.order_id limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; +select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped' + order by o.order_id limit 5; + +-- 13. Four-table join +create materialized view mv_aqj_order_details as + select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id; +analyze mv_aqj_order_details; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id; +select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + order by o.order_id, p.name limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id; +select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + order by o.order_id, p.name limit 5; + +-- 14. GROUP BY on join with multiple aggregates: sum, count, avg +create materialized view mv_aqj_cust_summary as + select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region; +analyze mv_aqj_cust_summary; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region; +select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region + order by c.region; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region; +select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region + order by c.region; + +-- 15. Join with expression in target list (arithmetic + function) +create materialized view mv_aqj_expr as + select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id; +analyze mv_aqj_expr; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id; +select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + order by o.order_id limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id; +select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + order by o.order_id limit 5; + +-- 16. Non-match: same tables + expressions, but extra WHERE (should NOT match mv_aqj_expr) +set enable_answer_query_using_materialized_views = on; +explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where c.region = 'east'; + +-- 17. Non-match: same tables but different aggregate target list +explain(costs off) + select c.region, sum(o.amount) as total_amount + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region; + +-- 18. Non-match: different join order (o JOIN c vs c JOIN o) +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_customers c join aqj_orders o on o.customer_id = c.customer_id + where o.status = 'shipped'; + +-- 19. Join with compound WHERE (multiple AND conditions) +create materialized view mv_aqj_compound_where as + select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50; +analyze mv_aqj_compound_where; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50; +select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50 + order by o.order_id limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50; +select o.order_id, o.amount, c.name + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'pending' and c.region = 'west' and o.amount > 50 + order by o.order_id limit 5; + +-- 20. Self-join +create materialized view mv_aqj_selfjoin as + select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id; +analyze mv_aqj_selfjoin; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id; +select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id + order by o1.order_id, o2.order_id limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id; +select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 + from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id + where o1.order_id < o2.order_id + order by o1.order_id, o2.order_id limit 5; + +-- 21. GROUP BY with multi-column key on join +create materialized view mv_aqj_grp_multi as + select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status; +analyze mv_aqj_grp_multi; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status; +select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status + order by c.region, o.status limit 6; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status; +select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status + order by c.region, o.status limit 6; + +-- 22. Four-table join with WHERE and aggregate +create materialized view mv_aqj_3way_agg as + select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category; +analyze mv_aqj_3way_agg; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category; +select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category + order by c.region, p.category limit 6; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category; +select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + where o.status = 'delivered' + group by c.region, p.category + order by c.region, p.category limit 6; + +-- 23. Implicit four-table join (comma style) +create materialized view mv_aqj_implicit3 as + select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending'; +analyze mv_aqj_implicit3; + +set enable_answer_query_using_materialized_views = off; +explain(costs off) + select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending'; +select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending' + order by o.order_id, p.name limit 5; + +set enable_answer_query_using_materialized_views = on; +explain(costs off) + select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending'; +select o.order_id, c.name, p.name as product_name + from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p + where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id + and o.status = 'pending' + order by o.order_id, p.name limit 5; + +-- 24. Result correctness across DML + REFRESH cycle +insert into aqj_orders values(201, 1, 9999.99, 'shipped', '2025-12-31'); +set enable_answer_query_using_materialized_views = on; +-- Stale: should NOT use MV +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; +-- Refresh and verify MV is used again +refresh materialized view mv_aqj_orders_cust; +analyze mv_aqj_orders_cust; +explain(costs off) select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped'; +-- The new row should appear in results via MV scan +select o.order_id, o.amount, c.name, c.region + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + where o.status = 'shipped' and o.order_id = 201; + +-- 25. Post-DML comprehensive: refresh all, then verify GUC off vs on results +refresh materialized view mv_aqj_order_details; +refresh materialized view mv_aqj_expr; +refresh materialized view mv_aqj_selfjoin; +refresh materialized view mv_aqj_grp_multi; +refresh materialized view mv_aqj_3way_agg; +refresh materialized view mv_aqj_implicit3; +analyze mv_aqj_order_details; +analyze mv_aqj_expr; +analyze mv_aqj_selfjoin; +analyze mv_aqj_grp_multi; +analyze mv_aqj_3way_agg; +analyze mv_aqj_implicit3; + +-- Verify four-table join results after DML+refresh +set enable_answer_query_using_materialized_views = off; +select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + order by o.order_id, p.name limit 5; + +set enable_answer_query_using_materialized_views = on; +select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price + from aqj_orders o + join aqj_customers c on o.customer_id = c.customer_id + join aqj_order_items oi on o.order_id = oi.order_id + join aqj_products p on oi.product_id = p.product_id + order by o.order_id, p.name limit 5; + +-- Verify expression MV results after DML+refresh +set enable_answer_query_using_materialized_views = off; +select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + order by o.order_id limit 5; + +set enable_answer_query_using_materialized_views = on; +select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + order by o.order_id limit 5; + +-- Verify multi-key GROUP BY results after DML+refresh +set enable_answer_query_using_materialized_views = off; +select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status + order by c.region, o.status limit 6; + +set enable_answer_query_using_materialized_views = on; +select c.region, o.status, count(*) as cnt, sum(o.amount) as total + from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id + group by c.region, o.status + order by c.region, o.status limit 6; + +-- Clean up AQUMV join test objects +drop materialized view mv_aqj_implicit3; +drop materialized view mv_aqj_3way_agg; +drop materialized view mv_aqj_grp_multi; +drop materialized view mv_aqj_selfjoin; +drop materialized view mv_aqj_compound_where; +drop materialized view mv_aqj_expr; +drop materialized view mv_aqj_cust_summary; +drop materialized view mv_aqj_order_details; +drop materialized view mv_aqj_orders_cust; +drop materialized view mv_aqj_implicit; +drop materialized view mv_aqj_join3; +drop materialized view mv_aqj_agg; +drop materialized view mv_aqj_where; +drop materialized view mv_aqj_join2; +drop table aqj_order_items; +drop table aqj_products; +drop table aqj_customers; +drop table aqj_orders; +drop table aqj_t3; +drop table aqj_t2; +drop table aqj_t1; + -- test drop table select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 'mv2', 'mv3'); drop materialized view mv2;