摘要:
DuckDB 的物理计划很有特点:它采用了基于流水线(pipeline)的并行处理。本文以聚合为例对其进行分析。
架构:
静态结构:
动态结构:
核心数据结构:
PhysicalOperator
//! The physical operator type
PhysicalOperatorType type;
//! The set of children of the operator
vector<unique_ptr<PhysicalOperator>> children;
//! The types returned by this physical operator
vector<LogicalType> types;
//! The estimated cardinality of this physical operator
idx_t estimated_cardinality;
unique_ptr<EstimatedProperties> estimated_props;
//! The global sink state of this operator
unique_ptr<GlobalSinkState> sink_state;
//! The global state of this operator
unique_ptr<GlobalOperatorState> op_state;
//! Lock for (re)setting any of the operator states
mutex lock;
//! The sink method is called constantly with new input, as long as new input is available. Note that this method
//! CAN be called in parallel, proper locking is needed when accessing data inside the GlobalSinkState.
virtual SinkResultType Sink(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate,
DataChunk &input) const;
//! The combine is called when a single thread has completed execution of its part of the pipeline; it is the final
//! time that a specific LocalSinkState is accessible. This method can be called in parallel while other Sink() or
//! Combine() calls are active on the same GlobalSinkState.
virtual void Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const;
//! The finalize is called when ALL threads have finished execution. It is called only once per pipeline, and is
//! entirely single threaded.
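//! If Finalize returns SinkResultType::FINISHED, the sink is marked as finished
//! NOTE(review): the declaration below returns SinkFinalizeType, not SinkResultType — confirm which value is meant.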
virtual SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
GlobalSinkState &gstate) const;
PhysicalHashAggregate
//! The grouping sets
GroupedAggregateData grouped_aggregate_data;
vector<GroupingSet> grouping_sets;
//! The radix partitioned hash tables (one per grouping set)
vector<HashAggregateGroupingData> groupings;
unique_ptr<DistinctAggregateCollectionInfo> distinct_collection_info;
//! A recreation of the input chunk, with nulls for everything that isn't a group
vector<LogicalType> input_group_types;
// Filters given to Sink and friends
vector<idx_t> non_distinct_filter;
vector<idx_t> distinct_filter;
unordered_m