Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
875 changes: 707 additions & 168 deletions src/backend/commands/analyze.c

Large diffs are not rendered by default.

210 changes: 184 additions & 26 deletions src/backend/commands/analyzefuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "funcapi.h"
#include "utils/syscache.h"

/**
* Statistics related parameters.
Expand Down Expand Up @@ -56,36 +57,36 @@ bool gp_statistics_use_fkeys = false;
* the actual sample rows.
*
* To make things even more complicated, each sample row contains one extra
* column too: oversized_cols_bitmap. It's a bitmap indicating which attributes
* on the sample row were omitted, because they were "too large". The omitted
* attributes are returned as NULLs, and the bitmap can be used to distinguish
* real NULLs from values that were too large to be included in the sample. The
* bitmap is represented as a text column, with '0' or '1' for every column.
* column too: oversized_cols_length. It's an array indicating which attributes
* on the sample row were omitted and stores these omitted attributes' length,
* because they were "too large". The omitted attributes are returned as NULLs,
* and the array can be used to distinguish real NULLs from values that were
* too large to be included in the sample.
*
* So overall, this returns a result set like this:
*
* postgres=# select * from pg_catalog.gp_acquire_sample_rows('foo'::regclass, 400, 'f') as (
* -- special columns
* totalrows pg_catalog.float8,
* totaldeadrows pg_catalog.float8,
* oversized_cols_bitmap pg_catalog.text,
* oversized_cols_length pg_catalog._float8,
* -- columns matching the table
* id int4,
* t text
* );
* totalrows | totaldeadrows | oversized_cols_bitmap | id | t
* totalrows | totaldeadrows | oversized_cols_length | id | t
* -----------+---------------+-----------------------+-----+---------
* | | | 1 | foo
* | | | 2 | bar
* | | 01 | 50 |
* | | {0,3004} | 50 |
* | | | 100 | foo 100
* 2 | 0 | | |
* 1 | 0 | | |
* 1 | 0 | | |
* (7 rows)
*
* The first four rows form the actual sample. One of the columns contained
* an oversized text datum. The function is marked as EXECUTE ON SEGMENTS in
* an oversized array datum. The function is marked as EXECUTE ON SEGMENTS in
* the catalog so you get one summary row *for each segment*.
*/
Datum
Expand Down Expand Up @@ -181,8 +182,8 @@ gp_acquire_sample_rows(PG_FUNCTION_ARGS)
/* extra column to indicate oversize cols */
TupleDescInitEntry(outDesc,
3,
"oversized_cols_bitmap",
TEXTOID,
"oversized_cols_length",
FLOAT8ARRAYOID,
-1,
0);

Expand Down Expand Up @@ -214,6 +215,15 @@ gp_acquire_sample_rows(PG_FUNCTION_ARGS)

ctx->index = 0;
ctx->summary_sent = false;
/*
* we only get sample data from segindex 0 for replicated table
*/
if (Gp_role == GP_ROLE_EXECUTE && GpPolicyIsReplicated(onerel->rd_cdbpolicy)
&& GpIdentity.segindex > 0)
{
ctx->index = ctx->num_sample_rows;
ctx->summary_sent = true;
}

MemoryContextSwitchTo(oldcontext);
}
Expand All @@ -235,17 +245,17 @@ gp_acquire_sample_rows(PG_FUNCTION_ARGS)
HeapTuple relTuple = ctx->sample_rows[ctx->index];
int attno;
int outattno;
Bitmapset *toolarge = NULL;
bool has_toolarge = false;
Datum *relvalues = (Datum *) palloc(relDesc->natts * sizeof(Datum));
bool *relnulls = (bool *) palloc(relDesc->natts * sizeof(bool));
Datum *oversized_cols_length = (Datum *) palloc0(relDesc->natts * sizeof(Datum));

heap_deform_tuple(relTuple, relDesc, relvalues, relnulls);

outattno = NUM_SAMPLE_FIXED_COLS + 1;
for (attno = 1; attno <= relDesc->natts; attno++)
{
Form_pg_attribute relatt = TupleDescAttr(relDesc, attno - 1);
bool is_toolarge = false;
Datum relvalue;
bool relnull;

Expand All @@ -261,8 +271,8 @@ gp_acquire_sample_rows(PG_FUNCTION_ARGS)

if (toasted_size > WIDTH_THRESHOLD)
{
toolarge = bms_add_member(toolarge, outattno - NUM_SAMPLE_FIXED_COLS);
is_toolarge = true;
oversized_cols_length[attno - 1] = Float8GetDatum((double)toasted_size);
has_toolarge = true;
relvalue = (Datum) 0;
relnull = true;
}
Expand All @@ -276,18 +286,10 @@ gp_acquire_sample_rows(PG_FUNCTION_ARGS)
* If any of the attributes were oversized, construct the float8 array
* datum holding the length of each oversized attribute (0 for the others).
*/
if (toolarge)
if (has_toolarge)
{
char *toolarge_str;
int i;
int live_natts = outDesc->natts - NUM_SAMPLE_FIXED_COLS;

toolarge_str = palloc((live_natts + 1) * sizeof(char));
for (i = 0; i < live_natts; i++)
toolarge_str[i] = bms_is_member(i + 1, toolarge) ? '1' : '0';
toolarge_str[i] = '\0';

outvalues[2] = CStringGetTextDatum(toolarge_str);
outvalues[2] = PointerGetDatum(construct_array(oversized_cols_length, relDesc->natts,
FLOAT8OID, 8, true, 'd'));
outnulls[2] = false;
}
else
Expand Down Expand Up @@ -368,3 +370,159 @@ gp_acquire_sample_rows_col_type(Oid typid)
}
return typid;
}

/*
* gp_acquire_correlations - Acquire each column's correlation for a table.
* This is an internal function called in gp_acquire_correlations_dispatcher.
* this function will return a result set, a row for each alive column.
* each row contains 3 columns: attnum, the correlation for it and totalrows.
* if correlation is null, set totalrows to 0 for it.
*
* So overall, this returns a result set like this:
* create table t(tc1 int, tc2 int, tc3 int);
* insert values.
* alter table t drop column tc2;
*
* attnum | correlation| totalrows
* ----------+------------|+------------
* 0 | 0.8 | 200
* 2 | | 0
*/
Datum
gp_acquire_correlations(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx = NULL;
gp_acquire_correlation_context *ctx;
MemoryContext oldcontext;
Oid relOid = PG_GETARG_OID(0);
bool inherited = PG_GETARG_BOOL(1);
TupleDesc relDesc;
TupleDesc outDesc;

if (SRF_IS_FIRSTCALL())
{
Relation onerel;
funcctx = SRF_FIRSTCALL_INIT();

/*
* switch to memory context appropriate for multiple function
* calls
*/
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

/* Construct the context to keep across calls. */
ctx = (gp_acquire_correlation_context *) palloc0(sizeof(gp_acquire_correlation_context));

if (!pg_class_ownercheck(relOid, GetUserId()))
aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TABLE,
get_rel_name(relOid));

onerel = table_open(relOid, AccessShareLock);
relDesc = RelationGetDescr(onerel);

outDesc = CreateTemplateTupleDesc(3);
TupleDescInitEntry(outDesc,
1,
"attnum",
INT4OID,
-1,
0);
TupleDescInitEntry(outDesc,
2,
"correlation",
FLOAT4OID,
-1,
0);
TupleDescInitEntry(outDesc,
3,
"totalrows",
INT4OID,
-1,
0);

BlessTupleDesc(outDesc);
funcctx->tuple_desc = outDesc;

ctx->onerel = onerel;
funcctx->user_fctx = ctx;
ctx->outDesc = outDesc;

ctx->index = 0;
ctx->totalAttr = relDesc->natts;
MemoryContextSwitchTo(oldcontext);
}

/* stuff done on every call of the function */
funcctx = SRF_PERCALL_SETUP();

ctx = funcctx->user_fctx;
relDesc = RelationGetDescr(ctx->onerel);
outDesc = ctx->outDesc;

Datum *outvalues = (Datum *) palloc(outDesc->natts * sizeof(Datum));
bool *outnulls = (bool *) palloc(outDesc->natts * sizeof(bool));
HeapTuple res;
int attno = ctx->index;

/* Return all alive attribute correlation */
for (; attno < ctx->totalAttr; attno++)
{
/* get the correlation of the column */
int totalrows = 0;
HeapTuple statsTuple;
Form_pg_attribute relatt = TupleDescAttr(relDesc, attno);
if (relatt->attisdropped)
continue;
statsTuple = SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(relOid),
Int16GetDatum(attno + 1),
BoolGetDatum(inherited));
outvalues[0] = Int32GetDatum(attno);
outnulls[0] = false;

if (HeapTupleIsValid(statsTuple))
{
AttStatsSlot sslot;

if (get_attstatsslot(&sslot, statsTuple,
STATISTIC_KIND_CORRELATION, InvalidOid,
ATTSTATSSLOT_NUMBERS))
{
float4 varCorrelation;
Assert(sslot.nnumbers == 1);
varCorrelation = sslot.numbers[0];

free_attstatsslot(&sslot);

outvalues[1] = Float4GetDatum(varCorrelation);
outnulls[1] = false;
totalrows = ctx->onerel->rd_rel->reltuples;
}
else
{
outvalues[1] = (Datum) 0;
outnulls[1] = true;
}
ReleaseSysCache(statsTuple);
}
else
{
outvalues[1] = (Datum) 0;
outnulls[1] = true;
}

outvalues[2] = Int32GetDatum(totalrows);
outnulls[2] = false;

res = heap_form_tuple(outDesc, outvalues, outnulls);
ctx->index = attno + 1;

SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(res));
}

table_close(ctx->onerel, AccessShareLock);
pfree(ctx);
funcctx->user_fctx = NULL;

SRF_RETURN_DONE(funcctx);
}
2 changes: 1 addition & 1 deletion src/backend/tsearch/ts_typanalyze.c
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ compute_tsvector_stats(VacAttrStats *stats,
stats->stats_valid = true;
/* Do the simple null-frac and average width stats */
stats->stanullfrac = (double) null_cnt / (double) samplerows;
stats->stawidth = total_width / (double) nonnull_cnt;
stats->stawidth = (total_width + stats->totalwidelength) / (double) (nonnull_cnt + stats->widerow_num);

/* Assume it's a unique column (see notes above) */
stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
Expand Down
2 changes: 1 addition & 1 deletion src/backend/utils/adt/rangetypes_typanalyze.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
stats->stats_valid = true;
/* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) samplerows;
stats->stawidth = total_width / (double) non_null_cnt;
stats->stawidth = (total_width + stats->totalwidelength) / (double) (non_null_cnt + stats->widerow_num);

/* Estimate that non-null values are unique */
stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
Expand Down
4 changes: 3 additions & 1 deletion src/include/catalog/pg_proc.dat
Original file line number Diff line number Diff line change
Expand Up @@ -11810,7 +11810,7 @@
{ oid => 6464, descr => 'get backends of overflowed subtransaction',
proname => 'gp_get_suboverflowed_backends', provolatile => 'v', prorettype => '_int4', proargtypes => '', prosrc => 'gp_get_suboverflowed_backends' },

{ oid => 6040, descr => 'get gp all segments pg_snapshot',
{ oid => 6041, descr => 'get gp all segments pg_snapshot',
proname => 'gp_current_snapshot', proisstrict => 'f',
proretset => 't', provolatile => 'v', proparallel => 'r',
prorettype => 'record', proargtypes => '',
Expand Down Expand Up @@ -11992,6 +11992,8 @@
# Analyze related
{ oid => 6038, descr => 'Collect a random sample of rows from table',
proname => 'gp_acquire_sample_rows', prorows => '1000', proretset => 't', provolatile => 'v', proparallel => 'u', prorettype => 'record', proargtypes => 'oid int4 bool', prosrc => 'gp_acquire_sample_rows', proexeclocation => 's' },
{ oid => 6040, descr => 'Collect correlations from segments',
proname => 'gp_acquire_correlations', prorows => '10', proretset => 't', provolatile => 'v', proparallel => 'u', prorettype => 'record', proargtypes => 'oid bool', prosrc => 'gp_acquire_correlations', proexeclocation => 's' },

# Backoff related
{ oid => 7016, descr => 'change weight of all the backends for a given session id',
Expand Down
26 changes: 26 additions & 0 deletions src/include/commands/vacuum.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ typedef struct VacAttrStats
int minrows; /* Minimum # of rows wanted for stats */
void *extra_data; /* for extra type-specific data */

/* These fields are used to compute stawidth during the compute_stats routine. */
double totalwidelength;/* total length of toowide row */
int widerow_num; /* # of toowide row */

/*
* These fields are to be filled in by the compute_stats routine. (They
* are initialized to zero when the struct is created.)
Expand Down Expand Up @@ -179,6 +183,9 @@ typedef struct VacAttrStats
bool *exprnulls;
int rowstride;
bool merge_stats;
bool corrnull; /* whether correlation value is null */
bool partitiontbl_qd; /* analyze is on QD and the policy of table is partitioned */
float4 corrval; /* correlation gathered from segments */
} VacAttrStats;


Expand Down Expand Up @@ -327,6 +334,24 @@ typedef struct
bool summary_sent;
} gp_acquire_sample_rows_context;

/*
 * State kept across calls of the gp_acquire_correlations() set-returning
 * function.
 */
typedef struct
{
/* Table being analyzed; opened on the first call and closed after the last row */
Relation onerel;

/* Whether to acquire the inherited table's correlations */
bool inherited;

/*
* Result tuple descriptor (attnum, correlation, totalrows).
*/
TupleDesc outDesc;

/*
* SRF state, to track which rows have already been returned: index is the
* zero-based attribute position the next call resumes from, totalAttr is
* the number of attributes in the relation's tuple descriptor.
*/
int index;
int totalAttr;
} gp_acquire_correlation_context;

/* GUC parameters */
extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */
extern int vacuum_freeze_min_age;
Expand Down Expand Up @@ -416,6 +441,7 @@ extern int acquire_inherited_sample_rows(Relation onerel, int elevel,

/* in commands/analyzefuncs.c */
extern Datum gp_acquire_sample_rows(PG_FUNCTION_ARGS);
extern Datum gp_acquire_correlations(PG_FUNCTION_ARGS);
extern Oid gp_acquire_sample_rows_col_type(Oid typid);

extern bool gp_vacuum_needs_update_stats(void);
Expand Down
Loading