Skip to content

Commit b96d65d

Browse files
zxuejingzhangxing
authored and committed
Send rows in binary mode for ANALYZE (#15159)
Send the results of the select pg_catalog.gp_acquire_sample_rows query in binary mode. This avoids an overflow for the maximum double value. For example, if the following is run prior to this fix: set extra_float_digits to 0; create table t (a double precision); insert into t values (1.7976931348623157e+308); analyze t; the following message is printed: ERROR: value out of range: overflow In text mode (the default), when analyze is performed on a table, the master calls the gp_acquire_sample_rows() helper function on each segment. That eventually calls the float8out function on the segment to convert the float8 number to a string with snprintf: snprintf(ascii, MAXDOUBLEWIDTH + 1, "%.*g", ndig, num); When ndig is 15, the maximum float8 value 1.7976931348623157e+308 is rounded to "1.79769313486232e+308", which is larger than the maximum and therefore cannot be represented as a float8. On the master, the acquire_sample_rows_dispatcher function processes the gp_acquire_sample_rows result, and eventually the float8in function is called to convert the string back to float8 with strtold: val = strtold(num, &endptr); This is where the overflow happens for "1.79769313486232e+308", whereas "1.7976931348623157e+308" works fine. Transferring in binary mode avoids the conversion from double to string on the segments and then back to double on the master, and it is also much faster than before. Using CdbDispatchPlan instead of CdbDispatchCommand allows the data to be received in binary mode as MemTuple, which is much faster than before. A tuplestore is used to store the received tuples to avoid using too much memory. Co-authored-by: zxuejing <[email protected]>
1 parent 6fdb2b2 commit b96d65d

File tree

8 files changed

+409
-269
lines changed

8 files changed

+409
-269
lines changed

src/backend/commands/analyze.c

Lines changed: 255 additions & 265 deletions
Large diffs are not rendered by default.

src/backend/commands/analyzefuncs.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,15 @@ gp_acquire_sample_rows(PG_FUNCTION_ARGS)
215215

216216
ctx->index = 0;
217217
ctx->summary_sent = false;
218+
/*
219+
* we only get sample data from segindex 0 for replicated table
220+
*/
221+
if (Gp_role == GP_ROLE_EXECUTE && GpPolicyIsReplicated(onerel->rd_cdbpolicy)
222+
&& GpIdentity.segindex > 0)
223+
{
224+
ctx->index = ctx->num_sample_rows;
225+
ctx->summary_sent = true;
226+
}
218227

219228
MemoryContextSwitchTo(oldcontext);
220229
}

src/test/regress/expected/analyze.out

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,26 +1086,57 @@ SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
10861086
(2 rows)
10871087

10881088
-- test6: randomly table
1089+
-- we use weighted mean algorithm to calculate correlations.
1090+
-- the formula for calculating the weighted mean is:
1091+
-- sum(correlationOnSeg[i] * (totalRowsOnSeg[i] / totalRows))
1092+
-- i is from 0 to N. N is the number of segments.
1093+
-- however, for randomly table the data in each segment may diff each time.
1094+
-- it will affect the value of correlation.
1095+
-- So ignore the results
10891096
drop table analyze_table;
10901097
create table analyze_table(tc1 int,tc2 int) distributed randomly;
10911098
insert into analyze_table select i,i from generate_series(1,100) i;
10921099
analyze analyze_table;
1100+
-- start_ignore
10931101
SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
10941102
correlation
10951103
-------------
10961104
1
10971105
1
10981106
(2 rows)
10991107

1108+
-- end_ignore
11001109
alter table analyze_table drop column tc1;
11011110
analyze analyze_table;
1111+
-- start_ignore
11021112
SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
11031113
correlation
11041114
-------------
11051115
1
11061116
(1 row)
11071117

1108-
-- test7: inherit table
1118+
-- end_ignore
1119+
-- test7: replicated table
1120+
drop table analyze_table;
1121+
create table analyze_table(tc1 int,tc2 int) distributed replicated;
1122+
insert into analyze_table select i,i from generate_series(1,100) i;
1123+
analyze analyze_table;
1124+
SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
1125+
correlation
1126+
-------------
1127+
1
1128+
1
1129+
(2 rows)
1130+
1131+
analyze analyze_table;
1132+
SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
1133+
correlation
1134+
-------------
1135+
1
1136+
1
1137+
(2 rows)
1138+
1139+
-- test8: inherit table
11091140
drop table analyze_parent cascade;
11101141
ERROR: table "analyze_parent" does not exist
11111142
create table analyze_parent (tc1 int,tc2 int);
@@ -1128,7 +1159,7 @@ SELECT correlation,attname,inherited FROM pg_stats WHERE tablename ='analyze_chi
11281159
-------------+---------+-----------
11291160
(0 rows)
11301161

1131-
-- test8: partition table test
1162+
-- test9: partition table test
11321163
CREATE TABLE partition_table (
11331164
tc1 int,
11341165
tc2 int
@@ -1179,3 +1210,23 @@ SELECT correlation,attname,inherited FROM pg_stats WHERE tablename ='partition_t
11791210
1 | tc2 | f
11801211
(2 rows)
11811212

1213+
--
1214+
-- Test analyze for table with maximum float8 value 1.7976931348623157e+308
1215+
-- There should be no "ERROR: value out of range: overflow"
1216+
--
1217+
set extra_float_digits to 0;
1218+
create table test_max_float8(a double precision);
1219+
insert into test_max_float8 values(1.7976931348623157e+308);
1220+
analyze test_max_float8;
1221+
drop table test_max_float8;
1222+
reset extra_float_digits;
1223+
-- test analyze when table has large column
1224+
create table ttt_large_column(tc1 int,tc2 char(1500),tc3 char(1500));
1225+
insert into ttt_large_column select i,repeat('wwweereeer',150),repeat('ssddbbbbbb',150) from generate_series(1,5) i;
1226+
analyze ttt_large_column;
1227+
drop table ttt_large_column;
1228+
--test analyze replicated table
1229+
create table analyze_replicated(tc1 int,tc2 int) distributed replicated;
1230+
insert into analyze_replicated select i, i from generate_series(1,1000) i;
1231+
analyze analyze_replicated;
1232+
drop table analyze_replicated;

src/test/regress/expected/bfv_dd.out

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ insert into dd_singlecol_1 values(null, null);
1919
INFO: (slice 0) Dispatch command to SINGLE content
2020
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
2121
analyze dd_singlecol_1;
22+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
2223
-- ctas tests
2324
create table dd_ctas_1 as select * from dd_singlecol_1 where a=1 distributed by (a);
2425
INFO: (slice 0) Dispatch command to ALL contents: 0 1 2
@@ -317,7 +318,9 @@ insert into dd_singlecol_idx2 values(null, null);
317318
INFO: (slice 0) Dispatch command to SINGLE content
318319
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
319320
analyze dd_singlecol_idx;
321+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
320322
analyze dd_singlecol_idx2;
323+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
321324
-- disjunction with index scans
322325
select * from dd_singlecol_idx where (a=1 or a=2) and b<2;
323326
INFO: (slice 1) Dispatch command to PARTIAL contents: 1 0
@@ -366,6 +369,7 @@ insert into dd_singlecol_bitmap_idx values(null, null);
366369
INFO: (slice 0) Dispatch command to SINGLE content
367370
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
368371
analyze dd_singlecol_bitmap_idx;
372+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
369373
-- disjunction with bitmap index scans
370374
select * from dd_singlecol_bitmap_idx where (a=1 or a=2) and b<2;
371375
INFO: (slice 1) Dispatch command to PARTIAL contents: 1 0
@@ -442,6 +446,12 @@ insert into dd_singlecol_part_bitmap_idx values(null, null);
442446
INFO: (slice 0) Dispatch command to SINGLE content
443447
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
444448
analyze dd_singlecol_part_bitmap_idx;
449+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
450+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
451+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
452+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
453+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
454+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
445455
-- bitmap indexes on partitioned tables
446456
select * from dd_singlecol_part_bitmap_idx where a=1 and b=0;
447457
INFO: (slice 1) Dispatch command to SINGLE content
@@ -487,6 +497,7 @@ insert into dd_multicol_idx values(null, null);
487497
INFO: (slice 0) Dispatch command to SINGLE content
488498
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
489499
analyze dd_multicol_idx;
500+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
490501
select count(*) from dd_multicol_idx;
491502
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
492503
count
@@ -610,7 +621,19 @@ insert into dd_singlecol_part_idx2 values(null, null);
610621
INFO: (slice 0) Dispatch command to SINGLE content
611622
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
612623
analyze dd_singlecol_part_idx;
624+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
625+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
626+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
627+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
628+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
629+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
613630
analyze dd_singlecol_part_idx2;
631+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
632+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
633+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
634+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
635+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
636+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
614637
-- indexes on partitioned tables
615638
select * from dd_singlecol_part_idx where a=1 and b>0;
616639
INFO: (slice 1) Dispatch command to SINGLE content

src/test/regress/expected/bfv_dd_multicolumn.out

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ insert into dd_multicol_1 values(null, 1);
2929
INFO: (slice 0) Dispatch command to SINGLE content
3030
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
3131
analyze dd_multicol_1;
32+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
3233
insert into dd_multicol_2 select g, g%2 from generate_series(1, 100) g;
3334
INFO: (slice 0) Dispatch command to ALL contents: 0 1 2
3435
INFO: (slice 1) Dispatch command to SINGLE content

src/test/regress/expected/bfv_dd_multicolumn_optimizer.out

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ INFO: (slice 0) Dispatch command to ALL contents: 0 1 2
3131
INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2
3232
INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2
3333
analyze dd_multicol_1;
34+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
3435
insert into dd_multicol_2 select g, g%2 from generate_series(1, 100) g;
3536
INFO: (slice 0) Dispatch command to ALL contents: 0 1 2
3637
INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2

src/test/regress/expected/bfv_dd_optimizer.out

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ insert into dd_singlecol_1 values(null, null);
1818
INFO: (slice 0) Dispatch command to SINGLE content
1919
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
2020
analyze dd_singlecol_1;
21+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
2122
-- ctas tests
2223
create table dd_ctas_1 as select * from dd_singlecol_1 where a=1 distributed by (a);
2324
INFO: (slice 0) Dispatch command to ALL contents: 0 1 2
@@ -315,7 +316,9 @@ insert into dd_singlecol_idx2 values(null, null);
315316
INFO: (slice 0) Dispatch command to SINGLE content
316317
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
317318
analyze dd_singlecol_idx;
319+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
318320
analyze dd_singlecol_idx2;
321+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
319322
-- disjunction with index scans
320323
select * from dd_singlecol_idx where (a=1 or a=2) and b<2;
321324
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
@@ -363,6 +366,7 @@ insert into dd_singlecol_bitmap_idx values(null, null);
363366
INFO: (slice 0) Dispatch command to SINGLE content
364367
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
365368
analyze dd_singlecol_bitmap_idx;
369+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
366370
-- disjunction with bitmap index scans
367371
select * from dd_singlecol_bitmap_idx where (a=1 or a=2) and b<2;
368372
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
@@ -438,6 +442,12 @@ insert into dd_singlecol_part_bitmap_idx values(null, null);
438442
INFO: (slice 0) Dispatch command to SINGLE content
439443
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
440444
analyze dd_singlecol_part_bitmap_idx;
445+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
446+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
447+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
448+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
449+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
450+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
441451
-- bitmap indexes on partitioned tables
442452
select * from dd_singlecol_part_bitmap_idx where a=1 and b=0;
443453
INFO: (slice 1) Dispatch command to SINGLE content
@@ -483,6 +493,7 @@ INFO: (slice 0) Dispatch command to ALL contents: 0 1 2
483493
INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2
484494
INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2
485495
analyze dd_multicol_idx;
496+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
486497
select count(*) from dd_multicol_idx;
487498
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
488499
count
@@ -604,7 +615,19 @@ insert into dd_singlecol_part_idx2 values(null, null);
604615
INFO: (slice 0) Dispatch command to SINGLE content
605616
INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content
606617
analyze dd_singlecol_part_idx;
618+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
619+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
620+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
621+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
622+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
623+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
607624
analyze dd_singlecol_part_idx2;
625+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
626+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
627+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
628+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
629+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
630+
INFO: (slice 1) Dispatch command to ALL contents: 0 1 2
608631
-- indexes on partitioned tables
609632
select * from dd_singlecol_part_idx where a=1 and b>0;
610633
INFO: (slice 1) Dispatch command to SINGLE content

src/test/regress/sql/analyze.sql

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -551,16 +551,36 @@ analyze analyze_table;
551551
SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
552552

553553
-- test6: randomly table
554+
-- we use weighted mean algorithm to calculate correlations.
555+
-- the formula for calculating the weighted mean is:
556+
-- sum(correlationOnSeg[i] * (totalRowsOnSeg[i] / totalRows))
557+
-- i is from 0 to N. N is the number of segments.
558+
-- however, for randomly table the data in each segment may diff each time.
559+
-- it will affect the value of correlation.
560+
-- So ignore the results
554561
drop table analyze_table;
555562
create table analyze_table(tc1 int,tc2 int) distributed randomly;
556563
insert into analyze_table select i,i from generate_series(1,100) i;
557564
analyze analyze_table;
565+
-- start_ignore
558566
SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
567+
-- end_ignore
559568
alter table analyze_table drop column tc1;
560569
analyze analyze_table;
570+
-- start_ignore
561571
SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
572+
-- end_ignore
562573

563-
-- test7: inherit table
574+
-- test7: replicated table
575+
drop table analyze_table;
576+
create table analyze_table(tc1 int,tc2 int) distributed replicated;
577+
insert into analyze_table select i,i from generate_series(1,100) i;
578+
analyze analyze_table;
579+
SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
580+
analyze analyze_table;
581+
SELECT correlation FROM pg_stats WHERE tablename ='analyze_table';
582+
583+
-- test8: inherit table
564584
drop table analyze_parent cascade;
565585
create table analyze_parent (tc1 int,tc2 int);
566586
create table analyze_child(tc3 int,tc4 int)inherits (analyze_parent);
@@ -571,7 +591,7 @@ analyze analyze_parent;
571591
SELECT correlation,attname,inherited FROM pg_stats WHERE tablename ='analyze_parent';
572592
SELECT correlation,attname,inherited FROM pg_stats WHERE tablename ='analyze_child';
573593

574-
-- test8: partition table test
594+
-- test9: partition table test
575595
CREATE TABLE partition_table (
576596
tc1 int,
577597
tc2 int
@@ -589,3 +609,25 @@ SELECT correlation,attname,inherited FROM pg_stats WHERE tablename ='partition_t
589609
SELECT correlation,attname,inherited FROM pg_stats WHERE tablename ='partition_table_1_prt_3';
590610
SELECT correlation,attname,inherited FROM pg_stats WHERE tablename ='partition_table_1_prt_4';
591611
SELECT correlation,attname,inherited FROM pg_stats WHERE tablename ='partition_table_1_prt_5';
612+
--
613+
-- Test analyze for table with maximum float8 value 1.7976931348623157e+308
614+
-- There should be no "ERROR: value out of range: overflow"
615+
--
616+
set extra_float_digits to 0;
617+
create table test_max_float8(a double precision);
618+
insert into test_max_float8 values(1.7976931348623157e+308);
619+
analyze test_max_float8;
620+
drop table test_max_float8;
621+
reset extra_float_digits;
622+
623+
-- test analyze when table has large column
624+
create table ttt_large_column(tc1 int,tc2 char(1500),tc3 char(1500));
625+
insert into ttt_large_column select i,repeat('wwweereeer',150),repeat('ssddbbbbbb',150) from generate_series(1,5) i;
626+
analyze ttt_large_column;
627+
drop table ttt_large_column;
628+
629+
--test analyze replicated table
630+
create table analyze_replicated(tc1 int,tc2 int) distributed replicated;
631+
insert into analyze_replicated select i, i from generate_series(1,1000) i;
632+
analyze analyze_replicated;
633+
drop table analyze_replicated;

0 commit comments

Comments
 (0)