@@ -170,6 +170,7 @@ static MemoryContext anl_context = NULL;
170170static BufferAccessStrategy vac_strategy ;
171171
172172Bitmapset * * acquire_func_colLargeRowIndexes ;
173+ double * acquire_func_colLargeRowLength ;
173174
174175
175176static void do_analyze_rel (Relation onerel ,
@@ -495,6 +496,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
495496 int save_nestlevel ;
496497
497498 Bitmapset * * colLargeRowIndexes ;
499+ double * colLargeRowLength ;
498500 bool sample_needed ;
499501
500502 int64 AnalyzePageHit = VacuumPageHit ;
@@ -721,6 +723,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
721723 * Maintain information if the row of a column exceeds WIDTH_THRESHOLD
722724 */
723725 colLargeRowIndexes = (Bitmapset * * ) palloc0 (sizeof (Bitmapset * ) * onerel -> rd_att -> natts );
726+ colLargeRowLength = (double * )palloc0 (sizeof (double ) * onerel -> rd_att -> natts );
724727
725728 if ((params -> options & VACOPT_FULLSCAN ) != 0 )
726729 {
@@ -742,10 +745,18 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
742745 /*
743746 * Acquire the sample rows
744747 *
745- * colLargeRowindexes is passed out-of-band, in a global variable,
748+ * colLargeRowIndexes is passed out-of-band, in a global variable,
746749 * to avoid changing the function signature from upstream's.
750+ *
751+ * Like colLargeRowIndexes, colLargeRowLength is passed out-of-band in
752+ * a global variable. It stores, for every attribute of the target
753+ * relation, the total length of the too-wide values in the sample.
754+ * ANALYZE ignores too-wide values during analysis (see the comments on
755+ * WIDTH_THRESHOLD), so without it the stawidth can be far smaller than
756+ * the real average width for varlena datums that are larger than
757+ * WIDTH_THRESHOLD but stored uncompressed.
747757 */
748758 acquire_func_colLargeRowIndexes = colLargeRowIndexes ;
759+ acquire_func_colLargeRowLength = colLargeRowLength ;
749760 pgstat_progress_update_param (PROGRESS_ANALYZE_PHASE ,
750761 inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH :
751762 PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS );
@@ -758,6 +769,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
758769 rows , targrows ,
759770 & totalrows , & totaldeadrows );
760771 acquire_func_colLargeRowIndexes = NULL ;
772+ acquire_func_colLargeRowLength = NULL ;
761773 if (ctx )
762774 MemoryContextSwitchTo (anl_context );
763775 }
@@ -892,6 +904,12 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
892904 get_attribute_options (onerel -> rd_id , stats -> attr -> attnum );
893905
894906 stats -> tupDesc = onerel -> rd_att ;
907+ /*
908+ * Record the total length and the number of too-wide rows in the
909+ * sample, so that stawidth is not computed from the narrow rows only.
910+ */
911+ stats -> totalwidelength = colLargeRowLength [stats -> attr -> attnum - 1 ];
912+ stats -> widerow_num = numrows - validRowsLength ;
895913
896914 if (validRowsLength > 0 )
897915 {
@@ -944,7 +962,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
944962 // that every item was >= WIDTH_THRESHOLD in width.
945963 stats -> stats_valid = true;
946964 stats -> stanullfrac = 0.0 ;
947- stats -> stawidth = WIDTH_THRESHOLD ;
965+ stats -> stawidth = stats -> totalwidelength / numrows ;
948966 stats -> stadistinct = 0.0 ; /* "unknown" */
949967 }
950968 stats -> rows = rows ; // Reset to original rows
@@ -2419,6 +2437,154 @@ acquire_index_number_of_blocks(Relation indexrel, Relation tablerel)
24192437 }
24202438}
24212439
2440+ /*
2441+ * parse_record_to_string
2442+ *
2443+ * CDB: a copy of record_in, but only parse the record string
2444+ * into separate strs for each column.
2445+ */
2446+ static void
2447+ parse_record_to_string (char * string , TupleDesc tupdesc , char * * values , bool * nulls )
2448+ {
2449+ char * ptr ;
2450+ int ncolumns ;
2451+ int i ;
2452+ bool needComma ;
2453+ StringInfoData buf ;
2454+
2455+ Assert (string != NULL );
2456+ Assert (values != NULL );
2457+ Assert (nulls != NULL );
2458+
2459+ ncolumns = tupdesc -> natts ;
2460+ needComma = false;
2461+
2462+ /*
2463+ * Scan the string. We use "buf" to accumulate the de-quoted data for
2464+ * each column, which is then fed to the appropriate input converter.
2465+ */
2466+ ptr = string ;
2467+
2468+ /* Allow leading whitespace */
2469+ while (* ptr && isspace ((unsigned char ) * ptr ))
2470+ ptr ++ ;
2471+ if (* ptr ++ != '(' )
2472+ {
2473+ ereport (ERROR ,
2474+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
2475+ errmsg ("malformed record literal: \"%s\"" , string ),
2476+ errdetail ("Missing left parenthesis." )));
2477+ }
2478+
2479+ initStringInfo (& buf );
2480+
2481+ for (i = 0 ; i < ncolumns ; i ++ )
2482+ {
2483+ /* Ignore dropped columns in datatype, but fill with nulls */
2484+ if (TupleDescAttr (tupdesc , i )-> attisdropped )
2485+ {
2486+ values [i ] = NULL ;
2487+ nulls [i ] = true;
2488+ continue ;
2489+ }
2490+
2491+ if (needComma )
2492+ {
2493+ /* Skip comma that separates prior field from this one */
2494+ if (* ptr == ',' )
2495+ ptr ++ ;
2496+ else
2497+ {
2498+ /* *ptr must be ')' */
2499+ ereport (ERROR ,
2500+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
2501+ errmsg ("malformed record literal: \"%s\"" , string ),
2502+ errdetail ("Too few columns." )));
2503+ }
2504+ }
2505+
2506+ /* Check for null: completely empty input means null */
2507+ if (* ptr == ',' || * ptr == ')' )
2508+ {
2509+ values [i ] = NULL ;
2510+ nulls [i ] = true;
2511+ }
2512+ else
2513+ {
2514+ /* Extract string for this column */
2515+ bool inquote = false;
2516+
2517+ resetStringInfo (& buf );
2518+ while (inquote || !(* ptr == ',' || * ptr == ')' ))
2519+ {
2520+ char ch = * ptr ++ ;
2521+
2522+ if (ch == '\0' )
2523+ {
2524+ ereport (ERROR ,
2525+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
2526+ errmsg ("malformed record literal: \"%s\"" ,
2527+ string ),
2528+ errdetail ("Unexpected end of input." )));
2529+ }
2530+ if (ch == '\\' )
2531+ {
2532+ if (* ptr == '\0' )
2533+ {
2534+ ereport (ERROR ,
2535+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
2536+ errmsg ("malformed record literal: \"%s\"" ,
2537+ string ),
2538+ errdetail ("Unexpected end of input." )));
2539+ }
2540+ appendStringInfoChar (& buf , * ptr ++ );
2541+ }
2542+ else if (ch == '"' )
2543+ {
2544+ if (!inquote )
2545+ inquote = true;
2546+ else if (* ptr == '"' )
2547+ {
2548+ /* doubled quote within quote sequence */
2549+ appendStringInfoChar (& buf , * ptr ++ );
2550+ }
2551+ else
2552+ inquote = false;
2553+ }
2554+ else
2555+ appendStringInfoChar (& buf , ch );
2556+ }
2557+
2558+ values [i ] = palloc (strlen (buf .data ) + 1 );
2559+ memcpy (values [i ], buf .data , strlen (buf .data ) + 1 );
2560+ nulls [i ] = false;
2561+ }
2562+
2563+ /*
2564+ * Prep for next column
2565+ */
2566+ needComma = true;
2567+ }
2568+
2569+ if (* ptr ++ != ')' )
2570+ {
2571+ ereport (ERROR ,
2572+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
2573+ errmsg ("malformed record literal: \"%s\"" , string ),
2574+ errdetail ("Too many columns." )));
2575+ }
2576+ /* Allow trailing whitespace */
2577+ while (* ptr && isspace ((unsigned char ) * ptr ))
2578+ ptr ++ ;
2579+ if (* ptr )
2580+ {
2581+ ereport (ERROR ,
2582+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
2583+ errmsg ("malformed record literal: \"%s\"" , string ),
2584+ errdetail ("Junk after right parenthesis." )));
2585+ }
2586+ }
2587+
24222588/*
24232589 * Build a querydesc for a sql, set "dest" to portal->holdStore
24242590 */
@@ -2493,6 +2659,7 @@ process_sample_rows(Portal portal,
24932659 * global variable to avoid changing the AcquireSampleRowsFunc prototype.
24942660 */
24952661 Bitmapset * * colLargeRowIndexes = acquire_func_colLargeRowIndexes ;
2662+ double * colLargeRowLength = acquire_func_colLargeRowLength ;
24962663 TupleDesc relDesc = RelationGetDescr (onerel );
24972664 TupleDesc funcTupleDesc ;
24982665 TupleDesc sampleTupleDesc ;
@@ -2538,7 +2705,7 @@ process_sample_rows(Portal portal,
25382705 TupleDescInitEntry (funcTupleDesc , (AttrNumber ) 1 , "" , FLOAT8OID , -1 , 0 );
25392706 TupleDescInitEntry (funcTupleDesc , (AttrNumber ) 2 , "" , FLOAT8OID , -1 , 0 );
25402707 TupleDescInitEntry (funcTupleDesc , (AttrNumber ) 3 , "" , FLOAT8ARRAYOID , -1 , 0 );
2541-
2708+
25422709 for (i = 0 ; i < relDesc -> natts ; i ++ )
25432710 {
25442711 Form_pg_attribute attr = TupleDescAttr (relDesc , i );
@@ -2669,17 +2836,18 @@ process_sample_rows(Portal portal,
26692836 deconstruct_array (arrayVal , FLOAT8OID , 8 , true, 'd' ,
26702837 & largelength , & nulls , & numelems );
26712838
2672- index = 0 ;
26732839 for (i = 0 ; i < relDesc -> natts ; i ++ )
26742840 {
26752841 Form_pg_attribute attr = TupleDescAttr (relDesc , i );
26762842
26772843 if (attr -> attisdropped )
26782844 continue ;
26792845
2680- if (toolarge [index ] == '1' )
2846+ if (largelength [i ] != (Datum ) 0 )
2847+ {
26812848 colLargeRowIndexes [i ] = bms_add_member (colLargeRowIndexes [i ], sampleTuples );
2682- index ++ ;
2849+ colLargeRowLength [i ] += DatumGetFloat8 (largelength [i ]);
2850+ }
26832851 }
26842852 }
26852853
@@ -3421,7 +3589,7 @@ compute_distinct_stats(VacAttrStatsP stats,
34213589 /* Do the simple null-frac and width stats */
34223590 stats -> stanullfrac = (double ) null_cnt / (double ) samplerows ;
34233591 if (is_varwidth )
3424- stats -> stawidth = total_width / (double ) nonnull_cnt ;
3592+ stats -> stawidth = ( total_width + stats -> totalwidelength ) / (double ) ( nonnull_cnt + stats -> widerow_num ) ;
34253593 else
34263594 stats -> stawidth = stats -> attrtype -> typlen ;
34273595
@@ -3603,7 +3771,7 @@ compute_distinct_stats(VacAttrStatsP stats,
36033771 stats -> stawidth = 0 ; /* "unknown" */
36043772 else
36053773 stats -> stawidth = stats -> attrtype -> typlen ;
3606- stats -> stadistinct = 0.0 ; /* "unknown" */
3774+ stats -> stadistinct = 0.0 ; /* "unknown" */
36073775 }
36083776
36093777 /* We don't need to bother cleaning up any of our temporary palloc's */
@@ -3814,7 +3982,7 @@ compute_scalar_stats(VacAttrStatsP stats,
38143982 /* Do the simple null-frac and width stats */
38153983 stats -> stanullfrac = (double ) null_cnt / (double ) samplerows ;
38163984 if (is_varwidth )
3817- stats -> stawidth = total_width / (double ) nonnull_cnt ;
3985+ stats -> stawidth = ( total_width + stats -> totalwidelength ) / (double ) ( nonnull_cnt + stats -> widerow_num ) ;
38183986 else
38193987 stats -> stawidth = stats -> attrtype -> typlen ;
38203988
@@ -4167,7 +4335,7 @@ compute_scalar_stats(VacAttrStatsP stats,
41674335 /* Do the simple null-frac and width stats */
41684336 stats -> stanullfrac = (double ) null_cnt / (double ) samplerows ;
41694337 if (is_varwidth )
4170- stats -> stawidth = total_width / (double ) nonnull_cnt ;
4338+ stats -> stawidth = ( total_width + stats -> totalwidelength ) / (double ) ( nonnull_cnt + stats -> widerow_num ) ;
41714339 else
41724340 stats -> stawidth = stats -> attrtype -> typlen ;
41734341 /* Assume all too-wide values are distinct, so it's a unique column */
0 commit comments