analyze.c
来自「PostgreSQL 8.1.4的源码 适用于Linux下的开源数据库系统」· C语言 代码 · 共 2,242 行 · 第 1/5 页
C
2,242 行
qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows); /* * Estimate total numbers of rows in relation. */ if (bs.m > 0) { *totalrows = floor((liverows * totalblocks) / bs.m + 0.5); *totaldeadrows = floor((deadrows * totalblocks) / bs.m + 0.5); } else { *totalrows = 0.0; *totaldeadrows = 0.0; } /* * Emit some interesting relation info */ ereport(elevel, (errmsg("\"%s\": scanned %d of %u pages, " "containing %.0f live rows and %.0f dead rows; " "%d rows in sample, %.0f estimated total rows", RelationGetRelationName(onerel), bs.m, totalblocks, liverows, deadrows, numrows, *totalrows))); return numrows;}/* Select a random value R uniformly distributed in 0 < R < 1 */static doublerandom_fract(void){ long z; /* random() can produce endpoint values, try again if so */ do { z = random(); } while (z <= 0 || z >= MAX_RANDOM_VALUE); return (double) z / (double) MAX_RANDOM_VALUE;}/* * These two routines embody Algorithm Z from "Random sampling with a * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1 * (Mar. 1985), Pages 37-57. Vitter describes his algorithm in terms * of the count S of records to skip before processing another record. * It is computed primarily based on t, the number of records already read. * The only extra state needed between calls is W, a random state variable. * * init_selection_state computes the initial W value. * * Given that we've already read t records (t >= n), get_next_S * determines the number of records to skip before the next record is * processed. */static doubleinit_selection_state(int n){ /* Initial value of W (for use when Algorithm Z is first applied) */ return exp(-log(random_fract()) / n);}static doubleget_next_S(double t, int n, double *stateptr){ double S; /* The magic constant here is T from Vitter's paper */ if (t <= (22.0 * n)) { /* Process records using Algorithm X until t is large enough */ double V, quot; V = random_fract(); /* Generate V */ S = 0; t += 1; /* Note: "num" in Vitter's code is always equal to t - n */ quot = (t - (double) n) / t; /* Find min S satisfying (4.1) */ while (quot > V) { S += 1; t += 1; quot *= (t - (double) n) / t; } } else { /* Now apply Algorithm Z */ double W = *stateptr; double term = t - (double) n + 1; for (;;) { double numer, numer_lim, denom; double U, X, lhs, rhs, y, tmp; /* Generate U and X */ U = random_fract(); X = t * (W - 1.0); S = floor(X); /* S is tentatively set to floor(X) */ /* Test if U <= h(S)/cg(X) in the manner of (6.3) */ tmp = (t + 1) / term; lhs = exp(log(((U * tmp * tmp) * (term + S)) / (t + X)) / n); rhs = (((t + X) / (term + S)) * term) / t; if (lhs <= rhs) { W = rhs / lhs; break; } /* Test if U <= f(S)/cg(X) */ y = (((U * (t + 1)) / term) * (t + S + 1)) / (t + X); if ((double) n < S) { denom = t; numer_lim = term + S; } else { denom = t - (double) n + S; numer_lim = t + 1; } for (numer = t + S; numer >= numer_lim; numer -= 1) { y *= numer / denom; denom -= 1; } W = exp(-log(random_fract()) / n); /* Generate W in advance */ if (exp(log(y) / n) <= (t + X) / t) break; } *stateptr = W; } return S;}/* * qsort comparator for sorting rows[] array */static intcompare_rows(const void *a, const void *b){ HeapTuple ha = *(HeapTuple *) a; HeapTuple hb = *(HeapTuple *) b; BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self); OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self); BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self); OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self); if (ba < bb) return -1; if (ba > bb) return 1; if (oa < ob) return -1; if (oa > ob) return 1; return 0;}/* * update_attstats() -- update attribute statistics for one relation * * Statistics are stored in several places: the pg_class row for the * relation has stats about the whole relation, and there is a * pg_statistic row for each (non-system) attribute that has ever * been analyzed. The pg_class values are updated by VACUUM, not here. * * pg_statistic rows are just added or updated normally. This means * that pg_statistic will probably contain some deleted rows at the * completion of a vacuum cycle, unless it happens to get vacuumed last. * * To keep things simple, we punt for pg_statistic, and don't try * to compute or store rows for pg_statistic itself in pg_statistic. * This could possibly be made to work, but it's not worth the trouble. * Note analyze_rel() has seen to it that we won't come here when * vacuuming pg_statistic itself. * * Note: if two backends concurrently try to analyze the same relation, * the second one is likely to fail here with a "tuple concurrently * updated" error. This is slightly annoying, but no real harm is done. * We could prevent the problem by using a stronger lock on the * relation for ANALYZE (ie, ShareUpdateExclusiveLock instead * of AccessShareLock); but that cure seems worse than the disease, * especially now that ANALYZE doesn't start a new transaction * for each relation. The lock could be held for a long time... */static voidupdate_attstats(Oid relid, int natts, VacAttrStats **vacattrstats){ Relation sd; int attno; if (natts <= 0) return; /* nothing to do */ sd = heap_open(StatisticRelationId, RowExclusiveLock); for (attno = 0; attno < natts; attno++) { VacAttrStats *stats = vacattrstats[attno]; HeapTuple stup, oldtup; int i, k, n; Datum values[Natts_pg_statistic]; char nulls[Natts_pg_statistic]; char replaces[Natts_pg_statistic]; /* Ignore attr if we weren't able to collect stats */ if (!stats->stats_valid) continue; /* * Construct a new pg_statistic tuple */ for (i = 0; i < Natts_pg_statistic; ++i) { nulls[i] = ' '; replaces[i] = 'r'; } i = 0; values[i++] = ObjectIdGetDatum(relid); /* starelid */ values[i++] = Int16GetDatum(stats->attr->attnum); /* staattnum */ values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */ values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */ values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */ for (k = 0; k < STATISTIC_NUM_SLOTS; k++) { values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */ } for (k = 0; k < STATISTIC_NUM_SLOTS; k++) { values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */ } for (k = 0; k < STATISTIC_NUM_SLOTS; k++) { int nnum = stats->numnumbers[k]; if (nnum > 0) { Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum)); ArrayType *arry; for (n = 0; n < nnum; n++) numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]); /* XXX knows more than it should about type float4: */ arry = construct_array(numdatums, nnum, FLOAT4OID, sizeof(float4), false, 'i'); values[i++] = PointerGetDatum(arry); /* stanumbersN */ } else { nulls[i] = 'n'; values[i++] = (Datum) 0; } } for (k = 0; k < STATISTIC_NUM_SLOTS; k++) { if (stats->numvalues[k] > 0) { ArrayType *arry; arry = construct_array(stats->stavalues[k], stats->numvalues[k], stats->attr->atttypid, stats->attrtype->typlen, stats->attrtype->typbyval, stats->attrtype->typalign); values[i++] = PointerGetDatum(arry); /* stavaluesN */ } else { nulls[i] = 'n'; values[i++] = (Datum) 0; } } /* Is there already a pg_statistic tuple for this attribute? */ oldtup = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(stats->attr->attnum), 0, 0); if (HeapTupleIsValid(oldtup)) { /* Yes, replace it */ stup = heap_modifytuple(oldtup, RelationGetDescr(sd), values, nulls, replaces); ReleaseSysCache(oldtup); simple_heap_update(sd, &stup->t_self, stup); } else { /* No, insert new tuple */ stup = heap_formtuple(sd->rd_att, values, nulls); simple_heap_insert(sd, stup); } /* update indexes too */ CatalogUpdateIndexes(sd, stup); heap_freetuple(stup); } heap_close(sd, RowExclusiveLock);}/* * Standard fetch function for use by compute_stats subroutines. * * This exists to provide some insulation between compute_stats routines * and the actual storage of the sample data. */static Datumstd_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull){ int attnum = stats->tupattnum; HeapTuple tuple = stats->rows[rownum]; TupleDesc tupDesc = stats->tupDesc; return heap_getattr(tuple, attnum, tupDesc, isNull);}/* * Fetch function for analyzing index expressions. * * We have not bothered to construct index tuples, instead the data is * just in Datum arrays. */static Datumind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull){ int i; /* exprvals and exprnulls are already offset for proper column */ i = rownum * stats->rowstride; *isNull = stats->exprnulls[i]; return stats->exprvals[i];}/*========================================================================== * * Code below this point represents the "standard" type-specific statistics * analysis algorithms. This code can be replaced on a per-data-type basis * by setting a nonzero value in pg_type.typanalyze. * *========================================================================== *//* * To avoid consuming too much memory during analysis and/or too much space * in the resulting pg_statistic rows, we ignore varlena datums that are wider * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV * and distinct-value calculations since a wide value is unlikely to be * duplicated at all, much less be a most-common value. For the same reason, * ignoring wide values will not affect our estimates of histogram bin * boundaries very much. */#define WIDTH_THRESHOLD 1024#define swapInt(a,b) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)#define swapDatum(a,b) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)/* * Extra information used by the default analysis routines */typedef struct{ Oid eqopr; /* '=' operator for datatype, if any */ Oid eqfunc; /* and associated function */ Oid ltopr; /* '<' operator for datatype, if any */} StdAnalyzeData;typedef struct{ Datum value; /* a data value */ int tupno; /* position index for tuple it came from */} ScalarItem;typedef struct{ int count; /* # of duplicates */ int first; /* values[] index of first occurrence */} ScalarMCVItem;/* context information for compare_scalars() */static FmgrInfo *datumCmpFn;static SortFunctionKind datumCmpFnKind;static int *datumCmpTupnoLink;static void compute_minimal_stats(VacAttrStatsP stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows);static void compute_scalar_stats(VacAttrStatsP stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows);static int compare_scalars(const void *a, const void *b);static int compare_mcvs(const void *a, const void *b);/* * std_typanalyze -- the default type-specific typanalyze function */static boolstd_typanalyze(VacAttrStats *stats){ Form_pg_attribute attr = stats->attr; Operator func_operator; Oid eqopr = InvalidOid; Oid eqfunc = InvalidOid; Oid ltopr = InvalidOid; StdAnalyzeData *mystats; /* If the attstattarget column is negative, use the default value */ /* NB: it is okay to scribble on stats->attr since it's a copy */ if (attr->attstattarget < 0) attr->attstattarget = default_statistics_target; /* If column has no "=" operator, we can't do much of anything */ func_operator = equality_oper(attr->atttypid, true); if (func_operator != NULL) { eqopr = oprid(func_operator);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?