Author: 令猴
MySQL从8.0.13开始支持functional index。Functional index类似于ORACLE的Function-Based Indexes。该索引可以根据将索引定义的表达式的值按照索引顺序存到索引里,进而减少表达式的计算,加速查询。
下面我们看一下如何创建一个functional index:
CREATE TABLE t1 (col1 INT, col2 INT, INDEX func_index ((ABS(col1))));
CREATE INDEX idx1 ON t1 ((col1 + col2));
CREATE INDEX idx2 ON t1 ((col1 + col2), (col1 - col2), col1);
ALTER TABLE t1 ADD INDEX ((col1 * 40) DESC);
接下来我们继续看一下functional index的效果:
mysql> CREATE TABLE t1 (col1 INT, col2 INT);
Query OK, 0 rows affected (0.13 sec)
mysql> SELECT * FROM t1 WHERE col1+col2 > 10;
Empty set (0.01 sec)
mysql> EXPLAIN SELECT * FROM t1 WHERE col1+col2 > 10;
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
| 1 | SIMPLE | t1 | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | Using where |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
1 row in set, 1 warning (0.01 sec)
mysql> CREATE INDEX idx1 ON t1 ((col1 + col2));
Query OK, 0 rows affected (0.14 sec)
Records: 0 Duplicates: 0 Warnings: 0
mysql> EXPLAIN SELECT * FROM t1 WHERE col1+col2 > 10;
+----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+-------------+
| 1 | SIMPLE | t1 | NULL | range | idx1 | idx1 | 9 | NULL | 1 | 100.00 | Using where |
+----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+-------------+
1 row in set, 1 warning (0.00 sec)
从上面的例子中我们可以看到查询中使用了functional索引 idx1来加速查询。
MySQL的functinal index是利用generated column来辅助实现的,后面的章节中我们会详细的进行分析。所以对于创建functional index的一些限制可以参考:创建generated column 以及增加generated column。
下面我们从源码来看一下MySQL functional index的实现过程。
上面的流程图是MySQL创建functional index的一个基本流程。我们重点看一下add_functional_index_to_create_list这个函数的处理过程。
/**
Prepares a functional index by adding a hidden indexed generated column for the key part.
A functional index is implemented as a hidden generated column over the
expression specified in the index, and the hidden generated column is then indexed. This function adds a hidden generated column to the Create_list, and updates the key specification to point to this new column. The generated column is given a name that is a hash of the key name and the key part number.
*/
static bool add_functional_index_to_create_list(THD *thd,
Key_spec *key_spec,
Alter_info *alter_info,
Key_part_spec *kp,
uint key_part_number,
HA_CREATE_INFO *create_info) {
// A functional index cannot be a primary key
/* 这里限制了functional index 不能作为主键,因为它是个generated column */
if (key_spec->type == KEYTYPE_PRIMARY) {
my_error(ER_FUNCTIONAL_INDEX_PRIMARY_KEY, MYF(0));
return true;
}
// If the key isn't given a name explicitly by the user, we must auto-generate
// a name here. "Normal" indexes will be given a name in prepare_key(), but
// that is too late for functional indexes since we want the hidden generated
// column name to be based on the index name.
// 生成一个默认的索引名称
if (key_spec->name.str == nullptr) {
std::string key_name;
int count = 2;
key_name.assign("functional_index");
while (key_name_exists(alter_info->key_list, key_name, nullptr)) {
key_name.assign("functional_index_");
key_name.append(std::to_string(count++));
}
key_spec->name.length = key_name.size();
key_spec->name.str = strmake_root(thd->stmt_arena->mem_root,
key_name.c_str(), key_name.size());
} else { if (key_name_exists(alter_info->key_list,
{key_spec->name.str, key_spec->name.length},
key_spec)) {
my_error(ER_DUP_KEYNAME, MYF(0), key_spec->name.str);
return true;
}
}
// First we need to resolve the expression in the functional index so that we
// know the correct collation, data type, length etc...
ulong saved_privilege = thd->want_privilege;
thd->want_privilege = SELECT_ACL;
{
// Create a scope guard so that we are guaranteed that the privileges are
// set back to the original value.
auto handler_guard = create_scope_guard(
[thd, saved_privilege]() { thd->want_privilege = saved_privilege; });
Functional_index_error_handler error_handler(
{key_spec->name.str, key_spec->name.length}, thd);
Item *expr = kp->get_expression();
if (expr->type() == Item::FIELD_ITEM) {
my_error(ER_FUNCTIONAL_INDEX_ON_FIELD, MYF(0));
return true;
}
// 这里验证表达式的合法性,是否违反generated column的约束条件
if (pre_validate_value_generator_expr(kp->get_expression(),
key_spec->name.str, true)) {
return true;
}
Replace_field_processor_arg replace_field_argument(
thd, &alter_info->create_list, create_info, key_spec->name.str);
if (expr->walk(&Item::replace_field_processor, Item::WALK_PREFIX,
reinterpret_cast<uchar *>(&replace_field_argument))) {
return true;
}
if (kp->resolve_expression(thd)) return true;
}
// 默认隐式列生成一个名字
const char *field_name = make_functional_index_column_name(
{key_spec->name.str, key_spec->name.length}, key_part_number,
thd->stmt_arena->mem_root);
Item *item = kp->get_expression();
// Ensure that we aren't trying to index a field
DBUG_ASSERT(item->type() != Item::FIELD_ITEM); TABLE tmp_table;
TABLE_SHARE share;
tmp_table.s = &share;
init_tmp_table_share(thd, &share, "", 0, "", "", nullptr);
tmp_table.s->db_create_options = 0;
tmp_table.s->db_low_byte_first = false;
tmp_table.set_not_started();
// 生成generated column的创建信息
Create_field *cr = generate_create_field(thd, item, &tmp_table);
if (cr == nullptr) {
return true; /* purecov: deadcode */
}
if (is_blob(cr->sql_type)) {
my_error(ER_FUNCTIONAL_INDEX_ON_LOB, MYF(0));
return true;
}
cr->field_name = field_name;
cr->field = nullptr;
cr->hidden = dd::Column::enum_hidden_type::HT_HIDDEN_SQL;
cr->stored_in_db = false;
Value_generator *gcol_info = new (thd->mem_root) Value_generator();
gcol_info->expr_item = kp->get_expression();
// 生成一个virtual generated column
gcol_info->set_field_stored(false);
gcol_info->set_field_type(cr->sql_type); cr->gcol_info = gcol_info;
alter_info->create_list.push_back(cr);
alter_info->flags |= Alter_info::ALTER_ADD_COLUMN;
// 这里将KEY的索引列设置为隐式generated column
kp->set_name_and_prefix_length(field_name, 0);
return false;
}
函数的注释里面说的非常详细,functional index的创建过程依赖于generated column来做辅助。创建functional index的时候都要隐式的创建一个generated column,然后在该generated column上创建对应的索引。
上面我们看到了源码中是如何创建一个functional index。那么接下来我们继续看一下MySQL是如何为查询寻找合适的functional index的。
就拿上面的例子看一下调用堆栈:
EXPLAIN SELECT * FROM t1 WHERE col1+col2 > 10;
#0 substitute_gc (thd=0x2aab94000be0, select_lex=0x2aab94270298, where_cond=0x2aab94271ec8, group_list=0x0, order=0x0)
#1 0x0000000003049283 in JOIN::optimize (this=0x2aab94272750)
#2 0x0000000003165c32 in SELECT_LEX::optimize (this=0x2aab94270298, thd=0x2aab94000be0)
#3 0x000000000316221c in Sql_cmd_dml::execute_inner (this=0x2aab94272078, thd=0x2aab94000be0)
#4 0x00000000031614d3 in Sql_cmd_dml::execute (this=0x2aab94272078, thd=0x2aab94000be0)
#5 0x00000000030a7396 in mysql_execute_command (thd=0x2aab94000be0, first_level=true)
#6 0x00000000030ac74b in mysql_parse (thd=0x2aab94000be0, parser_state=0x2aab8c2462d0, force_primary_storage_engine=false)
#7 0x0000000003095b0d in dispatch_command (thd=0x2aab94000be0, com_data=0x2aab8c246c40, command=COM_QUERY)
#8 0x0000000003091d7d in do_command (thd=0x2aab94000be0)
#9 0x00000000033d145b in handle_connection (arg=0xcb9cee0)
#10 0x00000000066cd007 in pfs_spawn_thread (arg=0xca3bde0)
#11 0x00002aaaaacd4aa1 in start_thread () from /lib64/libpthread.so.0
#12 0x00002aaaabfb993d in clone () from /lib64/libc.so.6
上面的堆栈可以看到优化器调用了substitute_gc这个函数,这个函数就可以将WHERE,GROUP_BY 以及ORDER BY中的相关表达式替换为隐式的generated column,进而可以让优化器来选择functional index。我们再来研究一下substitute_gc这个函数的源码。
bool substitute_gc(THD *thd, SELECT_LEX *select_lex, Item *where_cond,
ORDER *group_list, ORDER *order) {
List<Field> indexed_gc;
Opt_trace_context *const trace = &thd->opt_trace;
Opt_trace_object trace_wrapper(trace);
Opt_trace_object subst_gc(trace, "substitute_generated_columns");
// Collect all GCs that are a part of a key
// 这里要遍历所有的表来收集所有可以被替换的generated columns。后面的代码中会分析哪些表达式可以被替换
for (TABLE_LIST *tl = select_lex->leaf_tables; tl; tl = tl->next_leaf) {
if (tl->table->s->keys == 0) continue;
for (uint i = 0; i < tl->table->s->fields; i++) {
Field *fld = tl->table->field[i];
// 这里判断只有在索引中的列并且generated column可以用来替换表达式才会作为候选的列。
if (fld->is_gcol() &&
!(fld->part_of_key.is_clear_all() &&
fld->part_of_prefixkey.is_clear_all()) &&
fld->gcol_info->expr_item->can_be_substituted_for_gc()) {
// Don't check allowed keys here as conditions/group/order use
// different keymaps for that.
indexed_gc.push_back(fld);
}
}
} // No GC in the tables used in the query
if (indexed_gc.elements == 0) return false;
if (where_cond) {
// Item_func::compile will dereference this pointer, provide valid value.
uchar i, *dummy = &i;
/**
这里会利用generated column来替换where_cond里面对应的表达式。
Item::gc_subst_analyzer 该虚函数定义了每一种Item是否需要进行generated column的替换过程
Item::gc_subst_transformer 该函数定义了每一种可替换的Item如何利用generated column进行替换
*/
where_cond->compile(&Item::gc_subst_analyzer, &dummy,
&Item::gc_subst_transformer,
(uchar *)&indexed_gc);
subst_gc.add("resulting_condition", where_cond);
}
if (!(group_list || order)) return false;
// Filter out GCs that do not have index usable for GROUP/ORDER
Field *gc;
List_iterator<Field> li(indexed_gc);
while ((gc = li++)) {
Key_map tkm = gc->part_of_key;
// 这里判断generated column相关的索引是否与group-by 或者 order-by的列有交集,如果没有相关性,就忽略。
tkm.intersect(group_list ? gc->table->keys_in_use_for_group_by
: gc->table->keys_in_use_for_order_by);
if (tkm.is_clear_all()) li.remove();
}
if (!indexed_gc.elements) return false;
// Index could be used for ORDER only if there is no GROUP
ORDER *list = group_list ? group_list : order;
bool changed = false;
for (ORDER *ord = list; ord; ord = ord->next) {
li.rewind();
// 这里判断group-by或者order-by的列是否是表达式或者函数来进行generated column替换。
if (!(*ord->item)->can_be_substituted_for_gc()) continue;
while ((gc = li++)) {
Item_func *tmp = pointer_cast<Item_func *>(*ord->item);
Item_field *field;
// 这里会根据表达式与generated column->gcol_info->expr_item进行比较来获取匹配的generated column
if ((field = get_gc_for_expr(&tmp, gc, gc->result_type()))) {
changed = true;
/* Add new field to field list. */
ord->item = select_lex->add_hidden_item(field);
break;
}
}
}
if (changed && trace->is_started()) {
String str;
SELECT_LEX::print_order(
&str, list,
enum_query_type(QT_TO_SYSTEM_CHARSET | QT_SHOW_SELECT_NUMBER |
QT_NO_DEFAULT_DB));
subst_gc.add_utf8(group_list ? "resulting_GROUP_BY" : "resulting_ORDER_BY",
str.ptr(), str.length());
}
return changed;
}
综上所述,本篇文章主要从源码层面对MySQL 8.0 实现的Functional index进行了一下简要的分析。Functional index主要依赖于generated column,利用内部隐式的创建一个generated column来辅助创建functional index。代码层面也比较容易理解,希望该篇文章能够帮助广大读者了解MySQL functional index的实现原理。