8.14 Example

Here we take an example from the test suite.

#
# Tracing of ORDER BY & GROUP BY simplification.
#
SET OPTIMIZER_TRACE="enabled=on",END_MARKERS_IN_JSON=on; # be readable
SET OPTIMIZER_TRACE_MAX_MEM_SIZE=1000000; # avoid small default
CREATE TABLE t1 (
pk INT, col_int_key INT,
col_varchar_key VARCHAR(1), col_varchar_nokey VARCHAR(1)
);
INSERT INTO t1 VALUES
(10,7,'v','v'),(11,0,'s','s'),(12,9,'l','l'),(13,3,'y','y'),(14,4,'c','c'),
(15,2,'i','i'),(16,5,'h','h'),(17,3,'q','q'),(18,1,'a','a'),(19,3,'v','v'),
(20,6,'u','u'),(21,7,'s','s'),(22,5,'y','y'),(23,1,'z','z'),(24,204,'h','h'),
(25,224,'p','p'),(26,9,'e','e'),(27,5,'i','i'),(28,0,'y','y'),(29,3,'w','w');
CREATE TABLE t2 (
pk INT, col_int_key INT,
col_varchar_key VARCHAR(1), col_varchar_nokey VARCHAR(1),
PRIMARY KEY (pk)
);
INSERT INTO t2 VALUES
(1,4,'b','b'),(2,8,'y','y'),(3,0,'p','p'),(4,0,'f','f'),(5,0,'p','p'),
(6,7,'d','d'),(7,7,'f','f'),(8,5,'j','j'),(9,3,'e','e'),(10,188,'u','u'),
(11,4,'v','v'),(12,9,'u','u'),(13,6,'i','i'),(14,1,'x','x'),(15,5,'l','l'),
(16,6,'q','q'),(17,2,'n','n'),(18,4,'r','r'),(19,231,'c','c'),(20,4,'h','h'),
(21,3,'k','k'),(22,3,'t','t'),(23,7,'t','t'),(24,6,'k','k'),(25,7,'g','g'),
(26,9,'z','z'),(27,4,'n','n'),(28,4,'j','j'),(29,2,'l','l'),(30,1,'d','d'),
(31,2,'t','t'),(32,194,'y','y'),(33,2,'i','i'),(34,3,'j','j'),(35,8,'r','r'),
(36,4,'b','b'),(37,9,'o','o'),(38,4,'k','k'),(39,5,'a','a'),(40,5,'f','f'),
(41,9,'t','t'),(42,3,'c','c'),(43,8,'c','c'),(44,0,'r','r'),(45,98,'k','k'),
(46,3,'l','l'),(47,1,'o','o'),(48,0,'t','t'),(49,189,'v','v'),(50,8,'x','x'),
(51,3,'j','j'),(52,3,'x','x'),(53,9,'k','k'),(54,6,'o','o'),(55,8,'z','z'),
(56,3,'n','n'),(57,9,'c','c'),(58,5,'d','d'),(59,9,'s','s'),(60,2,'j','j'),
(61,2,'w','w'),(62,5,'f','f'),(63,8,'p','p'),(64,6,'o','o'),(65,9,'f','f'),
(66,0,'x','x'),(67,3,'q','q'),(68,6,'g','g'),(69,5,'x','x'),(70,8,'p','p'),
(71,2,'q','q'),(72,120,'q','q'),(73,25,'v','v'),(74,1,'g','g'),(75,3,'l','l'),
(76,1,'w','w'),(77,3,'h','h'),(78,153,'c','c'),(79,5,'o','o'),(80,9,'o','o'),
(81,1,'v','v'),(82,8,'y','y'),(83,7,'d','d'),(84,6,'p','p'),(85,2,'z','z'),
(86,4,'t','t'),(87,7,'b','b'),(88,3,'y','y'),(89,8,'k','k'),(90,4,'c','c'),
(91,6,'z','z'),(92,1,'t','t'),(93,7,'o','o'),(94,1,'u','u'),(95,0,'t','t'),
(96,2,'k','k'),(97,7,'u','u'),(98,2,'b','b'),(99,1,'m','m'),(100,5,'o','o');
SELECT SUM(alias2.col_varchar_nokey) , alias2.pk AS field2 FROM t1 AS alias1
STRAIGHT_JOIN t2 AS alias2 ON alias2.pk = alias1.col_int_key WHERE alias1.pk
GROUP BY field2 ORDER BY alias1.col_int_key,alias2.pk ;
SUM(alias2.col_varchar_nokey)	field2
0	1
0	2
0	3
0	4
0	5
0	6
0	7
0	9

Now we look at the trace:

SELECT * FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
QUERY	TRACE	MISSING_BYTES_BEYOND_MAX_MEM_SIZE	INSUFFICIENT_PRIVILEGES
SELECT SUM(alias2.col_varchar_nokey) , alias2.pk AS field2 FROM t1 AS alias1
STRAIGHT_JOIN t2 AS alias2 ON alias2.pk = alias1.col_int_key WHERE alias1.pk
GROUP BY field2 ORDER BY alias1.col_int_key,alias2.pk	{

This was the first column: it repeats the query (this is a useful mark when several traces are remembered thanks to optimizer_trace_offset and optimizer_trace_limit). Now the trace. The statement's execution is naturally made of "steps":

"steps": [
  {
    "join_preparation": {

This is a join's preparation

 "select#": 1,

for the first SELECT of the statement (which has only one, here). Here are steps of the join's preparation:

"steps": [
  {
    "expanded_query": "/* select#1 */ select \
       sum(`test`.`alias2`.`col_varchar_nokey`) AS \ 
       `SUM(alias2.col_varchar_nokey)`,`test`.`alias2`.`pk` AS `field2` \
       from (`test`.`t1` `alias1` straight_join `test`.`t2` `alias2` \ 
       on((`test`.`alias2`.`pk` = `test`.`alias1`.`col_int_key`))) \
       where `test`.`alias1`.`pk` \
       group by `test`.`alias2`.`pk` \
       order by `test`.`alias1`.`col_int_key`,`test`.`alias2`.`pk`"
   }

Above is the query as it is in the join's preparation: fields have been resolved to their database and table, and each SELECT is annotated with its number (useful with subqueries).

] /* steps */
       } /* join_preparation */
     },
     {

Off to optimization:

"join_optimization": {
         "select#": 1,
         "steps": [
           {
             "condition_processing": {
               "condition": "WHERE",
               "original_condition": "(`test`.`alias1`.`pk` and \
               (`test`.`alias2`.`pk` = `test`.`alias1`.`col_int_key`))",
               "steps": [
                 {
                   "transformation": "equality_propagation",
                   "resulting_condition": "(`test`.`alias1`.`pk` and \
                   multiple equal(`test`.`alias2`.`pk`, \
                   `test`.`alias1`.`col_int_key`))"
                 },
                 {
                   "transformation": "constant_propagation",
                   "resulting_condition": "(`test`.`alias1`.`pk` and \
                   multiple equal(`test`.`alias2`.`pk`, \
                   `test`.`alias1`.`col_int_key`))"
                 },
                 {
                   "transformation": "trivial_condition_removal",
                   "resulting_condition": "(`test`.`alias1`.`pk` and \
                   multiple equal(`test`.`alias2`.`pk`, \
                   `test`.`alias1`.`col_int_key`))"

Not much happened in equality propagation above.

                 }
               ] /* steps */
             } /* condition_processing */
           },
           {
             "ref_optimizer_key_uses": [
               {
                 "database": "test",
                 "table": "alias2",
                 "field": "pk",
                 "equals": "`test`.`alias1`.`col_int_key`",
                 "null_rejecting": true

A possible ref access has been identified, and it is NULL-rejecting: any NULL value in `test`.`alias1`.`col_int_key` cannot have a match (it could have a match if the operator were <=>).

                }
             ] /* ref_optimizer_key_uses */
           },
           {

Now for every table in the query we estimate the cost of, and number of records returned by, a table scan, a range access,

"records_estimation": [
               {
                 "database": "test",
                 "table": "alias1",
                 "const_keys_added": {
                   "keys": [
                   ] /* keys */,
                   "cause": "group_by"
                 } /* const_keys_added */,
                 "range_analysis": {
                   "table_scan": {
                     "records": 20,
                     "cost": 8.1977
                   } /* table_scan */
                 } /* range_analysis */
               },
               {
                 "database": "test",
                 "table": "alias2",
                 "const_keys_added": {
                   "keys": [
                     "PRIMARY"
                   ] /* keys */,
                   "cause": "group_by"
                 } /* const_keys_added */,
                 "range_analysis": {
                   "table_scan": {
                     "records": 100,
                     "cost": 24.588
                   } /* table_scan */,
                   "potential_range_indices": [
                     {
                       "index": "PRIMARY",
                       "usable": true,
                       "key_parts": [
                         "pk"
                       ] /* key_parts */
                     }
                   ] /* potential_range_indices */,
                   "setup_range_conditions": [
                   ] /* setup_range_conditions */,
                   "group_index_range": {
                     "chosen": false,

Not possible to use GROUP_MIN_MAX because it can handle only one table, and we have two in the join:

 "cause": "not_single_table" 
  } /* group_index_range */

No range access is possible.

} /* range_analysis */
               }
             ] /* records_estimation */
           },
           {

Finding an optimal order for tables (greedy search); actually as this is STRAIGHT_JOIN only the requested order is explored, and access methods are selected:

"considered_execution_plans": [
               {
                 "database": "test",
                 "table": "alias1",
                 "best_access_path": {
                   "considered_access_paths": [
                     {
                       "access_type": "scan",
                       "records": 20,
                       "cost": 2.0977,
                       "chosen": true

Table scan be chosen!

                     }
                   ] /* considered_access_paths */
                 } /* best_access_path */,
                 "cost_for_plan": 6.0977,
                 "records_for_plan": 20,

We estimate that reading the first table, and applying any conditions to it, will yield 20 rows.

                 "rest_of_plan": [
                   {
                     "database": "test",
                     "table": "alias2",
                     "best_access_path": {
                       "considered_access_paths": [
                         {
                           "access_type": "ref",
                           "index": "PRIMARY",
                           "records": 1,
                           "cost": 20.2,
                           "chosen": true

We choose ref access on the primary key of alias2.

                         },
                         {
                           "access_type": "scan",
                           "using_join_cache": true,
                           "records": 75,
                           "cost": 7.4917,
                           "chosen": false

But not table scan, because its amount of records (75) is far greater than that of ref access (1).

                         }
                       ] /* considered_access_paths */
                     } /* best_access_path */,
                     "cost_for_plan": 30.098,
                     "records_for_plan": 20,
                     "chosen": true
                   }
                 ] /* rest_of_plan */
               }
             ] /* considered_execution_plans */
           },
           {

Now that the order of tables is fixed, we can split the WHERE condition in chunks which can be tested early ("push down of conditions down the join tree"):

"attaching_conditions_to_tables": {
               "original_condition": "((`test`.`alias2`.`pk` = \
               `test`.`alias1`.`col_int_key`) and `test`.`alias1`.`pk`)",
               "attached_conditions_computation": [
               ] /* attached_conditions_computation */,
               "attached_conditions_summary": [
                 {
                   "database": "test",
                   "table": "alias1",
                   "attached": "(`test`.`alias1`.`pk` and \
                   (`test`.`alias1`.`col_int_key` is not null))"

This condition above can be tested on rows of alias1 without even reading rows of alias2.

                 },
                 {
                   "database": "test",
                   "table": "alias2",
                   "attached": null
                 }
               ] /* attached_conditions_summary */
             } /* attaching_conditions_to_tables */
           },
           {

Now we try to simplify ORDER BY:

              "clause_processing": {
               "clause": "ORDER BY",
               "original_clause": "`test`.`alias1`.`col_int_key`,`test`.`alias2`.`pk`",
               "items": [
                 {
                   "item": "`test`.`alias1`.`col_int_key`"
                 },
                 {
                   "item": "`test`.`alias2`.`pk`",
                   "eq_ref_to_preceding_items": true

Because the WHERE clause contains `alias2`.`pk`=`alias1`.`col_int_key`, ordering by both columns is a waste: can just order by the first column, the second will always be equal to it.

                 }
               ] /* items */,
               "resulting_clause_is_simple": true,
               "resulting_clause": "`test`.`alias1`.`col_int_key`"

So we get a shorter ORDER BY clause - and this is not visible in EXPLAIN or EXPLAIN EXTENDED!! This simplification can be worth it: this shorter clause, being single-column and single-table, could be implemented by an index scan...

              } /* clause_processing */
           },
           {
             "clause_processing": {
               "clause": "GROUP BY",
               "original_clause": "`test`.`alias2`.`pk`",
               "items": [
                 {
                   "item": "`test`.`alias2`.`pk`"
                 }
               ] /* items */,
               "resulting_clause_is_simple": false,
               "resulting_clause": "`test`.`alias2`.`pk`"
             } /* clause_processing */
           },
           {
             "refine_plan": [
               {
                 "database": "test",
                 "table": "alias1",
                 "scan_type": "table"
               },
               {
                 "database": "test",
                 "table": "alias2"
               }
             ] /* refine_plan */
           }
         ] /* steps */
       } /* join_optimization */
     },
     {

Now we execute the join, and nothing interesting happens here:

       "join_execution": {
         "select#": 1,
         "steps": [
         ] /* steps */
       } /* join_execution */
     }
   ] /* steps */
 }	0	0

This was just an example. All traces have the same basic structure, but if a statement uses subqueries, there are several join preparations/optimizations/executions, subquery-specific transformations not shown here...


User Comments
User comments in this section are, as the name implies, provided by MySQL users. The MySQL documentation team is not responsible for, nor do they endorse, any of the information provided here.
Sign Up Login You must be logged in to post a comment.