M
M
mr_blond972018-03-03 14:07:21
SQL query optimization
mr_blond97, 2018-03-03 14:07:21

Why doesn't the MySQL optimizer use all three index columns?

Percona MySQL 5.7


table schema:


CREATE TABLE Developer.Rate (
  ID bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT,
  TIME datetime NOT NULL,
  BASE varchar(3) NOT NULL,
  QUOTE varchar(3) NOT NULL,
  BID double NOT NULL,
  ASK double NOT NULL,
  PRIMARY KEY (ID),
  INDEX IDX_TIME (TIME),
  UNIQUE INDEX IDX_UK (BASE, QUOTE, TIME)
)
ENGINE = INNODB
ROW_FORMAT = COMPRESSED;


Request for the most recent data before the specified period, uses an incomplete unique key, 2 columns out of 3.

if you make a request in the usual way:
EXPLAIN FORMAT=JSON
SELECT
  BID
FROM 
  Rate
WHERE 
  BASE = 'EUR' 
  AND QUOTE = 'USD' 
  AND `TIME` <= (NOW() - INTERVAL 1 MONTH) 
ORDER BY 
  `TIME` DESC 
LIMIT 1
;

then explain will show that only the first 2 columns of the unique index are used in the index: BASE, QUOTE
{
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "10231052.40"
    },
    "ordering_operation": {
      "using_filesort": false,
      "table": {
        "table_name": "Rate",
        "access_type": "ref",
        "possible_keys": [
          "IDX_UK",
          "IDX_TIME"
        ],
        "key": "IDX_UK",
        "used_key_parts": [
          "BASE",
          "QUOTE"
        ],
        "key_length": "22",
        "ref": [
          "const",
          "const"
        ],
        "rows_examined_per_scan": 45966462,
        "rows_produced_per_join": 22983231,
        "filtered": "50.00",
        "cost_info": {
          "read_cost": "1037760.00",
          "eval_cost": "4596646.20",
          "prefix_cost": "10231052.40",
          "data_read_per_join": "1G"
        },
        "used_columns": [
          "ID",
          "TIME",
          "BASE",
          "QUOTE",
          "BID"
        ],
        "attached_condition": "((`Developer`.`Rate`.`BASE` <=> 'EUR') and (`Developer`.`Rate`.`QUOTE` <=> 'USD') and (`Developer`.`Rate`.`TIME` <= <cache>((now() - interval 1 month))))"
      }
    }
  }
}

but if you force the optimizer to use the IDX_UK index, then the muscle starts using all three columns in the query
EXPLAIN FORMAT=JSON
SELECT
  BID
FROM 
  Rate FORCE INDEX(IDX_UK)
WHERE 
  BASE = 'EUR' 
  AND QUOTE = 'USD' 
  AND `TIME` <= (NOW() - INTERVAL 1 MONTH) 
ORDER BY 
  `TIME` DESC 
LIMIT 1

{
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "10231052.40"
    },
    "ordering_operation": {
      "using_filesort": false,
      "table": {
        "table_name": "Rate",
        "access_type": "range",
        "possible_keys": [
          "IDX_UK"
        ],
        "key": "IDX_UK",
        "used_key_parts": [
          "BASE",
          "QUOTE",
          "TIME"
        ],
        "key_length": "27",
        "rows_examined_per_scan": 45966462,
        "rows_produced_per_join": 15320621,
        "filtered": "100.00",
        "index_condition": "((`Developer`.`Rate`.`BASE` = 'EUR') and (`Developer`.`Rate`.`QUOTE` = 'USD') and (`Developer`.`Rate`.`TIME` <= <cache>((now() - interval 1 month))))",
        "cost_info": {
          "read_cost": "1037760.00",
          "eval_cost": "3064124.31",
          "prefix_cost": "10231052.40",
          "data_read_per_join": "818M"
        },
        "used_columns": [
          "ID",
          "TIME",
          "BASE",
          "QUOTE",
          "BID"
        ]
      }
    }
  }
}

Why doesn't he want to use all three columns of this index without explicitly specifying the index? Did the optimizer like IDX_TIME more than IDX_UK?

Answer the question

In order to leave comments, you need to log in

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question