Optimize MySQL Query Performance with Proper Indexing
Problem
MySQL queries are executing slowly, causing application timeouts and poor user experience, especially on large datasets.
Root Cause
Poor query performance usually results from missing indexes, suboptimal query structure, full table scans, or inefficient use of MySQL features like joins and subqueries.
Solution
Systematically optimize MySQL query performance:
Step 1: Identify Slow Queries
-- Switch on the slow query log for the running server.
-- Statements that take longer than 1 second are logged, and so are
-- statements that execute without using an index.
SET @@GLOBAL.slow_query_log = 'ON';
SET @@GLOBAL.long_query_time = 1;
SET @@GLOBAL.log_queries_not_using_indexes = 'ON';
-- Or use Performance Schema: the ten statement digests with the highest
-- average latency, plus how many rows each one examines per execution.
-- Performance Schema *_TIMER_WAIT columns are expressed in picoseconds,
-- so divide by 10^12 (not 10^9, which yields milliseconds) to get seconds.
SELECT
    DIGEST_TEXT,
    COUNT_STAR,
    AVG_TIMER_WAIT / 1000000000000 AS avg_time_seconds,
    SUM_ROWS_EXAMINED / COUNT_STAR AS avg_rows_examined
FROM performance_schema.events_statements_summary_by_digest
ORDER BY AVG_TIMER_WAIT DESC
LIMIT 10;
Step 2: Analyze Query Execution Plans
-- Ask the optimizer for its execution plan for the users/posts query.
EXPLAIN
SELECT
    author.name,
    post.title
FROM users AS author
INNER JOIN posts AS post
    ON author.id = post.user_id
WHERE author.created_at > '2024-01-01'
  AND post.status = 'published'
ORDER BY post.created_at DESC;
-- EXPLAIN ANALYZE (MySQL 8.0.18+) runs the statement and reports measured
-- timing alongside the optimizer's estimates.
EXPLAIN ANALYZE
SELECT
    author.name,
    post.title
FROM users AS author
INNER JOIN posts AS post
    ON author.id = post.user_id
WHERE author.created_at > '2024-01-01';
Step 3: Create Strategic Indexes
-- Secondary index on the foreign-key column used to join posts to users.
CREATE INDEX idx_posts_user_id
    ON posts (user_id);

-- Multi-column index for WHERE clauses on users.created_at and users.status.
-- NOTE(review): key order matters; equality-filtered columns usually belong
-- ahead of range-filtered ones. Confirm against the real query shape.
CREATE INDEX idx_users_created_status
    ON users (created_at, status);

-- Index matching ORDER BY created_at DESC on posts.
-- NOTE(review): descending key parts are honored from MySQL 8.0; older
-- versions accept DESC but ignore it. Confirm the target version.
CREATE INDEX idx_posts_created_desc
    ON posts (created_at DESC);

-- Covering index: contains every posts column listed here, so a query that
-- touches only these columns can be answered from the index.
CREATE INDEX idx_posts_covering
    ON posts (user_id, status, created_at, title);
-- Index for the common "posts with a given status, by date" query.
-- MySQL does not support partial (filtered) indexes: CREATE INDEX ... WHERE
-- is PostgreSQL syntax and fails with a syntax error here. Instead, lead the
-- index with the equality-filtered column (status) and follow it with the
-- range/sort column (created_at).
CREATE INDEX idx_published_posts
    ON posts (status, created_at);
Step 4: Optimize Query Structure
-- ❌ SELECT * returns every column, whether the caller needs it or not
SELECT *
FROM users
WHERE email = 'user@example.com';
-- ✅ Name only the columns the caller actually uses
SELECT
    id,
    name,
    email
FROM users
WHERE email = 'user@example.com';
-- ❌ A function wrapped around the column in the WHERE clause
SELECT *
FROM posts
WHERE YEAR(created_at) = 2024;
-- ✅ Compare the bare column against a half-open date range instead
SELECT *
FROM posts
WHERE created_at >= '2024-01-01'
  AND created_at < '2025-01-01';
-- ❌ OR across two different columns can keep an index from being used
SELECT *
FROM users
WHERE name = 'John'
   OR email = 'john@example.com';
-- ✅ One branch per column so each can use its own index.
--    UNION (not UNION ALL) drops the duplicate when a row matches both branches.
SELECT *
FROM users
WHERE name = 'John'
UNION
SELECT *
FROM users
WHERE email = 'john@example.com';
Step 5: Optimize JOINs
-- ❌ Inefficient: membership test written as an IN subquery
-- NOTE(review): recent MySQL versions may rewrite IN subqueries internally;
-- compare both forms with EXPLAIN before assuming the join is faster.
SELECT *
FROM users
WHERE id IN (
    SELECT user_id
    FROM posts
    WHERE status = 'published'
);
-- ✅ Same users via a join; DISTINCT collapses users that have
--    more than one published post
SELECT DISTINCT author.*
FROM users AS author
INNER JOIN posts AS post
    ON author.id = post.user_id
WHERE post.status = 'published';
-- ✅ EXISTS can perform better in some cases: it only asks whether at least
--    one published post exists for each user
SELECT *
FROM users AS author
WHERE EXISTS (
    SELECT 1
    FROM posts AS post
    WHERE post.user_id = author.id
      AND post.status = 'published'
);
Step 6: Monitor and Maintain Performance
-- Inspect the indexes defined in one schema: one row per indexed column,
-- with its position inside the index and the recorded cardinality.
SELECT
    stat.TABLE_NAME,
    stat.INDEX_NAME,
    stat.SEQ_IN_INDEX,
    stat.COLUMN_NAME,
    stat.CARDINALITY
FROM INFORMATION_SCHEMA.STATISTICS AS stat
WHERE stat.TABLE_SCHEMA = 'your_database'
ORDER BY
    stat.TABLE_NAME,
    stat.INDEX_NAME,
    stat.SEQ_IN_INDEX;
-- Find unused indexes: secondary indexes with no recorded I/O.
-- Stock MySQL has no INFORMATION_SCHEMA.INDEX_STATISTICS table (that is a
-- Percona Server / MariaDB userstat feature), so read the Performance Schema
-- per-index I/O summary instead. It has one row per index, which also avoids
-- the one-row-per-column repetition of INFORMATION_SCHEMA.STATISTICS.
-- Counters start from zero at server startup, so "unused" means "unused
-- since the last restart"; check after a representative period of traffic.
SELECT
    io.OBJECT_SCHEMA AS TABLE_SCHEMA,
    io.OBJECT_NAME   AS TABLE_NAME,
    io.INDEX_NAME    AS INDEX_NAME
FROM performance_schema.table_io_waits_summary_by_index_usage AS io
WHERE io.INDEX_NAME IS NOT NULL          -- NULL row = access without any index
  AND io.INDEX_NAME != 'PRIMARY'
  AND io.COUNT_STAR = 0                  -- no reads or writes through this index
  AND io.OBJECT_SCHEMA NOT IN ('mysql', 'performance_schema', 'sys')
ORDER BY
    io.OBJECT_SCHEMA,
    io.OBJECT_NAME,
    io.INDEX_NAME;
-- Regular maintenance
-- ANALYZE TABLE refreshes the index statistics the optimizer relies on
-- when choosing a plan for the users and posts tables.
ANALYZE TABLE users, posts;
-- OPTIMIZE TABLE reorganizes the tables' physical storage.
-- NOTE(review): on InnoDB this is presumably a full table rebuild and can be
-- expensive on large tables. Confirm before scheduling it routinely.
OPTIMIZE TABLE users, posts;