MySQL Joins and Advanced Queries
JOIN Types in Practice
Section titled “JOIN Types in Practice”Setup — sample tables:
CREATE TABLE customers ( id INT PRIMARY KEY, name VARCHAR(100), country VARCHAR(50));
CREATE TABLE orders ( id INT PRIMARY KEY, customer_id INT, amount DECIMAL(10,2), status VARCHAR(20), created_at DATETIME);INNER JOIN — only matching rows:
SELECT c.name, o.id AS order_id, o.amountFROM customers cINNER JOIN orders o ON c.id = o.customer_id;-- Only customers who have at least one orderLEFT JOIN — all customers, even without orders:
SELECT c.name, COUNT(o.id) AS order_count, COALESCE(SUM(o.amount), 0) AS total_spentFROM customers cLEFT JOIN orders o ON c.id = o.customer_idGROUP BY c.id, c.name;Finding customers with NO orders:
SELECT c.nameFROM customers cLEFT JOIN orders o ON c.id = o.customer_idWHERE o.id IS NULL;Self JOIN — find pairs:
-- Customers from the same countrySELECT a.name AS customer1, b.name AS customer2, a.countryFROM customers aJOIN customers b ON a.country = b.country AND a.id < b.idORDER BY a.country;Multi-table JOIN:
SELECT c.name, o.id AS order_id, p.name AS product, oi.quantityFROM customers cJOIN orders o ON c.id = o.customer_idJOIN order_items oi ON o.id = oi.order_idJOIN products p ON oi.product_id = p.idWHERE o.status = 'completed';Subqueries
Section titled “Subqueries”-- Scalar subquery in SELECTSELECT name, (SELECT COUNT(*) FROM orders WHERE customer_id = c.id) AS order_countFROM customers c;
-- Subquery in WHERESELECT name FROM customersWHERE id IN ( SELECT DISTINCT customer_id FROM orders WHERE amount > 1000);
-- Correlated subquery — references outer querySELECT c.name, o.id, o.amountFROM customers cJOIN orders o ON c.id = o.customer_idWHERE o.amount = ( SELECT MAX(amount) FROM orders WHERE customer_id = c.id);
-- EXISTS — more efficient than IN for large setsSELECT name FROM customers cWHERE EXISTS ( SELECT 1 FROM orders o WHERE o.customer_id = c.id AND o.status = 'pending');CTEs (Common Table Expressions)
Section titled “CTEs (Common Table Expressions)”-- Basic CTEWITH high_value_customers AS ( SELECT customer_id, SUM(amount) AS total FROM orders WHERE status = 'completed' GROUP BY customer_id HAVING SUM(amount) > 5000)SELECT c.name, hvc.totalFROM customers cJOIN high_value_customers hvc ON c.id = hvc.customer_idORDER BY hvc.total DESC;
-- Multiple CTEsWITHmonthly AS ( SELECT DATE_FORMAT(created_at, '%Y-%m') AS month, SUM(amount) AS revenue FROM orders GROUP BY month),ranked AS ( SELECT month, revenue, RANK() OVER (ORDER BY revenue DESC) AS rnk FROM monthly)SELECT * FROM ranked WHERE rnk <= 3;
-- Recursive CTE (MySQL 8.0+) — category hierarchyWITH RECURSIVE category_path AS ( SELECT id, name, parent_id, name AS path FROM categories WHERE parent_id IS NULL
UNION ALL
SELECT c.id, c.name, c.parent_id, CONCAT(cp.path, ' > ', c.name) FROM categories c JOIN category_path cp ON c.parent_id = cp.id)SELECT * FROM category_path ORDER BY path;Window Functions (MySQL 8.0+)
Section titled “Window Functions (MySQL 8.0+)”-- Running total of orders per customerSELECT customer_id, created_at, amount, SUM(amount) OVER (PARTITION BY customer_id ORDER BY created_at) AS running_totalFROM orders;
-- Rank customers by spendSELECT customer_id, SUM(amount) AS total_spend, RANK() OVER (ORDER BY SUM(amount) DESC) AS rnk, DENSE_RANK() OVER (ORDER BY SUM(amount) DESC) AS dense_rnkFROM ordersGROUP BY customer_id;
-- Month-over-month changeSELECT month, revenue, LAG(revenue) OVER (ORDER BY month) AS prev_month, revenue - LAG(revenue) OVER (ORDER BY month) AS change, ROUND(100.0 * (revenue - LAG(revenue) OVER (ORDER BY month)) / NULLIF(LAG(revenue) OVER (ORDER BY month), 0), 1) AS pct_changeFROM ( SELECT DATE_FORMAT(created_at, '%Y-%m') AS month, SUM(amount) AS revenue FROM orders GROUP BY month) monthlyORDER BY month;
-- Nth row per group (top order per customer)SELECT customer_id, id, amountFROM ( SELECT customer_id, id, amount, ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY amount DESC) AS rn FROM orders) rankedWHERE rn = 1;GROUP BY and Aggregation
Section titled “GROUP BY and Aggregation”-- Aggregation with HAVINGSELECT customer_id, COUNT(*) AS order_count, SUM(amount) AS total, AVG(amount) AS avg_order, MIN(amount) AS smallest, MAX(amount) AS largestFROM ordersWHERE status = 'completed'GROUP BY customer_idHAVING COUNT(*) >= 3ORDER BY total DESC;
-- ROLLUP — add subtotals and grand totalSELECT country, status, SUM(amount) AS revenueFROM customers cJOIN orders o ON c.id = o.customer_idGROUP BY country, status WITH ROLLUP;
-- GROUP_CONCAT — aggregate stringsSELECT customer_id, GROUP_CONCAT(DISTINCT status ORDER BY status SEPARATOR ', ') AS statuses, GROUP_CONCAT(id ORDER BY created_at SEPARATOR ', ') AS order_idsFROM ordersGROUP BY customer_id;Useful Date and String Queries
Section titled “Useful Date and String Queries”-- Orders in the last 30 daysSELECT * FROM ordersWHERE created_at >= NOW() - INTERVAL 30 DAY;
-- Group by weekSELECT YEARWEEK(created_at) AS week, SUM(amount) AS revenueFROM ordersGROUP BY week;
-- String pattern matchingSELECT * FROM customers WHERE name LIKE 'John%';SELECT * FROM customers WHERE name REGEXP '^(John|Jane)';
-- CASE in aggregationSELECT SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END) AS completed_revenue, SUM(CASE WHEN status = 'refunded' THEN amount ELSE 0 END) AS refunded_revenue, COUNT(CASE WHEN status = 'pending' THEN 1 END) AS pending_countFROM orders;Performance Tips for JOINs
Section titled “Performance Tips for JOINs”-- EXPLAIN to check join strategyEXPLAIN SELECT c.name, COUNT(o.id)FROM customers cLEFT JOIN orders o ON c.id = o.customer_idGROUP BY c.id;
-- Ensure indexes on JOIN columnsCREATE INDEX idx_orders_customer_id ON orders(customer_id);
-- Avoid functions on indexed columns in JOIN/WHERE-- Bad: WHERE YEAR(created_at) = 2024-- Good: WHERE created_at >= '2024-01-01' AND created_at < '2025-01-01'
-- Use covering index for common join + filter patternsCREATE INDEX idx_orders_customer_status ON orders(customer_id, status, amount);