Skip to content

MySQL Joins and Advanced Queries

Setup — sample tables:

CREATE TABLE customers (
id INT PRIMARY KEY,
name VARCHAR(100),
country VARCHAR(50)
);
CREATE TABLE orders (
id INT PRIMARY KEY,
customer_id INT,
amount DECIMAL(10,2),
status VARCHAR(20),
created_at DATETIME
);

INNER JOIN — only matching rows:

SELECT c.name, o.id AS order_id, o.amount
FROM customers c
INNER JOIN orders o ON c.id = o.customer_id;
-- Only customers who have at least one order

LEFT JOIN — all customers, even without orders:

SELECT c.name, COUNT(o.id) AS order_count, COALESCE(SUM(o.amount), 0) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, c.name;

Finding customers with NO orders:

SELECT c.name
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
WHERE o.id IS NULL;

Self JOIN — find pairs:

-- Customers from the same country
SELECT a.name AS customer1, b.name AS customer2, a.country
FROM customers a
JOIN customers b ON a.country = b.country AND a.id < b.id
ORDER BY a.country;

Multi-table JOIN:

SELECT c.name, o.id AS order_id, p.name AS product, oi.quantity
FROM customers c
JOIN orders o ON c.id = o.customer_id
JOIN order_items oi ON o.id = oi.order_id
JOIN products p ON oi.product_id = p.id
WHERE o.status = 'completed';

-- Scalar subquery in SELECT
SELECT name,
(SELECT COUNT(*) FROM orders WHERE customer_id = c.id) AS order_count
FROM customers c;
-- Subquery in WHERE
SELECT name FROM customers
WHERE id IN (
SELECT DISTINCT customer_id FROM orders WHERE amount > 1000
);
-- Correlated subquery — references outer query
SELECT c.name, o.id, o.amount
FROM customers c
JOIN orders o ON c.id = o.customer_id
WHERE o.amount = (
SELECT MAX(amount)
FROM orders
WHERE customer_id = c.id
);
-- EXISTS — more efficient than IN for large sets
SELECT name FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.id AND o.status = 'pending'
);

-- Basic CTE
WITH high_value_customers AS (
SELECT customer_id, SUM(amount) AS total
FROM orders
WHERE status = 'completed'
GROUP BY customer_id
HAVING SUM(amount) > 5000
)
SELECT c.name, hvc.total
FROM customers c
JOIN high_value_customers hvc ON c.id = hvc.customer_id
ORDER BY hvc.total DESC;
-- Multiple CTEs
WITH
monthly AS (
SELECT DATE_FORMAT(created_at, '%Y-%m') AS month,
SUM(amount) AS revenue
FROM orders
GROUP BY month
),
ranked AS (
SELECT month, revenue,
RANK() OVER (ORDER BY revenue DESC) AS rnk
FROM monthly
)
SELECT * FROM ranked WHERE rnk <= 3;
-- Recursive CTE (MySQL 8.0+) — category hierarchy
WITH RECURSIVE category_path AS (
SELECT id, name, parent_id, name AS path
FROM categories
WHERE parent_id IS NULL
UNION ALL
SELECT c.id, c.name, c.parent_id,
CONCAT(cp.path, ' > ', c.name)
FROM categories c
JOIN category_path cp ON c.parent_id = cp.id
)
SELECT * FROM category_path ORDER BY path;

-- Running total of orders per customer
SELECT customer_id, created_at, amount,
SUM(amount) OVER (PARTITION BY customer_id ORDER BY created_at) AS running_total
FROM orders;
-- Rank customers by spend
SELECT customer_id, SUM(amount) AS total_spend,
RANK() OVER (ORDER BY SUM(amount) DESC) AS rnk,
DENSE_RANK() OVER (ORDER BY SUM(amount) DESC) AS dense_rnk
FROM orders
GROUP BY customer_id;
-- Month-over-month change
SELECT month, revenue,
LAG(revenue) OVER (ORDER BY month) AS prev_month,
revenue - LAG(revenue) OVER (ORDER BY month) AS change,
ROUND(100.0 * (revenue - LAG(revenue) OVER (ORDER BY month))
/ NULLIF(LAG(revenue) OVER (ORDER BY month), 0), 1) AS pct_change
FROM (
SELECT DATE_FORMAT(created_at, '%Y-%m') AS month, SUM(amount) AS revenue
FROM orders GROUP BY month
) monthly
ORDER BY month;
-- Nth row per group (top order per customer)
SELECT customer_id, id, amount
FROM (
SELECT customer_id, id, amount,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY amount DESC) AS rn
FROM orders
) ranked
WHERE rn = 1;

-- Aggregation with HAVING
SELECT customer_id,
COUNT(*) AS order_count,
SUM(amount) AS total,
AVG(amount) AS avg_order,
MIN(amount) AS smallest,
MAX(amount) AS largest
FROM orders
WHERE status = 'completed'
GROUP BY customer_id
HAVING COUNT(*) >= 3
ORDER BY total DESC;
-- ROLLUP — add subtotals and grand total
SELECT country, status, SUM(amount) AS revenue
FROM customers c
JOIN orders o ON c.id = o.customer_id
GROUP BY country, status WITH ROLLUP;
-- GROUP_CONCAT — aggregate strings
SELECT customer_id,
GROUP_CONCAT(DISTINCT status ORDER BY status SEPARATOR ', ') AS statuses,
GROUP_CONCAT(id ORDER BY created_at SEPARATOR ', ') AS order_ids
FROM orders
GROUP BY customer_id;

-- Orders in the last 30 days
SELECT * FROM orders
WHERE created_at >= NOW() - INTERVAL 30 DAY;
-- Group by week
SELECT YEARWEEK(created_at) AS week, SUM(amount) AS revenue
FROM orders
GROUP BY week;
-- String pattern matching
SELECT * FROM customers WHERE name LIKE 'John%';
SELECT * FROM customers WHERE name REGEXP '^(John|Jane)';
-- CASE in aggregation
SELECT
SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END) AS completed_revenue,
SUM(CASE WHEN status = 'refunded' THEN amount ELSE 0 END) AS refunded_revenue,
COUNT(CASE WHEN status = 'pending' THEN 1 END) AS pending_count
FROM orders;

-- EXPLAIN to check join strategy
EXPLAIN SELECT c.name, COUNT(o.id)
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id;
-- Ensure indexes on JOIN columns
CREATE INDEX idx_orders_customer_id ON orders(customer_id);
-- Avoid functions on indexed columns in JOIN/WHERE
-- Bad: WHERE YEAR(created_at) = 2024
-- Good: WHERE created_at >= '2024-01-01' AND created_at < '2025-01-01'
-- Use covering index for common join + filter patterns
CREATE INDEX idx_orders_customer_status ON orders(customer_id, status, amount);