SQL Hints

SQL hints provide direct control over query execution plans when the automatic optimizer needs guidance. Tacnode's pg_hint_plan extension allows developers to influence scan methods, join algorithms, and execution order through comment-based hints.

Quick Reference

Hint TypeSyntaxPurposeUse Case
Scan MethodSeqScan(table)Force scan typeBypass index when not beneficial
Join AlgorithmHashJoin(t1 t2)Control join methodOptimize multi-table queries
Join OrderLeading((t1 t2) t3)Force join sequenceComplex query optimization
Index SelectionIndexScan(table idx)Specify index usageEnsure optimal index choice
GUC SettingsSet(param value)Query-specific configTemporary parameter overrides

When to Use SQL Hints

Ideal Scenarios

  • Complex queries with suboptimal automatic plans
  • Performance testing to compare different execution strategies
  • Emergency fixes for production performance issues
  • Optimizer limitations with insufficient statistics
  • Legacy query optimization during system migration

Performance Benefits

  • Immediate control over execution strategy
  • Consistent performance by fixing execution plans
  • Fine-tuned optimization for specific workloads
  • Emergency response capability for performance issues

Setup and Installation

Enable pg_hint_plan Extension

-- Install the extension (requires superuser privileges)
CREATE EXTENSION IF NOT EXISTS pg_hint_plan;
 
-- Verify installation
SELECT extname, extversion 
FROM pg_extension 
WHERE extname = 'pg_hint_plan';

Enable Query ID Tracking

-- Enable query ID calculation for hint table functionality
ALTER SYSTEM SET compute_query_id = on;
SELECT pg_reload_conf();
 
-- Verify setting
SHOW compute_query_id;
-- Should show 'on'
 
-- Enable hint table (optional, for dynamic hint management)
ALTER DATABASE your_database SET pg_hint_plan.enable_hint_table = on;

Hint Syntax and Types

Basic Syntax

-- General format
/*+ HintType(table_alias parameter) */
SELECT ...
 
-- Multiple hints
/*+ SeqScan(t1) HashJoin(t1 t2) */
SELECT ...
 
-- Important: Use table aliases if defined in query
/*+ IndexScan(orders idx_orders_user_id) */
SELECT * FROM orders WHERE user_id = 100;

Scan Method Control

-- Force sequential scan (useful for small tables or bulk operations)
/*+ SeqScan(orders) */
SELECT * FROM orders WHERE amount > 100;
 
-- Force index scan (ensure index usage)
/*+ IndexScan(orders idx_orders_date) */
SELECT * FROM orders WHERE order_date >= '2024-01-01';
 
-- Force index-only scan (when covering index is available)
/*+ IndexOnlyScan(orders idx_orders_status_date) */
SELECT order_id, status FROM orders WHERE status = 'completed';
 
-- Disable specific scan types
/*+ NoSeqScan(large_table) */
SELECT * FROM large_table WHERE indexed_column = 'value';

Join Algorithm Control

-- Force hash join (good for large tables with equi-joins)
/*+ HashJoin(customers orders) */
SELECT c.name, o.total
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id;
 
-- Force nested loop join (efficient for small result sets)
/*+ NestLoop(products orders) */
SELECT p.name, o.quantity
FROM products p
JOIN orders o ON p.product_id = o.product_id
WHERE p.category = 'electronics';
 
-- Force merge join (good for sorted data)
/*+ MergeJoin(table1 table2) */
SELECT * FROM table1 t1
JOIN table2 t2 ON t1.sorted_column = t2.sorted_column;
 
-- Disable specific join types
/*+ NoHashJoin(large_table1 large_table2) */
SELECT * FROM large_table1 l1 JOIN large_table2 l2 ON l1.id = l2.id;

Join Order Control

-- Simple two-table join order
/*+ Leading(orders customers) */
SELECT * FROM customers c
JOIN orders o ON c.customer_id = o.customer_id;
 
-- Complex multi-table join order with grouping
/*+ Leading(((customers orders) products)) */
SELECT c.name, o.order_date, p.product_name
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN products p ON o.product_id = p.product_id;
 
-- Alternative join order for performance comparison
/*+ Leading((customers (orders products))) */
SELECT c.name, o.order_date, p.product_name
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN products p ON o.product_id = p.product_id;

Complete Example Setup

Sample Database Schema

-- Create demonstration tables
CREATE TABLE customers (
    customer_id SERIAL PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    email VARCHAR(150),
    city VARCHAR(50),
    registration_date DATE,
    status VARCHAR(20) DEFAULT 'active'
) USING COLUMNAR;
 
CREATE TABLE products (
    product_id SERIAL PRIMARY KEY,
    product_name VARCHAR(200) NOT NULL,
    category VARCHAR(50),
    price DECIMAL(10,2),
    in_stock BOOLEAN DEFAULT true
) USING COLUMNAR;
 
CREATE TABLE orders (
    order_id SERIAL PRIMARY KEY,
    customer_id INTEGER REFERENCES customers(customer_id),
    product_id INTEGER REFERENCES products(product_id),
    order_date DATE NOT NULL,
    quantity INTEGER NOT NULL,
    total_amount DECIMAL(12,2),
    status VARCHAR(20) DEFAULT 'pending'
) USING COLUMNAR;
 
-- Create indexes for optimal performance
CREATE INDEX idx_customers_city ON customers(city);
CREATE INDEX idx_customers_status ON customers(status);
CREATE INDEX idx_products_category ON products(category);
CREATE INDEX idx_orders_customer_id ON orders(customer_id);
CREATE INDEX idx_orders_product_id ON orders(product_id);
CREATE INDEX idx_orders_date ON orders(order_date);
CREATE INDEX idx_orders_status ON orders(status);
CREATE INDEX idx_orders_date_status ON orders(order_date, status);

Sample Data Generation

-- Insert sample customers
INSERT INTO customers (name, email, city, registration_date, status)
SELECT 
    'Customer_' || i,
    'customer' || i || '@example.com',
    CASE (i % 5) 
        WHEN 0 THEN 'New York'
        WHEN 1 THEN 'Los Angeles' 
        WHEN 2 THEN 'Chicago'
        WHEN 3 THEN 'Houston'
        ELSE 'Phoenix'
    END,
    CURRENT_DATE - (random() * 365)::int,
    CASE WHEN i % 10 = 0 THEN 'inactive' ELSE 'active' END
FROM generate_series(1, 10000) i;
 
-- Insert sample products
INSERT INTO products (product_name, category, price, in_stock)
SELECT 
    'Product_' || i,
    CASE (i % 4)
        WHEN 0 THEN 'electronics'
        WHEN 1 THEN 'clothing'
        WHEN 2 THEN 'books'
        ELSE 'home'
    END,
    (random() * 500 + 10)::decimal(10,2),
    CASE WHEN i % 20 = 0 THEN false ELSE true END
FROM generate_series(1, 1000) i;
 
-- Insert sample orders
INSERT INTO orders (customer_id, product_id, order_date, quantity, total_amount, status)
SELECT 
    (random() * 9999 + 1)::int,
    (random() * 999 + 1)::int,
    CURRENT_DATE - (random() * 90)::int,
    (random() * 5 + 1)::int,
    (random() * 1000 + 50)::decimal(12,2),
    CASE (random() * 3)::int
        WHEN 0 THEN 'pending'
        WHEN 1 THEN 'completed'
        ELSE 'cancelled'
    END
FROM generate_series(1, 50000) i;
 
-- Update table statistics
ANALYZE customers, products, orders;

Practical Hint Examples

Optimizing Index Selection

-- Problem: Optimizer chooses wrong index
EXPLAIN ANALYZE
SELECT * FROM orders 
WHERE order_date >= '2024-01-01' AND status = 'completed';
 
-- Solution: Force composite index usage
/*+ IndexScan(orders idx_orders_date_status) */
EXPLAIN ANALYZE
SELECT * FROM orders 
WHERE order_date >= '2024-01-01' AND status = 'completed';
 
-- Compare with sequential scan for large result sets
/*+ SeqScan(orders) */
EXPLAIN ANALYZE  
SELECT * FROM orders 
WHERE order_date >= '2023-01-01';  -- Large date range

Join Optimization Strategies

-- Customer order analysis with different join strategies
-- Original query (let optimizer decide)
EXPLAIN ANALYZE
SELECT c.name, c.city, COUNT(o.order_id) as order_count, SUM(o.total_amount) as total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE c.status = 'active'
GROUP BY c.customer_id, c.name, c.city;
 
-- Force hash join for large datasets
/*+ HashJoin(c o) */
EXPLAIN ANALYZE
SELECT c.name, c.city, COUNT(o.order_id) as order_count, SUM(o.total_amount) as total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE c.status = 'active'
GROUP BY c.customer_id, c.name, c.city;
 
-- Force nested loop for selective queries
/*+ NestLoop(c o) IndexScan(c idx_customers_status) */
EXPLAIN ANALYZE
SELECT c.name, o.order_date, o.total_amount
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
WHERE c.city = 'New York' AND o.status = 'completed';

Complex Multi-table Optimization

-- Product sales analysis with join order control
-- Test different join orders for performance
/*+ Leading(((p o) c)) HashJoin(p o) HashJoin(po c) */
EXPLAIN ANALYZE
SELECT 
    p.category,
    c.city,
    COUNT(*) as sales_count,
    AVG(o.total_amount) as avg_amount
FROM products p
JOIN orders o ON p.product_id = o.product_id
JOIN customers c ON o.customer_id = c.customer_id
WHERE p.in_stock = true
  AND c.status = 'active'
  AND o.order_date >= '2024-01-01'
GROUP BY p.category, c.city
ORDER BY sales_count DESC;
 
-- Alternative join order
/*+ Leading((c (o p))) NestLoop(c o) HashJoin(co p) */
EXPLAIN ANALYZE
SELECT 
    p.category,
    c.city,
    COUNT(*) as sales_count,
    AVG(o.total_amount) as avg_amount
FROM products p
JOIN orders o ON p.product_id = o.product_id
JOIN customers c ON o.customer_id = c.customer_id
WHERE p.in_stock = true
  AND c.status = 'active'
  AND o.order_date >= '2024-01-01'
GROUP BY p.category, c.city
ORDER BY sales_count DESC;

Dynamic Hint Management

Hint Table Configuration

-- Create hint table (if not exists)
CREATE SCHEMA IF NOT EXISTS hint_plan;
 
CREATE TABLE IF NOT EXISTS hint_plan.hints (
    id SERIAL PRIMARY KEY,
    query_id BIGINT NOT NULL,
    application_name TEXT NOT NULL DEFAULT '',
    hints TEXT NOT NULL,
    enabled BOOLEAN DEFAULT true,
    created_at TIMESTAMP DEFAULT now(),
    updated_at TIMESTAMP DEFAULT now(),
    UNIQUE (query_id, application_name)
);
 
-- Add tracking columns for better management
ALTER TABLE hint_plan.hints 
ADD COLUMN IF NOT EXISTS description TEXT,
ADD COLUMN IF NOT EXISTS created_by TEXT DEFAULT current_user;

Managing Hints via Hint Table

-- Step 1: Get query ID for a problematic query
EXPLAIN (VERBOSE, ANALYZE)
SELECT c.name, COUNT(o.order_id) as total_orders
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE c.registration_date >= '2024-01-01'
GROUP BY c.customer_id, c.name;
-- Look for "Query Identifier: XXXXX" in output
 
-- Step 2: Add hint to table (example query_id: 123456789)
INSERT INTO hint_plan.hints (query_id, application_name, hints, description, created_by)
VALUES (
    123456789,
    '',  -- Empty for all applications
    'HashJoin(c o) SeqScan(c)',
    'Force hash join for customer order summary',
    current_user
);
 
-- Step 3: Verify hint is applied
EXPLAIN ANALYZE
SELECT c.name, COUNT(o.order_id) as total_orders
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE c.registration_date >= '2024-01-01'
GROUP BY c.customer_id, c.name;
 
-- View all active hints
SELECT 
    query_id,
    application_name,
    hints,
    description,
    created_by,
    created_at
FROM hint_plan.hints 
WHERE enabled = true
ORDER BY created_at DESC;

Query Blocking with Hints

-- Block expensive queries by setting very short timeout
INSERT INTO hint_plan.hints (query_id, application_name, hints, description)
VALUES (
    987654321,  -- Replace with actual query_id
    '',
    'Set(statement_timeout ''1ms'')',
    'Block expensive cross-join query'
);
 
-- Test the blocked query
SELECT * FROM customers c, orders o, products p;
-- Should fail with timeout error

Best Practices and Guidelines

When to Use Hints

-- ✅ Good use cases:
-- 1. Emergency performance fixes
/*+ SeqScan(large_table) */
SELECT COUNT(*) FROM large_table WHERE complex_condition;
 
-- 2. Specific workload optimization
/*+ HashJoin(fact_table dim_table) */
SELECT * FROM fact_table f JOIN dimension_table d ON f.dim_id = d.id;
 
-- 3. Testing different execution strategies
/*+ Leading((t1 t2) t3) */
-- vs
/*+ Leading(t1 (t2 t3)) */
 
-- ❌ Avoid using hints for:
-- 1. General query optimization (improve schema/indexes instead)
-- 2. Working around application design issues
-- 3. Permanent solutions without understanding root cause

Hint Maintenance

-- Regular hint table cleanup
DELETE FROM hint_plan.hints 
WHERE created_at < now() - INTERVAL '90 days'
  AND description LIKE '%temporary%';
 
-- Disable unused hints
UPDATE hint_plan.hints 
SET enabled = false 
WHERE query_id NOT IN (
    SELECT queryid FROM pg_stat_statements 
    WHERE calls > 0
);
 
-- Review hint effectiveness
SELECT 
    query_id,
    hints,
    description,
    created_at,
    'Review needed' as status
FROM hint_plan.hints 
WHERE created_at < now() - INTERVAL '30 days'
ORDER BY created_at;

Troubleshooting Common Issues

Hint Not Applied

-- Check if hint syntax is correct
EXPLAIN (ANALYZE, VERBOSE)
/*+ SeqScan(table_name) */  -- Must match actual table name/alias
SELECT * FROM table_name WHERE condition;
 
-- Check for hint conflicts
EXPLAIN (ANALYZE, VERBOSE)
/*+ IndexScan(t) SeqScan(t) */  -- Conflicting hints
SELECT * FROM table_name t WHERE condition;
 
-- Verify pg_hint_plan is loaded
SHOW shared_preload_libraries;
SELECT * FROM pg_extension WHERE extname = 'pg_hint_plan';