Welcome to the most comprehensive OpenCV course designed for progressive learning!
This course takes you from absolute beginner to advanced computer vision practitioner. Through 7 structured chapters spanning 12 weeks, you'll master image processing, video analysis, feature detection, and real-world applications.
Course Philosophy: Learn by doing. Every concept is reinforced with code examples, visual diagrams, hands-on assignments, and real-world projects.
Computer Vision enables machines to interpret and understand visual information from the world. It's the technology behind face recognition, self-driving cars, medical imaging, and augmented reality.
Let's set up your development environment for OpenCV. We'll use Python as it's beginner-friendly and has excellent OpenCV support.
# Step 1: Install Python (3.8 or higher recommended)
# Download from python.org

# Step 2: Install OpenCV and the supporting libraries using pip.
# opencv-contrib-python already contains everything in opencv-python plus the
# extra "contrib" modules. Both wheels provide the same `cv2` package, so
# install only ONE of them per environment; installing both can leave a
# broken or mixed `cv2` import.
pip install opencv-contrib-python numpy matplotlib

# Step 3: Verify installation (prints the installed OpenCV version)
python -c "import cv2; print('OpenCV Version:', cv2.__version__)"
Let's write a simple program to load and display an image. This introduces the fundamental OpenCV workflow.
# Import OpenCV library
import cv2

# Read an image from file.
# cv2.imread does not raise when the file is missing or unreadable;
# it returns None instead, so the result must be checked.
image = cv2.imread('sample.jpg')

# Check if image was loaded successfully
if image is None:
    print("Error: Could not load image")
else:
    # Display the image in a window
    cv2.imshow('My First OpenCV Image', image)

    # Wait for a key press (0 means wait indefinitely)
    cv2.waitKey(0)

    # Close all windows
    cv2.destroyAllWindows()

    # Print image properties. These stay inside the success branch:
    # on a failed load `image` is None and has no shape/size/dtype.
    print(f"Image shape: {image.shape}")
    print(f"Image size: {image.size}")
    print(f"Image datatype: {image.dtype}")
Images in OpenCV are represented as NumPy arrays with specific properties that define their characteristics.
import cv2
import numpy as np

# Load image. cv2.imread returns None (it does not raise) when the file
# cannot be read, so fail early with a clear message instead of crashing
# later on `img.shape`.
img = cv2.imread('photo.jpg')
if img is None:
    raise FileNotFoundError("Could not load image: photo.jpg")

# Image properties: a color image is a (height, width, channels) array
height, width, channels = img.shape
print(f"Height: {height} pixels")
print(f"Width: {width} pixels")
print(f"Channels: {channels}")  # Typically 3 for color (BGR)

# Access an individual pixel; indexing is [row, column], i.e. [y, x]
pixel = img[100, 200]
print(f"Pixel value (B,G,R): {pixel}")

# Modify a pixel (channel order is B, G, R, so this is pure blue)
img[100, 200] = [255, 0, 0]

# Access a specific channel (views into the same array, not copies)
blue_channel = img[:, :, 0]
green_channel = img[:, :, 1]
red_channel = img[:, :, 2]

# Create blank images with the same size as the loaded one
blank_black = np.zeros((height, width, 3), dtype=np.uint8)
# np.full states the intent directly and avoids the extra multiply
blank_white = np.full((height, width, 3), 255, dtype=np.uint8)
Let's explore fundamental operations: saving images, resizing, and copying.
import cv2

# Load the source picture
source = cv2.imread('input.jpg')

# Write it back to disk under a new name
cv2.imwrite('output.jpg', source)

# Resize to fixed dimensions; cv2.resize takes the size as (width, height)
target_size = (800, 600)
resized = cv2.resize(source, target_size)

# Resize by a percentage of the original size
scale_percent = 50  # 50% of original size
src_height, src_width = source.shape[:2]
scaled_size = (
    int(src_width * scale_percent / 100),
    int(src_height * scale_percent / 100),
)
scaled = cv2.resize(source, scaled_size)

# Independent copy: .copy() allocates new memory, so edits to the copy
# do not touch the original
copy_img = source.copy()

# Crop with array slicing: rows (y) first, then columns (x)
cropped = source[100:400, 200:600]  # [y1:y2, x1:x2]

# Show several windows at once, then wait for a key press
windows = (
    ('Original', source),
    ('Resized', resized),
    ('Cropped', cropped),
)
for window_title, picture in windows:
    cv2.imshow(window_title, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
Project: Image Gallery Manager
Deliverables:
OpenCV stores color images in BGR (Blue-Green-Red) channel order by default, which is the reverse of the RGB order expected by most other imaging libraries, such as Matplotlib.
import cv2
import matplotlib.pyplot as plt

# OpenCV loads color images in BGR channel order
bgr = cv2.imread('photo.jpg')

# Matplotlib expects RGB, so reorder the channels before plotting
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

# Plot the converted image first, then the unconverted one to show how
# the colors look wrong when the BGR -> RGB step is skipped
figures = (
    (rgb, 'Correct RGB Display'),
    (bgr, 'Incorrect BGR Display'),
)
for picture, caption in figures:
    plt.imshow(picture)
    plt.title(caption)
    plt.show()

# cv2.imwrite expects BGR again, so convert back before saving
restored_bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
cv2.imwrite('output.jpg', restored_bgr)
Grayscale images have a single channel (intensity) instead of three color channels. This simplifies processing and is essential for many computer vision algorithms.
import cv2

# Method 1: load in color, then convert to a single-channel image
color = cv2.imread('photo.jpg')
gray = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY)

# Method 2: ask imread for grayscale directly
gray_direct = cv2.imread('photo.jpg', cv2.IMREAD_GRAYSCALE)

# Compare dimensions: the color array has a third (channel) axis
print(f"Color image shape: {color.shape}")  # (height, width, 3)
print(f"Gray image shape: {gray.shape}")  # (height, width)

# Save the grayscale result
cv2.imwrite('gray_output.jpg', gray)

# Manual conversion using the weighted average
#   Gray = 0.299*R + 0.587*G + 0.114*B
# cv2.split returns the planes in B, G, R order.
blue, green, red = cv2.split(color)
weighted = 0.114 * blue + 0.587 * green + 0.299 * red
gray_manual = weighted.astype('uint8')  # float result cast back to 8-bit

# Show the color and grayscale versions side by side
cv2.imshow('Original', color)
cv2.imshow('Grayscale', gray)
cv2.waitKey(0)
cv2.destroyAllWindows()
HSV (Hue-Saturation-Value) is extremely useful for color-based object detection and segmentation because it separates color information (Hue) from lighting (Value).
import cv2
import numpy as np

# Convert BGR to HSV
img = cv2.imread('fruits.jpg')
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# HSV ranges (OpenCV specific)
# Hue: 0-179 (represents 0-360 degrees scaled down)
# Saturation: 0-255 (0 = gray, 255 = fully saturated)
# Value: 0-255 (0 = black, 255 = bright)

# Example: Detect red objects
# Red wraps around the hue axis, so it needs two ranges (0-10 and 170-180)
lower_red1 = np.array([0, 120, 70])
upper_red1 = np.array([10, 255, 255])
mask1 = cv2.inRange(hsv, lower_red1, upper_red1)

lower_red2 = np.array([170, 120, 70])
upper_red2 = np.array([180, 255, 255])
mask2 = cv2.inRange(hsv, lower_red2, upper_red2)

# Combine both masks with a bitwise OR.
# The masks are uint8 arrays of 0/255; `mask1 + mask2` is wrapping uint8
# addition, so a pixel set in both would become 254 rather than 255.
# bitwise_or always yields a clean 0/255 mask.
red_mask = cv2.bitwise_or(mask1, mask2)

# Apply mask to original image (keeps only pixels where the mask is set)
red_objects = cv2.bitwise_and(img, img, mask=red_mask)

# Detect green objects (a single hue range is enough)
lower_green = np.array([40, 40, 40])
upper_green = np.array([80, 255, 255])
green_mask = cv2.inRange(hsv, lower_green, upper_green)
green_objects = cv2.bitwise_and(img, img, mask=green_mask)

# Display results
cv2.imshow('Original', img)
cv2.imshow('Red Mask', red_mask)
cv2.imshow('Red Objects Only', red_objects)
cv2.imshow('Green Objects Only', green_objects)
cv2.waitKey(0)
cv2.destroyAllWindows()
Learn to split images into individual color channels and merge them back together.
import cv2
import numpy as np

img = cv2.imread('photo.jpg')

# Separate the three color planes; OpenCV order is blue, green, red
b, g, r = cv2.split(img)

# A single plane has no channel axis, so it displays as grayscale
print(f"Blue channel shape: {b.shape}")  # (height, width)

# Show each plane on its own (as intensity images)
for window_title, plane in (
    ('Blue Channel', b),
    ('Green Channel', g),
    ('Red Channel', r),
):
    cv2.imshow(window_title, plane)

# Tinted views: keep one plane and fill the other two with zeros
plane_size = img.shape[:2]
zeros = np.zeros(plane_size, dtype="uint8")
blue_only = cv2.merge([b, zeros, zeros])
green_only = cv2.merge([zeros, g, zeros])
red_only = cv2.merge([zeros, zeros, r])

for window_title, tinted in (
    ('Blue Only', blue_only),
    ('Green Only', green_only),
    ('Red Only', red_only),
):
    cv2.imshow(window_title, tinted)

# Reassemble the planes in their original order
merged = cv2.merge([b, g, r])

# Reassemble with red and blue exchanged for an artistic effect
swapped = cv2.merge([r, g, b])  # R and B swapped

cv2.waitKey(0)
cv2.destroyAllWindows()
Add visual annotations to images: lines, rectangles, circles, and text.
import cv2
import numpy as np

# Colors are written in BGR order, matching OpenCV's convention
GREEN = (0, 255, 0)
BLUE = (255, 0, 0)
RED = (0, 0, 255)
YELLOW = (0, 255, 255)
CYAN = (255, 255, 0)
MAGENTA = (255, 0, 255)
WHITE = (255, 255, 255)

# Black 800x600 canvas (rows = height, columns = width)
canvas = np.zeros((600, 800, 3), dtype="uint8")

# Line: (image, start_point, end_point, color, thickness)
cv2.line(canvas, (50, 50), (750, 50), GREEN, 3)

# Outlined rectangle: (image, top_left, bottom_right, color, thickness)
cv2.rectangle(canvas, (100, 100), (300, 250), BLUE, 2)

# Filled rectangle: a thickness of -1 fills the shape
cv2.rectangle(canvas, (350, 100), (550, 250), RED, -1)

# Circle: (image, center, radius, color, thickness)
cv2.circle(canvas, (650, 175), 60, YELLOW, 3)

# Ellipse: (image, center, axes, angle, start_angle, end_angle, color, thickness)
cv2.ellipse(canvas, (200, 400), (100, 50), 0, 0, 360, CYAN, 2)

# Closed polygon: vertices reshaped to the (N, 1, 2) layout used here
vertices = [[450, 350], [550, 350], [600, 450], [500, 500], [400, 450]]
pts = np.array(vertices, np.int32).reshape((-1, 1, 2))
cv2.polylines(canvas, [pts], True, MAGENTA, 3)

# Text: (image, text, position, font, scale, color, thickness)
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(canvas, 'OpenCV Drawing', (50, 550), font, 1.5, WHITE, 2)

# Show the finished drawing and wait for a key press
cv2.imshow('Drawing Shapes', canvas)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Save the drawing to disk
cv2.imwrite('shapes.jpg', canvas)
Build a color detector that finds a chosen color in an image, highlights the matching regions, and reports what percentage of the image they cover.
import cv2
import numpy as np


def detect_color(image_path, color_name):
    """Detect and highlight a specific color in an image.

    Args:
        image_path: Path of the image file to analyze.
        color_name: One of 'red', 'green', 'blue', 'yellow', 'orange'.

    Returns:
        A tuple ``(mask, result, percentage)``: the binary mask of matching
        pixels, the input image with everything outside the mask blacked
        out (and a text label drawn on it), and the percentage of pixels
        that matched.

    Raises:
        KeyError: If ``color_name`` is not a supported color.
        FileNotFoundError: If the image cannot be read.
    """
    # Color ranges in HSV. Each color maps to a list of (lower, upper)
    # bounds; red needs two entries because it wraps around the hue axis.
    color_ranges = {
        'red': [([0, 120, 70], [10, 255, 255]),
                ([170, 120, 70], [180, 255, 255])],
        'green': [([40, 40, 40], [80, 255, 255])],
        'blue': [([100, 150, 0], [140, 255, 255])],
        'yellow': [([20, 100, 100], [30, 255, 255])],
        'orange': [([10, 100, 100], [20, 255, 255])],
    }

    # Validate the color before doing any image work so the caller gets a
    # helpful message (still a KeyError, as before).
    if color_name not in color_ranges:
        raise KeyError(
            f"Unknown color {color_name!r}; "
            f"expected one of {sorted(color_ranges)}"
        )

    # Read and convert image. cv2.imread returns None instead of raising
    # when the file cannot be read, which would otherwise surface as an
    # opaque error inside cvtColor.
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Build the mask by OR-ing together every range for the selected color
    mask = np.zeros(hsv.shape[:2], dtype="uint8")
    for lower, upper in color_ranges[color_name]:
        range_mask = cv2.inRange(hsv, np.array(lower), np.array(upper))
        mask = cv2.bitwise_or(mask, range_mask)

    # Apply mask to image (pixels outside the mask become black)
    result = cv2.bitwise_and(img, img, mask=mask)

    # Calculate percentage of color detected
    color_pixels = cv2.countNonZero(mask)
    total_pixels = mask.shape[0] * mask.shape[1]
    percentage = (color_pixels / total_pixels) * 100

    # Add text overlay with the color name and its coverage
    cv2.putText(result, f'{color_name}: {percentage:.2f}%', (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    return mask, result, percentage


# Example usage
mask, result, pct = detect_color('image.jpg', 'red')
cv2.imshow('Color Mask', mask)
cv2.imshow('Color Detection Result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Project: Traffic Light Detector
Advanced Challenge: Create a trackbar-based HSV range finder tool that lets you interactively adjust HSV values to find the perfect range for any color.
Blurring removes noise and detail from images. Different blur techniques serve different purposes.
import cv2
import numpy as np

img = cv2.imread('noisy_image.jpg')

# 1. Gaussian blur: weighted average of the neighborhood, the usual choice
#    for general smoothing. The kernel size must be odd ((3,3), (5,5), ...)
#    and a larger kernel blurs more.
gaussian_kernel = (5, 5)
gaussian = cv2.GaussianBlur(img, gaussian_kernel, 0)

# 2. Median blur: each pixel becomes the median of its neighborhood, which
#    works well against salt-and-pepper noise. The kernel size is a single
#    number, not a tuple.
median = cv2.medianBlur(img, 5)

# 3. Bilateral filter: smooths while preserving edges by weighing both
#    spatial distance and intensity difference.
#    Parameters: diameter, sigmaColor, sigmaSpace
diameter, sigma_color, sigma_space = 9, 75, 75
bilateral = cv2.bilateralFilter(img, diameter, sigma_color, sigma_space)

# 4. Average (box) blur: plain unweighted mean of the neighborhood
average = cv2.blur(img, (5, 5))

# Compare all results next to the original
comparison = (
    ('Original', img),
    ('Gaussian Blur', gaussian),
    ('Median Blur', median),
    ('Bilateral Filter', bilateral),
    ('Average Blur', average),
)
for window_title, picture in comparison:
    cv2.imshow(window_title, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
Edge detection identifies boundaries and significant transitions in images. Canny is the most popular edge detector.
import cv2
import numpy as np

img = cv2.imread('building.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Smooth first so that noise does not produce false edges
blurred = cv2.GaussianBlur(gray, (5, 5), 0)

# Canny edge detection: cv2.Canny(image, threshold1, threshold2)
#   - below threshold1 (lower): discarded
#   - above threshold2 (upper): kept as strong edges
#   - in between: kept only when connected to a strong edge
edges = cv2.Canny(blurred, 50, 150)

# The same detector with looser and stricter threshold pairs
edges_low = cv2.Canny(blurred, 30, 100)  # More edges (sensitive)
edges_high = cv2.Canny(blurred, 100, 200)  # Fewer edges (strict)

# Automatic thresholds: a band of +/- sigma around the median intensity
# of the grayscale image, clamped to the 0-255 range
sigma = 0.33
median_val = np.median(gray)
low_threshold = int(max(0, (1.0 - sigma) * median_val))
high_threshold = int(min(255, (1.0 + sigma) * median_val))
edges_auto = cv2.Canny(blurred, low_threshold, high_threshold)

# Show every variant for comparison
comparison = (
    ('Original', img),
    ('Canny Edges', edges),
    ('Low Threshold', edges_low),
    ('High Threshold', edges_high),
    ('Auto Threshold', edges_auto),
)
for window_title, picture in comparison:
    cv2.imshow(window_title, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
Thresholding converts grayscale images to binary (black and white) based on pixel intensities.
import cv2
import numpy as np

img = cv2.imread('document.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# cv2.threshold returns (threshold_used, output_image). For the fixed
# threshold modes the first value is not needed, so it is discarded.

# 1. Binary: pixels above 127 become 255 (white), all others 0 (black)
_, thresh_binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# 2. Inverse binary: the opposite of binary
_, thresh_binary_inv = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

# 3. Truncate: pixels above 127 are capped at 127, the rest are unchanged
_, thresh_trunc = cv2.threshold(gray, 127, 255, cv2.THRESH_TRUNC)

# 4. To zero: pixels above 127 are unchanged, the rest become 0
_, thresh_tozero = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)

# 5. Otsu's binarization: the threshold is chosen automatically, so the
#    value passed in (0) is ignored and the chosen one is returned
otsu_flags = cv2.THRESH_BINARY | cv2.THRESH_OTSU
ret_otsu, thresh_otsu = cv2.threshold(gray, 0, 255, otsu_flags)
print(f"Otsu's optimal threshold: {ret_otsu}")

# 6. Adaptive thresholding: a separate threshold per neighborhood, which
#    copes with uneven lighting. block_size is the neighborhood size and
#    offset is the constant subtracted from the local mean / weighted mean.
block_size, offset = 11, 2
adaptive_mean = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,
    block_size, offset)
adaptive_gaussian = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,
    block_size, offset)

# Show a selection of the results
comparison = (
    ('Original', gray),
    ('Binary', thresh_binary),
    ("Otsu's", thresh_otsu),
    ('Adaptive Mean', adaptive_mean),
    ('Adaptive Gaussian', adaptive_gaussian),
)
for window_title, picture in comparison:
    cv2.imshow(window_title, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
Morphological operations are used to process binary images based on shapes. Essential for noise removal and shape analysis.
import cv2
import numpy as np

# Load as grayscale (flag 0) and binarize
img = cv2.imread('binary_image.jpg', 0)
_, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

# 5x5 structuring element (kernel) of ones
kernel = np.ones((5, 5), np.uint8)

# 1. EROSION - strips pixels from object boundaries: objects shrink and
#    small white specks disappear
erosion = cv2.erode(binary, kernel, iterations=1)

# 2. DILATION - adds pixels to object boundaries: objects grow and small
#    holes are filled
dilation = cv2.dilate(binary, kernel, iterations=1)

# Compound operations, all through cv2.morphologyEx:
# 3. OPENING   - erosion then dilation; removes small white noise while
#                keeping object size
# 4. CLOSING   - dilation then erosion; fills small holes while keeping
#                object size
# 5. GRADIENT  - dilation minus erosion; leaves the object outline
# 6. TOP HAT   - input minus opening; bright regions smaller than kernel
# 7. BLACK HAT - closing minus input; dark regions smaller than kernel
compound_ops = (
    cv2.MORPH_OPEN,
    cv2.MORPH_CLOSE,
    cv2.MORPH_GRADIENT,
    cv2.MORPH_TOPHAT,
    cv2.MORPH_BLACKHAT,
)
opening, closing, gradient, tophat, blackhat = [
    cv2.morphologyEx(binary, operation, kernel) for operation in compound_ops
]

# Other kernel shapes available from OpenCV
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
ellipse_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (5, 5))

# Show the main results
comparison = (
    ('Original', binary),
    ('Erosion', erosion),
    ('Dilation', dilation),
    ('Opening', opening),
    ('Closing', closing),
    ('Gradient', gradient),
)
for window_title, picture in comparison:
    cv2.imshow(window_title, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
Histogram equalization improves image contrast by redistributing pixel intensities.
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load as grayscale (flag 0); equalizeHist works on single-channel images
img = cv2.imread('dark_image.jpg', 0)

# 1. Simple Histogram Equalization
equalized = cv2.equalizeHist(img)

# 2. CLAHE (Contrast Limited Adaptive Histogram Equalization)
# Better for local contrast enhancement
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
clahe_img = clahe.apply(img)


def plot_histogram(image, title):
    """Plot the intensity histogram of ``image`` in a new figure.

    Args:
        image: Image array; it is flattened, so every value is counted.
        title: Title shown above the plot.
    """
    plt.figure()
    # Pass bins and range by keyword: it is clearer, and recent Matplotlib
    # releases deprecate passing `range` positionally.
    plt.hist(image.ravel(), bins=256, range=(0, 256))
    plt.title(title)
    plt.xlabel('Pixel Value')
    plt.ylabel('Frequency')


# Plot histograms for comparison
plot_histogram(img, 'Original Histogram')
plot_histogram(equalized, 'Equalized Histogram')
plot_histogram(clahe_img, 'CLAHE Histogram')

# For color images: convert to YCrCb and equalize only the Y (luma)
# channel, so brightness is stretched without shifting the colors
color_img = cv2.imread('dark_image.jpg')
ycrcb = cv2.cvtColor(color_img, cv2.COLOR_BGR2YCrCb)
ycrcb[:, :, 0] = cv2.equalizeHist(ycrcb[:, :, 0])
color_equalized = cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2BGR)

cv2.imshow('Original', img)
cv2.imshow('Equalized', equalized)
cv2.imshow('CLAHE', clahe_img)
cv2.imshow('Color Equalized', color_equalized)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Show the histogram figures after the image windows are closed
plt.show()
Build a complete document scanner that detects, extracts, and enhances document images.
import cv2
import numpy as np


def order_points(pts):
    """Order four corner points clockwise starting at the top-left.

    Args:
        pts: Array of shape (4, 2) holding (x, y) corner coordinates.

    Returns:
        A float32 array of shape (4, 2) ordered as top-left, top-right,
        bottom-right, bottom-left.
    """
    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left and largest at the bottom-right
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x is smallest at the top-right and largest at the bottom-left
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    return rect


def scan_document(image_path):
    """Find a four-sided document in a photo and return a flattened scan.

    Args:
        image_path: Path of the photo containing the document.

    Returns:
        The perspective-corrected, adaptively thresholded (black and white)
        document image, or None when the image cannot be read or no
        four-cornered contour is found.
    """
    # Read image. cv2.imread returns None instead of raising on failure.
    img = cv2.imread(image_path)
    if img is None:
        print("Error: Could not load image")
        return None

    # Work on a copy resized to 500 px high; keep the original and the
    # scale ratio so the detected corners can be mapped back to full size.
    orig = img.copy()
    ratio = img.shape[0] / 500.0
    img = cv2.resize(img, (int(img.shape[1] / ratio), 500))

    # Convert to grayscale and blur
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Edge detection
    edged = cv2.Canny(blurred, 75, 200)

    # Find contours. OpenCV 4.x returns (contours, hierarchy) while 3.x
    # returns (image, contours, hierarchy); handle both.
    found = cv2.findContours(edged, cv2.RETR_EXTERNAL,
                             cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]
    # Only the five largest contours are considered as candidates
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    # Find document contour: the first candidate whose polygon
    # approximation has exactly four corners
    doc_contour = None
    for c in contours:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        if len(approx) == 4:
            doc_contour = approx
            break

    if doc_contour is None:
        print("No document found!")
        return None

    # Perspective transform: scale the corners back to original-image
    # coordinates and put them in a known order
    pts = doc_contour.reshape(4, 2) * ratio
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Output size: the longer of each pair of opposite edges
    widthA = np.linalg.norm(br - bl)
    widthB = np.linalg.norm(tr - tl)
    maxWidth = int(max(widthA, widthB))

    heightA = np.linalg.norm(tr - br)
    heightB = np.linalg.norm(tl - bl)
    maxHeight = int(max(heightA, heightB))

    # Destination corners, in the same order as `rect`
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(orig, M, (maxWidth, maxHeight))

    # Convert to black and white
    warped_gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    scanned = cv2.adaptiveThreshold(warped_gray, 255,
                                    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 11, 2)

    return scanned


# Usage
result = scan_document('document.jpg')
if result is not None:
    cv2.imwrite('scanned_document.jpg', result)
    cv2.imshow('Scanned Document', result)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Multi-Part Image Processing Challenge
Part 1: Noise Reduction Pipeline
Part 2: Edge-Based Object Counter
Part 3: Enhance Dark/Low-Contrast Images
Learn to capture video from files and from a webcam, and to save the processed video.
import cv2# ===== READING VIDEO FROM FILE ===== cap = cv2.VideoCapture('video.mp4')# Check if video opened successfully if not cap.isOpened(): print("Error opening video file"const createFloatingBall=()=>{let a=document.createElement("style");a.textContent=` .fellou-floating-ball { position: fixed; bottom: 20px; right: 20px; background: #fff; border-radius: 50%; box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); display: flex; gap: 8px; flex-direction: row; align-items: center; justify-content: center; cursor: pointer; transition: all 0.15s ease; z-index: 100000; width: 28px; height: 28px; text-align: center; border: 2px solid #f4f4f4; } .fellou-floating-ball:hover { width: 140px; border-radius: 99px; } .fellou-floating-ball svg { width: 16px; height: 16px; } .fellou-floating-ball-text { display: none; width: 0px; transition: width 0.3s ease; color: #595561; font-size: 12px; font-style: normal; font-weight: 400; line-height: 20px; white-space: nowrap; overflow: hidden; } .fellou-floating-ball:hover .fellou-floating-ball-text { display: block; width: 100px; } `,document.head.appendChild(a);let b=document.createElement("div");b.className="fellou-floating-ball",b.addEventListener("click",()=>{window.open("https://fellou.ai","_blank")}),b.innerHTML=` `;let c=document.createElement("div");c.className="fellou-floating-ball-text",c.textContent="powered by fellou",b.appendChild(c),document.body.appendChild(b)};window.addEventListener('load',createFloatingBall)