NumPy Fundamentals
NumPy (Numerical Python) is the foundation of the entire Python data science ecosystem. Every major ML library—Pandas, Scikit-learn, TensorFlow, PyTorch—is built on top of NumPy's array abstraction. Understanding NumPy deeply will make you more effective with all these tools and help you write faster, more memory-efficient code.
Why NumPy?
Python lists are flexible but slow for numerical computation. They can hold mixed types, so Python must check types at runtime. They store pointers to objects scattered in memory, causing cache misses. And operations require explicit loops.
NumPy arrays solve these problems:
Homogeneous types: All elements have the same type (e.g., all float64), eliminating runtime type checking.
Contiguous memory: Elements are stored in adjacent memory locations, enabling efficient CPU cache utilization and SIMD (Single Instruction, Multiple Data) operations.
Vectorized operations: Operations apply to entire arrays without explicit loops, pushing computation into optimized C code.
Broadcasting: Arrays of different shapes can interact through automatic, memory-efficient expansion.
The performance difference is dramatic—NumPy operations can be 100x faster than equivalent Python loops.
import numpy as np
import time
def numpy_vs_python_speed(n=1_000_000):
    """Compare NumPy and Python list performance.

    Doubles ``n`` integers once with a list comprehension and once with a
    vectorized NumPy multiplication, then prints both timings and the
    resulting speedup.

    Args:
        n: Number of elements to process. Defaults to 1,000,000.

    Returns:
        None. The comparison is printed to stdout.
    """
    # Python list: each element is multiplied inside the interpreter loop.
    py_list = list(range(n))
    # perf_counter() is a high-resolution clock meant for measuring short
    # durations; time.time() can be too coarse to resolve the NumPy timing.
    start = time.perf_counter()
    # The results stay bound to names so that freeing them is not part of
    # the timed region.
    py_result = [x * 2 for x in py_list]
    py_time = time.perf_counter() - start

    # NumPy array: one vectorized multiplication over the whole array.
    np_array = np.arange(n)
    start = time.perf_counter()
    np_result = np_array * 2
    np_time = time.perf_counter() - start

    print("Performance Comparison: NumPy vs Python Lists")
    print("-" * 50)
    print(f"Operation: Multiply {n:,} elements by 2")
    print(f"Python list: {py_time*1000:.2f} ms")
    print(f"NumPy array: {np_time*1000:.2f} ms")
    # Guard the ratio: a zero NumPy timing would raise ZeroDivisionError.
    if np_time > 0:
        print(f"Speedup: {py_time/np_time:.1f}x")
    else:
        print("Speedup: n/a (NumPy time below timer resolution)")
numpy_vs_python_speed()
Creating Arrays
NumPy provides many ways to create arrays, each suited to different situations:
import numpy as np
# --- Arrays built from existing Python sequences ---
arr = np.array([1, 2, 3, 4, 5])             # flat list   -> 1D array
matrix = np.array([[1, 2, 3], [4, 5, 6]])   # nested list -> 2D array

# The element type can be requested up front instead of being inferred
floats = np.array([1, 2, 3], dtype=np.float32)

# --- Pre-filled arrays of a given shape (typical for initialization) ---
zeros = np.zeros(shape=(3, 4))   # 3x4 matrix, every entry 0.0
ones = np.ones(shape=(2, 3))     # 2x3 matrix, every entry 1.0
empty = np.empty(shape=(2, 2))   # memory is NOT initialized: faster, arbitrary contents

# --- Regularly spaced sequences ---
range_arr = np.arange(0, 10, 2)                # [0, 2, 4, 6, 8]; stop is excluded, like range()
linspace = np.linspace(start=0, stop=1, num=5) # [0, 0.25, 0.5, 0.75, 1]; both ends included

# --- Identity matrix (a linear-algebra staple) ---
eye = np.eye(N=3)   # 3x3, ones on the diagonal

# --- Random arrays (the usual starting point for ML weights) ---
uniform = np.random.rand(3, 3)                            # uniform on [0, 1)
normal = np.random.randn(3, 3)                            # standard normal
integers = np.random.randint(low=0, high=10, size=(3, 3)) # integers in [0, 10)

# Show a few of the results
print("Array Creation Examples", "-" * 50, sep="\n")
print(f"1D array: {arr}")
print(f"2D matrix:\n{matrix}")
print(f"Zeros shape {zeros.shape}:\n{zeros}")
print(f"Linspace: {linspace}")
print(f"Random normal:\n{np.round(normal, 3)}")
Array Attributes
Every NumPy array has attributes that describe its structure:
import numpy as np
# Pin the dtype explicitly: NumPy's default integer type is platform-dependent
# (it can be 32-bit on some platform / NumPy-version combinations), which
# would make the dtype, itemsize and nbytes values quoted below wrong.
arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=np.int64)
print("Array Attributes")
print("-" * 50)
print(f"Array:\n{arr}\n")
print(f"shape: {arr.shape}") # (3, 4) - 3 rows, 4 columns
print(f"ndim: {arr.ndim}") # 2 - number of dimensions
print(f"size: {arr.size}") # 12 - total elements
print(f"dtype: {arr.dtype}") # int64 - data type
print(f"itemsize: {arr.itemsize}") # 8 bytes per element
print(f"nbytes: {arr.nbytes}") # 96 total bytes (12 * 8)
Shape is the most important attribute. It's a tuple giving the size along each dimension. A (3, 4) array has 3 rows and 4 columns. A (2, 3, 4) array is a "stack" of 2 matrices, each 3×4.
Indexing and Slicing
NumPy extends Python's slicing syntax to multiple dimensions:
import numpy as np
# Demo grid: the integers 0..11 laid out as 3 rows x 4 columns.
arr = np.arange(12).reshape((3, 4))

# Index arrays for the fancy-indexing example: they are paired element-wise,
# so together they select positions (0, 1) and (2, 3).
rows, cols = np.array([0, 2]), np.array([1, 3])

print(f"Array:\n{arr}\n")

# One index per axis picks out a single element.
print(f"arr[0, 0] = {arr[0, 0]}")  # top-left corner
print(f"arr[2, 3] = {arr[2, 3]}")  # bottom-right corner
print(f"arr[-1, -1] = {arr[-1, -1]}")  # bottom-right again, counted from the end

# A slice per axis (arr[row_slice, col_slice]) picks out rows, columns or sub-blocks.
print(f"\narr[0, :] = {arr[0, :]} # First row")
print(f"arr[:, 0] = {arr[:, 0]} # First column")
print(f"arr[0:2, 1:3] =\n{arr[0:2, 1:3]} # Submatrix")

# A boolean mask keeps only the elements where the condition holds.
print(f"\narr[arr > 5] = {arr[arr > 5]} # Elements > 5")

# Fancy indexing with the rows/cols arrays defined above follows.
print(f"arr[rows, cols] = {arr[rows, cols]} # Elements at (0,1) and (2,3)")
Important: NumPy slices are views, not copies (boolean and fancy indexing, by contrast, return copies). Modifying a slice modifies the original array:
import numpy as np
# Part 1: a basic slice is a window onto the same underlying data.
arr = np.array([1, 2, 3, 4, 5])
slice_view = arr[1:4]
slice_view[0] = 99  # writes through to arr[1]

print("Views vs Copies", "-" * 50, sep="\n")
print(f"After modifying slice: arr = {arr}")
print("The slice is a view—changes affect original!")

# Part 2: start again from a fresh array and take an explicit copy, which
# owns its own data.
arr = np.array([1, 2, 3, 4, 5])
slice_copy = arr[1:4].copy()
slice_copy[0] = 99  # only the copy changes

print(f"After modifying copy: arr = {arr}")
print("The copy is independent.")
Reshaping Arrays
Reshaping changes an array's dimensions without changing its data:
import numpy as np
# Start from a flat array of 12 elements.
arr = np.arange(12)
print(f"Original: {arr} (shape: {arr.shape})")

# Same 12 elements, arranged as 3 rows x 4 columns.
reshaped = arr.reshape((3, 4))
print(f"\nReshaped to (3, 4):\n{reshaped}")

# A -1 asks NumPy to infer that dimension from the total size (here: 12 / 4 = 3 rows).
auto_rows = arr.reshape((-1, 4))
print(f"\nreshape(-1, 4) auto-calculates rows:\n{auto_rows}")

# Two ways back to 1D:
#   flatten() always returns a new copy;
#   ravel() returns a view whenever the memory layout allows it (so it is
#   usually cheaper) and only copies otherwise.
flat = reshaped.flatten()
ravel = reshaped.ravel()
print(f"\nFlattened: {flat}")

# .T swaps the two axes: (3, 4) becomes (4, 3).
transposed = reshaped.T
print(f"\nTransposed (4, 3):\n{transposed}")

# Adding a length-1 axis turns a flat array into an explicit column or row
# (handy for broadcasting).
col_vector = arr.reshape((-1, 1))  # shape (12, 1)
row_vector = arr.reshape((1, -1))  # shape (1, 12)
# Or use np.newaxis
col_vector = arr[:, np.newaxis] # Same result
Broadcasting
Broadcasting is NumPy's way of performing operations on arrays with different shapes. Instead of copying data, NumPy "broadcasts" smaller arrays across larger ones.
Broadcasting rules:
- If arrays have different numbers of dimensions, prepend 1s to the smaller shape
- Arrays are compatible along a dimension if they have the same size or one of them is 1
- The result shape is the maximum along each dimension
import numpy as np
# Operands used throughout the broadcasting examples.
arr = np.array([[1, 2, 3], [4, 5, 6]])   # shape (2, 3)
row = np.array([1, 2, 3])                # shape (3,)
col = np.array([[10], [20]])             # shape (2, 1)

# 1) Scalar: the single value 10 is added to every element.
result = arr + 10
print("Scalar broadcasting:")
print(f"arr + 10 =\n{result}\n")

# 2) Row vector: [1, 2, 3] is added to each of the two rows.
result = arr + row
print("Row vector broadcasting:")
print(f"arr (2,3) + row (3,) =\n{result}\n")

# 3) Column vector: 10 is added across the first row, 20 across the second.
result = arr + col
print("Column vector broadcasting:")
print(f"arr (2,3) + col (2,1) =\n{result}\n")

# 4) Outer product: a (3, 1) column times a (1, 2) row broadcasts to (3, 2).
a = np.array([1, 2, 3])
b = np.array([10, 20])
a_col = a[:, np.newaxis]   # shape (3, 1)
b_row = b[np.newaxis, :]   # shape (1, 2)
outer = a_col * b_row
print("Outer product via broadcasting:")
print(f"a (3,1) * b (1,2) =\n{outer}")
Broadcasting eliminates loops and temporary arrays, making code both faster and more readable.
Vectorized Operations
Vectorization means expressing operations on entire arrays rather than individual elements. NumPy applies operations element-wise:
import numpy as np
# Two equal-length operands; every operation below works element by element.
a, b = np.array([1, 2, 3, 4]), np.array([10, 20, 30, 40])

print("Vectorized Operations", "-" * 50, sep="\n")
print(f"a = {a}")
print(f"b = {b}\n")

# Arithmetic operators pair up elements position by position.
print(f"a + b = {a + b}")
print(f"a * b = {a * b}")
print(f"a ** 2 = {a ** 2}")

# Comparison operators produce one boolean per element.
print(f"a > 2 = {a > 2}")
print(f"a == b/10 = {a == b/10}")

# Universal functions (ufuncs) apply a math function to each element.
print(f"\nnp.sqrt(a) = {np.sqrt(a)}")
print(f"np.exp(a) = {np.round(np.exp(a), 2)}")
print(f"np.sin(a) = {np.round(np.sin(a), 3)}")
print(f"np.log(a) = {np.round(np.log(a), 3)}")
Aggregation Functions
NumPy provides functions to compute statistics across arrays:
import numpy as np
# 3x3 demo matrix holding the integers 1..9 in row order.
arr = np.arange(1, 10).reshape((3, 3))
print(f"Array:\n{arr}\n")

# Without an axis argument the whole array is reduced to a single number.
print(f"sum: {arr.sum()}")
print(f"mean: {arr.mean()}")
print(f"std: {arr.std():.4f}")
print(f"min: {arr.min()}, max: {arr.max()}")

# With an axis argument only that dimension is collapsed.
print(f"\nSum along axis=0 (columns): {arr.sum(axis=0)}")
print(f"Sum along axis=1 (rows): {arr.sum(axis=1)}")
print(f"Mean along axis=0: {arr.mean(axis=0)}")

# argmax reports the position of the largest value (common in ML, e.g. to
# pick the highest-scoring class); without an axis the index refers to the
# flattened array.
print(f"\nargmax (index of max): {arr.argmax()}")
print(f"argmax per row: {arr.argmax(axis=1)}")

# Cumulative operations
print(f"\nCumulative sum: {np.cumsum(arr.flatten())}")
The axis parameter is crucial: it names the dimension that gets collapsed. axis=0 aggregates down the rows (giving one result per column), while axis=1 aggregates across the columns (giving one result per row).
Linear Algebra Operations
NumPy provides essential linear algebra operations for ML:
import numpy as np
# Operands for the whole example: two 2x2 matrices, a vector, and the
# right-hand side of the linear system solved at the end.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
v = np.array([1, 2])
b = np.array([5, 11])

# Computed up front, reported further down.
eigenvalues, eigenvectors = np.linalg.eig(A)   # eigen-decomposition of A
x = np.linalg.solve(A, b)                      # the x that satisfies A @ x = b

print("Linear Algebra Operations", "-" * 50, sep="\n")

# @ is true matrix multiplication; * would multiply element by element.
# np.dot(A, B) and np.matmul(A, B) are equivalent spellings here.
print(f"A @ B (matrix multiply):\n{A @ B}\n")
print(f"A @ v (matrix-vector):\n{A @ v}\n")

# Transpose: rows become columns.
print(f"A.T (transpose):\n{A.T}\n")

# Determinant and inverse.
print(f"det(A) = {np.linalg.det(A):.4f}")
print(f"inv(A):\n{np.linalg.inv(A)}\n")

# Eigenvalues and eigenvectors.
print(f"Eigenvalues: {eigenvalues}")
print(f"Eigenvectors:\n{eigenvectors}\n")

# Solution of the linear system Ax = b.
print(f"Solution to Ax = b: x = {x}")
print(f"Verify: A @ x = {A @ x}")
Random Number Generation
ML relies heavily on randomness for initialization, sampling, and stochastic algorithms:
import numpy as np
# Set seed for reproducibility.
# This seeds NumPy's global (legacy) random state. Every np.random.* call
# below draws from that one stream, so adding, removing or reordering calls
# changes all values printed after that point.
np.random.seed(42)
print("Random Number Generation")
print("-" * 50)
# Basic distributions (5 samples each)
uniform = np.random.rand(5) # Uniform [0, 1)
normal = np.random.randn(5) # Standard normal N(0, 1)
integers = np.random.randint(0, 10, 5) # Integers in [0, 10) - upper bound excluded
print(f"Uniform [0,1): {np.round(uniform, 3)}")
print(f"Standard normal: {np.round(normal, 3)}")
print(f"Random integers [0,10): {integers}")
# Parameterized distributions
custom_normal = np.random.normal(loc=5, scale=2, size=5) # N(5, 2²): mean 5, standard deviation 2
custom_uniform = np.random.uniform(low=-1, high=1, size=5) # Uniform [-1, 1)
print(f"\nN(5, 2²): {np.round(custom_normal, 3)}")
print(f"Uniform [-1, 1): {np.round(custom_uniform, 3)}")
# Shuffling and sampling
arr = np.arange(10)
np.random.shuffle(arr) # In-place shuffle: arr itself is reordered, nothing is returned
print(f"\nShuffled array: {arr}")
# replace=False: each value can be picked at most once
choices = np.random.choice([1, 2, 3, 4, 5], size=3, replace=False)
print(f"Random choices (no replacement): {choices}")
# For ML: Xavier (Glorot) normal initialization.
# The standard deviation is sqrt(2 / (fan_in + fan_out)); He initialization
# would use sqrt(2 / fan_in) instead.
fan_in, fan_out = 100, 50
xavier_std = np.sqrt(2.0 / (fan_in + fan_out))
# Scaling standard-normal samples by xavier_std gives weights whose standard
# deviation is approximately xavier_std.
weights = np.random.randn(fan_in, fan_out) * xavier_std
print(f"\nXavier init weights std: {weights.std():.4f} (target: {xavier_std:.4f})")
Memory Efficiency Tips
Understanding memory layout helps write faster code:
import numpy as np
# --- Memory order: C stores rows contiguously, F stores columns contiguously ---
nested = [[1, 2, 3], [4, 5, 6]]
arr_c = np.array(nested, order='C')   # row-major, NumPy's default
arr_f = np.array(nested, order='F')   # column-major (Fortran style)

print("Memory Layout", "-" * 50, sep="\n")
print(f"C order (row-major): {arr_c.flags['C_CONTIGUOUS']}")
print(f"F order (col-major): {arr_f.flags['F_CONTIGUOUS']}")

# --- Views vs copies ---
arr = np.arange(10)
view = arr[::2]       # strided slice: still backed by arr's buffer
copy = view.copy()    # same values, but in its own buffer
print(f"\nView shares memory: {np.shares_memory(arr, view)}")
print(f"Copy shares memory: {np.shares_memory(arr, copy)}")

# --- In-place vs out-of-place arithmetic ---
arr = np.arange(1_000_000, dtype=np.float64)
arr += 1        # updates the existing buffer
arr = arr + 1   # allocates a second full-size array for the result

# --- Pick the smallest dtype that is precise enough ---
n_items = 1_000_000
float64_arr = np.zeros(n_items, dtype=np.float64)   # 8 bytes per element
float32_arr = np.zeros(n_items, dtype=np.float32)   # 4 bytes per element
print(f"\nfloat64 array: {float64_arr.nbytes / 1e6:.1f} MB")
print(f"float32 array: {float32_arr.nbytes / 1e6:.1f} MB")
Key Takeaways
- NumPy arrays are faster than Python lists due to contiguous memory and vectorization
- Broadcasting enables operations on different-shaped arrays without copying data
- Vectorization replaces loops with array operations for dramatic speedups
- Axis parameter in aggregations: axis=0 for columns, axis=1 for rows
- @ operator for matrix multiplication (not *)
- Views vs copies: slices are views; use .copy() when needed
- Reproducibility: always set random seeds for experiments
- Memory efficiency: use appropriate dtypes, in-place operations, and views when possible