Vectors and Vector Spaces
Vectors are the fundamental building blocks of machine learning: they represent everything from data points to neural network weights. Understanding vectors is essential for anyone working in AI and ML.
What is a Vector?
A vector is an ordered collection of numbers. In machine learning, we typically work with column vectors:
$$\mathbf{v} = \begin{bmatrix} v_1 \\ v_2 \\ \vdots \\ v_n \end{bmatrix}$$
Each element $v_i$ represents a feature or dimension. For example, a house might be represented as:
$$\mathbf{x}_{\text{house}} = \begin{bmatrix} 1500 \\ 3 \\ 2 \\ 0.25 \end{bmatrix}$$
where the dimensions represent square footage, bedrooms, bathrooms, and lot size in acres.
Vectors in Python with NumPy
NumPy is the standard library for vector operations in Python:
import numpy as np
# Creating vectors
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])
# Vector from a list of features
house_features = np.array([1500, 3, 2, 0.25])
print(f"House vector: {house_features}")
print(f"Number of features (dimensions): {len(house_features)}")
# Creating special vectors
zeros = np.zeros(5) # [0, 0, 0, 0, 0]
ones = np.ones(5) # [1, 1, 1, 1, 1]
range_vec = np.arange(0, 10, 2) # [0, 2, 4, 6, 8]
linspace = np.linspace(0, 1, 5) # [0, 0.25, 0.5, 0.75, 1]
print(f"Zeros: {zeros}")
print(f"Ones: {ones}")
print(f"Range: {range_vec}")
print(f"Linspace: {linspace}")Vector Addition and Scalar Multiplication
Two fundamental operations define vector spaces:
Vector Addition: Add corresponding elements: $\mathbf{u} + \mathbf{v} = (u_1 + v_1, \, u_2 + v_2, \, \ldots, \, u_n + v_n)$
Scalar Multiplication: Multiply each element by a scalar: $c\mathbf{v} = (c v_1, \, c v_2, \, \ldots, \, c v_n)$
import numpy as np
u = np.array([1, 2, 3])
v = np.array([4, 5, 6])
# Vector addition
addition = u + v
print(f"u + v = {addition}") # [5, 7, 9]
# Vector subtraction
subtraction = u - v
print(f"u - v = {subtraction}") # [-3, -3, -3]
# Scalar multiplication
scaled = 3 * u
print(f"3 * u = {scaled}") # [3, 6, 9]
# Linear combination: 2u + 3v
linear_combo = 2*u + 3*v
print(f"2u + 3v = {linear_combo}") # [14, 19, 24]The Dot Product
The dot product is one of the most important operations in ML:
$$\mathbf{u} \cdot \mathbf{v} = \sum_{i=1}^{n} u_i v_i = u_1 v_1 + u_2 v_2 + \cdots + u_n v_n$$
The dot product also has a geometric interpretation:
$$\mathbf{u} \cdot \mathbf{v} = \|\mathbf{u}\| \, \|\mathbf{v}\| \cos\theta$$
where $\theta$ is the angle between the vectors.
import numpy as np
u = np.array([1, 2, 3])
v = np.array([4, 5, 6])
# Dot product - three equivalent ways
dot1 = np.dot(u, v)
dot2 = u @ v
dot3 = np.sum(u * v)
print(f"np.dot(u, v) = {dot1}") # 32
print(f"u @ v = {dot2}") # 32
print(f"sum(u * v) = {dot3}") # 32
# Dot product in ML: weighted sum
weights = np.array([0.5, 0.3, 0.2])
features = np.array([100, 50, 25])
prediction = weights @ features
print(f"Weighted prediction: {prediction}") # 65.0ML Application: Linear Models
In linear regression and neural networks, predictions are computed as dot products:
import numpy as np
# Simple linear model: y = w·x + b
def predict(X, weights, bias):
return X @ weights + bias
# Training data: 3 samples, 4 features each
X = np.array([
[1500, 3, 2, 0.25], # House 1
[2000, 4, 3, 0.5], # House 2
[1200, 2, 1, 0.15] # House 3
])
# Learned weights (one per feature)
weights = np.array([100, 10000, 5000, 50000])
bias = 50000
# Predictions for all houses
predictions = predict(X, weights, bias)
print(f"Predicted prices: {predictions}")
# [252500. 330000. 202500.]
Vector Norms
Norms measure the "size" or "length" of a vector. Different norms are used in different ML contexts:
L2 Norm (Euclidean): $\|\mathbf{v}\|_2 = \sqrt{\sum_{i} v_i^2}$
L1 Norm (Manhattan): $\|\mathbf{v}\|_1 = \sum_{i} |v_i|$
L-infinity Norm (Max): $\|\mathbf{v}\|_\infty = \max_{i} |v_i|$
import numpy as np
v = np.array([3, -4, 0])
# L2 norm (Euclidean distance from origin)
l2_norm = np.linalg.norm(v) # or np.sqrt(np.sum(v**2))
print(f"L2 norm: {l2_norm}") # 5.0
# L1 norm (sum of absolute values)
l1_norm = np.linalg.norm(v, ord=1) # or np.sum(np.abs(v))
print(f"L1 norm: {l1_norm}") # 7.0
# L-infinity norm (maximum absolute value)
linf_norm = np.linalg.norm(v, ord=np.inf) # or np.max(np.abs(v))
print(f"L-inf norm: {linf_norm}") # 4.0
# Visualizing different norm "balls"
import matplotlib.pyplot as plt
theta = np.linspace(0, 2*np.pi, 100)
# L2 ball (circle)
x_l2 = np.cos(theta)
y_l2 = np.sin(theta)
# L1 ball (diamond): project the circle points onto the set |x| + |y| = 1
scale = 1 / (np.abs(x_l2) + np.abs(y_l2) + 1e-10)
x_l1, y_l1 = x_l2 * scale, y_l2 * scale
plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.plot(x_l2, y_l2, 'b-', label='L2 ball')
plt.title('L2 Norm Ball (Circle)')
plt.axis('equal')
plt.grid(True)
plt.subplot(1, 2, 2)
plt.plot(x_l1, y_l1, 'r-', label='L1 ball')
plt.title('L1 Norm Ball (Diamond)')
plt.axis('equal')
plt.grid(True)
plt.tight_layout()
plt.savefig('norm_balls.png', dpi=100)
plt.show()
Unit Vectors and Normalization
A unit vector has norm 1. Normalizing a vector means scaling it to have unit length:
$$\hat{\mathbf{v}} = \frac{\mathbf{v}}{\|\mathbf{v}\|}$$
Normalization is crucial in ML for:
- Preventing features with large magnitudes from dominating
- Computing cosine similarity
- Ensuring stable gradient descent
import numpy as np
v = np.array([3, 4])
# Normalize to unit vector
norm = np.linalg.norm(v)
unit_v = v / norm
print(f"Original vector: {v}")
print(f"Norm: {norm}") # 5.0
print(f"Unit vector: {unit_v}") # [0.6, 0.8]
print(f"Unit vector norm: {np.linalg.norm(unit_v)}") # 1.0
# Normalize a dataset (feature scaling)
data = np.array([
[100, 0.5],
[200, 0.8],
[150, 0.3]
])
# Normalize each row to unit length
row_norms = np.linalg.norm(data, axis=1, keepdims=True)
normalized_data = data / row_norms
print("Original data:")
print(data)
print("\nNormalized data (each row has L2 norm = 1):")
print(normalized_data)
Cosine Similarity
Cosine similarity measures the cosine of the angle between two vectors, ignoring their magnitudes:
$$\text{cosine\_similarity}(\mathbf{u}, \mathbf{v}) = \frac{\mathbf{u} \cdot \mathbf{v}}{\|\mathbf{u}\| \, \|\mathbf{v}\|} = \cos\theta$$
This ranges from -1 (opposite directions) to 1 (same direction).
import numpy as np
def cosine_similarity(u, v):
return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
# Document vectors (word counts)
doc1 = np.array([3, 2, 0, 5, 0, 0, 0, 2, 0, 0]) # "machine learning"
doc2 = np.array([3, 0, 0, 4, 0, 0, 0, 1, 0, 0]) # "deep learning"
doc3 = np.array([0, 0, 7, 0, 0, 6, 0, 0, 0, 0]) # "cooking recipe"
sim_12 = cosine_similarity(doc1, doc2)
sim_13 = cosine_similarity(doc1, doc3)
sim_23 = cosine_similarity(doc2, doc3)
print(f"Similarity (doc1, doc2): {sim_12:.4f}") # High - both about ML
print(f"Similarity (doc1, doc3): {sim_13:.4f}") # Low - different topics
print(f"Similarity (doc2, doc3): {sim_23:.4f}") # Low - different topics
# Using scikit-learn
from sklearn.metrics.pairwise import cosine_similarity as sklearn_cosine
# sklearn expects 2D arrays
docs = np.array([doc1, doc2, doc3])
similarity_matrix = sklearn_cosine(docs)
print("\nSimilarity matrix:")
print(similarity_matrix)
Vector Spaces
A vector space is a set of vectors closed under addition and scalar multiplication. Key concepts:
Linear Combination: A sum of scaled vectors, $c_1 \mathbf{v}_1 + c_2 \mathbf{v}_2 + \cdots + c_k \mathbf{v}_k$
Span: All possible linear combinations of a set of vectors (a quick membership check is sketched below)
Linear Independence: Vectors where no vector can be written as a linear combination of the others
Basis: A linearly independent set that spans the space
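As a quick illustration of span membership, here is a minimal sketch using least squares (the vectors below are made-up values chosen for this illustration): a vector lies in the span of a set of vectors exactly when some linear combination of them reproduces it.
import numpy as np
# The columns of A are the spanning vectors (two vectors in R^3)
A = np.array([
    [1.0, 0.0],
    [0.0, 1.0],
    [1.0, 1.0]
])
b_in = np.array([2.0, 3.0, 5.0])   # equals 2*col1 + 3*col2, so it lies in the span
b_out = np.array([1.0, 1.0, 0.0])  # cannot be written as a combination of the columns
for name, b in [("b_in", b_in), ("b_out", b_out)]:
    coeffs, _, _, _ = np.linalg.lstsq(A, b, rcond=None)
    in_span = np.allclose(A @ coeffs, b)
    print(f"{name}: coefficients={np.round(coeffs, 4)}, in span: {in_span}")
The example below then uses the standard basis and matrix rank to make linear independence concrete.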
import numpy as np
# Standard basis vectors in R^3
e1 = np.array([1, 0, 0])
e2 = np.array([0, 1, 0])
e3 = np.array([0, 0, 1])
# Any vector can be written as a linear combination of basis vectors
v = np.array([3, -2, 5])
# v = 3*e1 + (-2)*e2 + 5*e3
reconstruction = 3*e1 + (-2)*e2 + 5*e3
print(f"Original: {v}")
print(f"Reconstructed: {reconstruction}")
print(f"Equal: {np.allclose(v, reconstruction)}")
# Checking linear independence using matrix rank
vectors = np.array([
[1, 0, 0],
[0, 1, 0],
[0, 0, 1]
])
rank = np.linalg.matrix_rank(vectors)
print(f"\nRank of standard basis: {rank}") # 3 (full rank = linearly independent)
# Linearly dependent vectors
dependent = np.array([
[1, 2, 3],
[2, 4, 6], # 2 * first vector
[0, 1, 0]
])
rank_dep = np.linalg.matrix_rank(dependent)
print(f"Rank of dependent set: {rank_dep}") # 2 (not full rank)ML Application: Feature Vectors and Similarity Search
A complete example using vectors for ML:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
# Load the Iris dataset
iris = load_iris()
X = iris.data # 150 samples, 4 features each
y = iris.target
feature_names = iris.feature_names
print(f"Dataset shape: {X.shape}")
print(f"Features: {feature_names}")
print(f"\nFirst 3 samples (as vectors):")
for i in range(3):
print(f" Sample {i}: {X[i]} -> Class {iris.target_names[y[i]]}")
# Normalize features
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)
# Find most similar flowers to the first sample
query = X_normalized[0].reshape(1, -1)
# Compute cosine similarities
similarities = cosine_similarity(query, X_normalized)[0]
# Find top 5 most similar (excluding itself)
top_indices = np.argsort(similarities)[::-1][1:6]
print(f"\nMost similar to sample 0 ({iris.target_names[y[0]]}):")
for idx in top_indices:
print(f" Sample {idx}: similarity={similarities[idx]:.4f}, class={iris.target_names[y[idx]]}")
# Compute pairwise Euclidean distances
distances = euclidean_distances(X_normalized)
print(f"\nDistance matrix shape: {distances.shape}")
print(f"Distance from sample 0 to sample 1: {distances[0, 1]:.4f}")
# Visualize high-dimensional vectors using PCA
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
pca = PCA(n_components=2)
X_2d = pca.fit_transform(X_normalized)
plt.figure(figsize=(10, 8))
for i, name in enumerate(iris.target_names):
mask = y == i
plt.scatter(X_2d[mask, 0], X_2d[mask, 1], label=name, alpha=0.7)
plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.2%} variance)')
plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.2%} variance)')
plt.title('Iris Dataset: 4D vectors projected to 2D')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('iris_vectors.png', dpi=100)
plt.show()
Summary
Key concepts for ML practitioners:
| Concept | ML Application |
|---------|----------------|
| Vectors | Feature representation, embeddings |
| Dot product | Linear models, attention mechanisms |
| Norms | Regularization (L1/L2), distance metrics |
| Normalization | Feature scaling, stable training |
| Cosine similarity | Text similarity, recommendation systems |
| Vector spaces | Dimensionality reduction, PCA |
Understanding these vector operations is fundamental to:
- Building and understanding neural networks
- Implementing similarity search and clustering
- Working with embeddings and representations
- Debugging and optimizing ML models