Appendix D: Basics of Numpy and Tensors#
Mahmood Amintoosi, Spring 2024
Computer Science Dept, Ferdowsi University of Mashhad
References and other resources#
# Auto-setup when running on Google Colab
import os
if 'google.colab' in str(get_ipython()) and not os.path.exists('/content/neural-networks'):
    !git clone -q https://github.com/fum-cs/neural-networks.git /content/neural-networks
    !pip --quiet install -r /content/neural-networks/requirements_colab.txt
    %cd neural-networks/notebooks
Python Numpy Library#
import numpy as np
X = np.array([1, 2, 3, 4, 5])
print(X)
[1 2 3 4 5]
X = np.array([[1, 2, 3],[4, 5, 6]])
X, X.shape
(array([[1, 2, 3],
[4, 5, 6]]),
(2, 3))
So why PyTorch?#
Auto Grad
GPU Support
Simple Python
Easy to use + debug
Supported/developed by Facebook
Nice and extensible interface (modules, etc.)
A lot of research code is published as PyTorch projects
import torch
print("PyTorch Version:", torch.__version__)
PyTorch Version: 2.2.0+cpu
The API is very similar to the numpy framework (if that helps!)
Tensor Creation#
First of all, what is a tensor?#
A matrix is a grid of numbers, let’s say (3x5). In simple terms, a tensor can be seen as a generalization of a matrix to higher dimensions. It can be of arbitrary shape, e.g. (3 x 6 x 2 x 10).
To start with, you can think of tensors as multidimensional arrays.
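As a minimal sketch of this (just illustrating the shape mentioned above), a higher-dimensional tensor can be created and inspected the same way:
# a 4-dimensional tensor of shape (3, 6, 2, 10), filled with zeros
T = torch.zeros(3, 6, 2, 10)
T.shape, T.ndim   # (torch.Size([3, 6, 2, 10]), 4)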
X = torch.tensor([1, 2, 3, 4, 5])
X
tensor([1, 2, 3, 4, 5])
X.shape
torch.Size([5])
X = torch.tensor([[1, 2, 3], [4, 5, 6]])
X
tensor([[1, 2, 3],
[4, 5, 6]])
X.shape
torch.Size([2, 3])
# numpy
np.eye(3)
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
# torch
torch.eye(3)
tensor([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
# numpy
5 * np.eye(3)
array([[5., 0., 0.],
[0., 5., 0.],
[0., 0., 5.]])
# torch
5 * torch.eye(3)
tensor([[5., 0., 0.],
[0., 5., 0.],
[0., 0., 5.]])
# numpy
np.ones(5)
array([1., 1., 1., 1., 1.])
# torch
torch.ones(5)
tensor([1., 1., 1., 1., 1.])
# numpy
np.zeros(5)
array([0., 0., 0., 0., 0.])
# torch
torch.zeros(5)
tensor([0., 0., 0., 0., 0.])
# numpy
np.empty((3, 5))
array([[0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
0.00000000e+000, 0.00000000e+000],
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
0.00000000e+000, 0.00000000e+000],
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
0.00000000e+000, 2.65706415e-312]])
# torch
torch.empty((3, 5))
tensor([[-9.6192e-02, 1.7642e-42, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[ 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[ 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]])
# numpy
X = np.random.random((5, 3))
X
array([[0.81457294, 0.60368816, 0.02025257],
[0.47869276, 0.68703585, 0.28019497],
[0.49741779, 0.11215971, 0.28736338],
[0.31060351, 0.18414884, 0.45232343],
[0.28798753, 0.51982924, 0.56047426]])
# torch
Y = torch.rand((5, 3))
Y
tensor([[0.1348, 0.5308, 0.1641],
[0.0861, 0.7519, 0.5199],
[0.5603, 0.2654, 0.2679],
[0.7880, 0.0453, 0.0400],
[0.9354, 0.3329, 0.8818]])
# numpy
X.shape
(5, 3)
# torch
Y.shape
torch.Size([5, 3])
But wait: Why do we even need tensors if we can do exactly the same with numpy arrays?#
torch.tensor behaves like a numpy array under mathematical operations. However, torch.tensor additionally keeps track of gradients (see the next notebook) and provides GPU support.
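As a tiny illustration of the gradient tracking (a sketch; autograd is covered properly in the next notebook):
t = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
loss = (t ** 2).sum()   # a toy scalar "loss"
loss.backward()         # computes d(loss)/dt = 2*t
t.grad                  # tensor([2., 4., 6.])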
Linear Algebra Operations#
np.random.seed(42)
X = np.random.rand(3, 5)
Y = torch.rand(3, 5)
# numpy (matrix multiplication)
X.T @ X
array([[0.16503831, 0.38510648, 0.42641524, 0.32236316, 0.17263346],
[0.38510648, 1.84795652, 1.55362207, 0.81001791, 0.36581039],
[0.42641524, 1.55362207, 1.979037 , 1.135646 , 0.87887913],
[0.32236316, 0.81001791, 1.135646 , 0.76481914, 0.55764349],
[0.17263346, 0.36581039, 0.87887913, 0.55764349, 0.55876891]])
Y.shape
torch.Size([3, 5])
# torch (matrix multiplication)
Y.t() @ Y
tensor([[0.9341, 0.2604, 0.5407, 0.2253, 1.0566],
[0.2604, 0.1549, 0.3039, 0.1691, 0.4157],
[0.5407, 0.3039, 0.8331, 0.3833, 0.7010],
[0.2253, 0.1691, 0.3833, 0.2045, 0.3794],
[1.0566, 0.4157, 0.7010, 0.3794, 1.4520]])
Y.t().matmul(Y)
tensor([[0.9341, 0.2604, 0.5407, 0.2253, 1.0566],
[0.2604, 0.1549, 0.3039, 0.1691, 0.4157],
[0.5407, 0.3039, 0.8331, 0.3833, 0.7010],
[0.2253, 0.1691, 0.3833, 0.2045, 0.3794],
[1.0566, 0.4157, 0.7010, 0.3794, 1.4520]])
# CAUTION: Operator '*' does element-wise multiplication, just like in numpy!
# Y.t() * Y # error, dimensions do not match for element-wise multiplication
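For shapes that do match, element-wise multiplication works as expected; a quick sketch with the Y defined above:
(Y * Y).shape   # element-wise square of Y; the shape stays (3, 5)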
# caution: X.T @ X is 5x5 but has rank at most 3 (X is 3x5), so it is singular and its "inverse" below is numerical noise
np.linalg.inv(X.T @ X)
array([[ 1.30233439e+16, 6.34017941e+15, -8.31135944e+15,
-1.26104532e+16, 1.74835230e+16],
[ 3.59410056e+15, -7.96605388e+14, 1.63965035e+15,
-2.92300572e+15, -2.50756326e+14],
[-4.06943337e+15, 1.95223704e+15, -3.47888622e+15,
3.07977842e+15, 2.37750140e+15],
[-1.20095990e+16, -5.28950819e+15, 6.80376017e+15,
1.15069415e+16, -1.50119988e+16],
[ 1.20095990e+16, 7.70906987e+14, 1.76218655e+14,
-1.05182516e+16, 6.00479950e+15]])
# the same caveat applies here: Y.t() @ Y is singular, so these values are not meaningful
torch.inverse(Y.t() @ Y)
tensor([[ -4196320.5000, -13226547.0000, 3300869.0000, -698496.0625,
5429491.5000],
[-10183855.0000, -80709272.0000, 3046004.7500, 35642840.0000,
19735810.0000],
[ 3611629.2500, 6418907.0000, -3347998.0000, 4414598.5000,
-4003062.7500],
[ -3035611.0000, 27769944.0000, 6201265.5000, -29184842.0000,
-1110512.3750],
[ 5018926.5000, 22378582.0000, -3278026.2500, -4202772.0000,
-7378903.5000]])
np.arange(2, 10, 2)
array([2, 4, 6, 8])
torch.arange(2, 10, 2)
tensor([2, 4, 6, 8])
np.linspace(0, 1, 10)
array([0. , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
0.55555556, 0.66666667, 0.77777778, 0.88888889, 1. ])
torch.linspace(0, 1, 10)
tensor([0.0000, 0.1111, 0.2222, 0.3333, 0.4444, 0.5556, 0.6667, 0.7778, 0.8889,
1.0000])
Your turn#
Create the tensor:
\( \begin{bmatrix} 5 & 7 & 9 & 11 & 13 & 15 & 17 & 19 \end{bmatrix} \)
# YOUR TURN
More on PyTorch Tensors#
Each operation is also available as a function.
X = torch.rand(3, 2)
torch.exp(X)
tensor([[1.8398, 1.7365],
[2.2936, 2.6004],
[1.8534, 2.5880]])
X.exp()
tensor([[1.8398, 1.7365],
[2.2936, 2.6004],
[1.8534, 2.5880]])
X.sqrt()
tensor([[0.7808, 0.7429],
[0.9111, 0.9776],
[0.7855, 0.9751]])
(X.exp() + 2).sqrt() - 2 * X.log().sigmoid() # be creative :-)
tensor([[1.2020, 1.2218],
[1.1649, 1.1675],
[1.1998, 1.1671]])
Many more functions available: sin, cos, tanh, log, etc.
A = torch.eye(3)
A
tensor([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
A.add(5)
tensor([[6., 5., 5.],
[5., 6., 5.],
[5., 5., 6.]])
A
tensor([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
Functions that mutate (in-place) the passed object end with an underscore, e.g. add_, div_, etc.
A.add_(5)
tensor([[6., 5., 5.],
[5., 6., 5.],
[5., 5., 6.]])
A
tensor([[6., 5., 5.],
[5., 6., 5.],
[5., 5., 6.]])
A.div_(3)
tensor([[2.0000, 1.6667, 1.6667],
[1.6667, 2.0000, 1.6667],
[1.6667, 1.6667, 2.0000]])
A
tensor([[2.0000, 1.6667, 1.6667],
[1.6667, 2.0000, 1.6667],
[1.6667, 1.6667, 2.0000]])
A.uniform_() # fills the tensor with random uniform numbers in [0, 1]
tensor([[0.5081, 0.3133, 0.9799],
[0.1257, 0.8718, 0.3359],
[0.8081, 0.0859, 0.7496]])
A
tensor([[0.5081, 0.3133, 0.9799],
[0.1257, 0.8718, 0.3359],
[0.8081, 0.0859, 0.7496]])
Indexing#
Again, it works just like in numpy.
A = torch.randint(100, (3, 3))
A
tensor([[92, 73, 83],
[63, 71, 86],
[47, 18, 5]])
A[0, 0]
tensor(92)
A[2, 1]
tensor(18)
A[1]
tensor([63, 71, 86])
A[:, 1]
tensor([73, 71, 18])
A[1:2, :], A[1:2, :].shape
(tensor([[63, 71, 86]]), torch.Size([1, 3]))
A[1:, 1:]
tensor([[71, 86],
[18, 5]])
A[:2, :2]
tensor([[92, 73],
[63, 71]])
Reshaping & Expanding#
X = torch.tensor([1, 2, 3, 4])
X
tensor([1, 2, 3, 4])
X = X.repeat(3, 1) # repeat it 3 times along the 0th dimension and 1 time along the 1st dimension
X, X.shape
(tensor([[1, 2, 3, 4],
[1, 2, 3, 4],
[1, 2, 3, 4]]),
torch.Size([3, 4]))
X = torch.tensor([[1,2,3,4],
[5,6,7,8],[9,10,11,12]])
X, X.shape
(tensor([[ 1, 2, 3, 4],
[ 5, 6, 7, 8],
[ 9, 10, 11, 12]]),
torch.Size([3, 4]))
# equivalent of 'reshape' in numpy (view does not allocate new memory!)
Y = X.view(2, 6)
Y
tensor([[ 1, 2, 3, 4, 5, 6],
[ 7, 8, 9, 10, 11, 12]])
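Since view returns a view of the same underlying storage, no data is copied; a quick sketch to check this:
Y.data_ptr() == X.data_ptr()   # True: Y and X share the same memory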
Y = X.view(-1) # -1 tells PyTorch to infer the number of elements along that dimension
Y, Y.shape
(tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), torch.Size([12]))
Y = X.view(-1, 2)
Y, Y.shape
(tensor([[ 1, 2],
[ 3, 4],
[ 5, 6],
[ 7, 8],
[ 9, 10],
[11, 12]]),
torch.Size([6, 2]))
Y = X.view(-1, 4)
Y, Y.shape
(tensor([[ 1, 2, 3, 4],
[ 5, 6, 7, 8],
[ 9, 10, 11, 12]]),
torch.Size([3, 4]))
Y = torch.ones(5)
Y, Y.shape
(tensor([1., 1., 1., 1., 1.]), torch.Size([5]))
Y = Y.view(-1, 1)
Y, Y.shape
(tensor([[1.],
[1.],
[1.],
[1.],
[1.]]),
torch.Size([5, 1]))
Y.expand(5, 5) # similar to repeat but does not actually allocate new memory
tensor([[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]])
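A quick way to see that expand does not copy data is to look at the strides (a sketch; Y has shape [5, 1] here):
Y.expand(5, 5).stride()   # (1, 0): stride 0 along the expanded dimension, so all columns reuse the same memory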
X = torch.eye(4)
Y = X[3:, :]
Y, Y.shape
(tensor([[0., 0., 0., 1.]]), torch.Size([1, 4]))
Y = Y.squeeze() # removes all dimensions of size '1'
Y, Y.shape
(tensor([0., 0., 0., 1.]), torch.Size([4]))
Y = Y.unsqueeze(1)
Y, Y.shape
(tensor([[0.],
[0.],
[0.],
[1.]]),
torch.Size([4, 1]))
Note that tensors with shapes [1,4], [4] and [4,1] are different.
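A quick check of the three shapes, using a small helper vector v introduced only for this sketch:
v = torch.zeros(4)
v.unsqueeze(0).shape, v.shape, v.unsqueeze(1).shape
# (torch.Size([1, 4]), torch.Size([4]), torch.Size([4, 1]))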
Your turn!#
Create the tensor:
\( \begin{bmatrix} 7 & 5 & 5 & 5 & 5 \\ 5 & 7 & 5 & 5 & 5 \\ 5 & 5 & 7 & 5 & 5 \\ 5 & 5 & 5 & 7 & 5 \\ 5 & 5 & 5 & 5 & 7 \end{bmatrix} \)
Hint: You can use matrix addition and scalar multiplication
# YOUR TURN
Create the tensor:
\( \begin{bmatrix} 4 & 6 & 8 & 10 & 12 \\ 14 & 16 & 18 & 20 & 22 \\ 24 & 26 & 28 & 30 & 32 \end{bmatrix}\)
# YOUR TURN
Create the tensor:
\( \begin{bmatrix} 2 & 2 & 2 & 2 & 2 \\ 4 & 4 & 4 & 4 & 4 \\ 6 & 6 & 6 & 6 & 6 \\ 8 & 8 & 8 & 8 & 8 \end{bmatrix} \)
# YOUR TURN
Reductions#
X = torch.randint(10, (3, 4)).float()
X
tensor([[9., 4., 0., 2.],
[4., 8., 9., 4.],
[3., 3., 2., 5.]])
X.sum()
tensor(53.)
X.sum().item()
53.0
X.sum(0) # column-wise sum
tensor([16., 15., 11., 11.])
X.sum(dim=1) # row-wise sum
tensor([15., 25., 13.])
X.mean()
tensor(4.4167)
X.mean(dim=1)
tensor([3.7500, 6.2500, 3.2500])
X.norm(dim=0)
tensor([10.2956, 9.4340, 9.2195, 6.7082])
Your turn!#
Compute the norms of the row-vectors in matrix X without using torch.norm().
Remember: \( \|\vec{v}\|_2 = \sqrt{v_1^2 + v_2^2 + \dots + v_n^2} \)
Hint: X**2 computes the element-wise square.
X = torch.eye(4) + torch.arange(4).repeat(4, 1).float()
# YOUR TURN
# SOLUTION: tensor([3.8730, 4.1231, 4.3589, 4.5826])
Masking#
X = torch.randint(100, (5, 3))
X
tensor([[55, 2, 95],
[28, 92, 58],
[16, 96, 12],
[57, 31, 38],
[34, 7, 35]])
mask = (X > 25) & (X < 75)
mask
tensor([[ True, False, False],
[ True, False, True],
[False, False, False],
[ True, True, True],
[ True, False, True]])
X[mask] # returns all elements matching the criteria in a 1D-tensor
tensor([55, 28, 58, 57, 31, 38, 34, 35])
mask.sum() # number of elements that fulfill the condition
tensor(8)
(X == 25) | (X > 60)
tensor([[False, False, True],
[False, True, False],
[False, True, False],
[False, False, False],
[False, False, False]])
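Masks can also be used for assignment; here is a small sketch on a copy, so that X itself stays unchanged:
Z = X.clone()
Z[~mask] = 0   # zero out every entry outside the (25, 75) range
Z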
Your turn!#
Get the number of non-zero entries in X
X = torch.tensor([[1, 0, 2], [0, 6, 0]])
# YOUR TURN
Compute the sum of all entries in X that are larger than the mean of all values in X.
# YOUR TURN
Some useful properties of tensors#
x = torch.Tensor([[0,1,2], [3,4,5]])
print("x.shape: \n%s\n" % (x.shape,))
print("x.size(): \n%s\n" % (x.size(),))
print("x.size(1): \n%s\n" % x.size(1))
print("x.dim(): \n%s\n" % x.dim())
print("x.dtype: \n%s\n" % x.dtype)
print("x.device: \n%s\n" % x.device)
x.shape:
torch.Size([2, 3])
x.size():
torch.Size([2, 3])
x.size(1):
3
x.dim():
2
x.dtype:
torch.float32
x.device:
cpu
The nonzero function returns the indices of the non-zero elements.
x = torch.Tensor([[0,1,2], [3,4,5]])
print("x.nonzero(): \n%s\n" % x.nonzero())
x.nonzero():
tensor([[0, 1],
[0, 2],
[1, 0],
[1, 1],
[1, 2]])
# press tab to autocomplete
# x.
Converting between PyTorch and numpy#
X = np.random.random((5,3))
X, type(X[0,0])
(array([[0.18340451, 0.30424224, 0.52475643],
[0.43194502, 0.29122914, 0.61185289],
[0.13949386, 0.29214465, 0.36636184],
[0.45606998, 0.78517596, 0.19967378],
[0.51423444, 0.59241457, 0.04645041]]),
numpy.float64)
# numpy ---> torch
Y = torch.from_numpy(X) # Y is actually a DoubleTensor (i.e. 64-bit representation)
Y
tensor([[0.1834, 0.3042, 0.5248],
[0.4319, 0.2912, 0.6119],
[0.1395, 0.2921, 0.3664],
[0.4561, 0.7852, 0.1997],
[0.5142, 0.5924, 0.0465]], dtype=torch.float64)
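Note that torch.from_numpy shares memory with the numpy array, so in-place changes to X are visible in Y (a small sketch):
X[0, 0] = -1.0
Y[0, 0]   # tensor(-1., dtype=torch.float64): Y sees the change made to X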
Y = torch.rand((2,4))
Y
tensor([[0.5804, 0.0088, 0.3779, 0.8511],
[0.2244, 0.2267, 0.7834, 0.5574]])
# torch ---> numpy (the returned array shares memory with the tensor)
X = Y.numpy()
X
array([[0.58037233, 0.00878555, 0.37788534, 0.8510909 ],
[0.22435027, 0.22674036, 0.78335714, 0.5574175 ]], dtype=float32)
Using GPUs#
Using a GPU in PyTorch is as simple as calling .cuda() on your tensor.
But first, you may want to check:
that CUDA can actually be used: torch.cuda.is_available()
how many GPUs are available: torch.cuda.device_count()
torch.cuda.is_available()
False
torch.cuda.device_count()
0
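On a machine that does have a GPU, you could additionally query the device name (a small sketch, commented out like the other GPU-only cells):
# if torch.cuda.is_available():
#     print(torch.cuda.get_device_name(0))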
x = torch.Tensor([[1,2,3], [4,5,6]])
print(x)
tensor([[1., 2., 3.],
[4., 5., 6.]])
tensor.cuda#
Note: If you don’t have CUDA on your machine, the following examples won’t work
# x.cuda(0)
# print(x.device)
# x = x.cuda(0)
# print(x.device)
# x = x.cuda(1)
# print(x.device)
x = torch.Tensor([[1,2,3], [4,5,6]])
if torch.cuda.is_available():
    x = x.cuda()
# This will generate an error since you cannot do operations on tensors that are not on the same device
x + x.cuda()
These kinds of if statements used to be all over the place in people’s PyTorch code. Recently, a more flexible way was introduced:
torch.device#
A torch.device is an object representing the device on which a torch.Tensor is or will be allocated.
You can easily move a tensor from one device to another by using the tensor.to() function.
cpu = torch.device('cpu')
x = x.to(cpu)
print(x.device)
# Check yourself in a machine with GPU support, like google colab
# cuda_0 = torch.device('cuda:0')
# x = x.to(cuda_0)
# print(x.device)
cpu
This is more flexible, since you only need to check once in your code whether CUDA is available:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = x.to(device) # We don't need to care anymore about whether cuda is available or not
print(x.device)
cpu
The output of the above cell, on a machine with GPU support:
cuda:0
Timing GPU#
How much faster is the GPU?
See for yourself on a GPU-supported machine
A = torch.rand(100, 1000, 1000)
# B = A.cuda(0)
A.size()
torch.Size([100, 1000, 1000])
# %timeit -n 3 torch.bmm(A, A)
The output of the above cell, on a machine with GPU support:
1.07 s ± 27 ms per loop (mean ± std. dev. of 7 runs, 3 loops each)
# %timeit -n 3 torch.bmm(B, B)
The output of the above cell, on a machine with GPU support:
The slowest run took 15.79 times longer than the fastest. This could mean that an intermediate result is being cached. 72.5 µs ± 90.7 µs per loop (mean ± std. dev. of 7 runs, 3 loops each)
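One caveat when timing GPU code: CUDA kernels are launched asynchronously, so very small timings may only reflect the kernel launch. Synchronizing inside the timed statement (a sketch, to try on a GPU machine) gives a fairer comparison:
# %timeit -n 3 torch.bmm(B, B); torch.cuda.synchronize()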
Your turn!#
Run the following cells for yourself on a GPU-supported machine
# A = torch.randn(10000, 10000)
# B = A.cuda(0)
# %%time
# u, s, v = torch.svd(A)
# %%time
# u, s, v = torch.svd(B)
# %%time
# u, s, v = torch.linalg.svd(A)
# %%time
# u, s, v = torch.linalg.svd(B)