.. code:: python
%matplotlib inline
import math
from mxnet import np, npx
from d2l import mxnet as d2l
npx.set_np()
.. raw:: html

.. raw:: html
.. code:: python
%matplotlib inline
import math
import torch
from d2l import torch as d2l
.. raw:: html

.. raw:: html
.. code:: python
%matplotlib inline
import math
import tensorflow as tf
from d2l import tensorflow as d2l
.. raw:: html

.. raw:: html
.. code:: python
def f(x1, x2):
    """Objective function: f(x1, x2) = x1**2 + 2 * x2**2 (minimum at the origin)."""
    return x1**2 + 2 * x2**2
def f_grad(x1, x2):
    """Gradient of the objective function: (d f/d x1, d f/d x2) = (2*x1, 4*x2)."""
    return 2 * x1, 4 * x2
def sgd(x1, x2, s1, s2, f_grad):
    """One SGD step on (x1, x2).

    s1, s2 are unused state slots kept to match the d2l.train_2d interface.
    Reads the module-level globals `eta` (base step size) and `lr`
    (learning-rate schedule callable).
    """
    g1, g2 = f_grad(x1, x2)
    # Simulate a stochastic gradient by adding standard normal noise
    # to the exact gradient.
    g1 += np.random.normal(0.0, 1, (1,))
    g2 += np.random.normal(0.0, 1, (1,))
    eta_t = eta * lr()  # effective step size at this step
    return (x1 - eta_t * g1, x2 - eta_t * g2, 0, 0)
def constant_lr():
    """Schedule that keeps the learning-rate multiplier fixed at 1."""
    return 1
# Run 50 steps of noisy SGD on f with a constant learning rate of 0.1
# and plot the optimization trace.
eta = 0.1
lr = constant_lr  # Constant learning rate
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=50, f_grad=f_grad))
.. parsed-literal::
:class: output
epoch 50, x1: -0.472513, x2: 0.110780
.. figure:: output_sgd_baca77_15_1.svg
.. raw:: html

.. raw:: html
.. code:: python
def f(x1, x2):
    """Objective function: f(x1, x2) = x1**2 + 2 * x2**2 (minimum at the origin)."""
    return x1**2 + 2 * x2**2
def f_grad(x1, x2):
    """Gradient of the objective function: (d f/d x1, d f/d x2) = (2*x1, 4*x2)."""
    return 2 * x1, 4 * x2
def sgd(x1, x2, s1, s2, f_grad):
    """One SGD step on (x1, x2).

    s1, s2 are unused state slots kept to match the d2l.train_2d interface.
    Reads the module-level globals `eta` (base step size) and `lr`
    (learning-rate schedule callable).
    """
    g1, g2 = f_grad(x1, x2)
    # Simulate a stochastic gradient by adding standard normal noise
    # to the exact gradient.
    g1 += torch.normal(0.0, 1, (1,))
    g2 += torch.normal(0.0, 1, (1,))
    eta_t = eta * lr()  # effective step size at this step
    return (x1 - eta_t * g1, x2 - eta_t * g2, 0, 0)
def constant_lr():
    """Schedule that keeps the learning-rate multiplier fixed at 1."""
    return 1
# Run 50 steps of noisy SGD on f with a constant learning rate of 0.1
# and plot the optimization trace.
eta = 0.1
lr = constant_lr  # Constant learning rate
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=50, f_grad=f_grad))
.. parsed-literal::
:class: output
epoch 50, x1: 0.137441, x2: 0.145283
.. figure:: output_sgd_baca77_18_1.svg
.. raw:: html

.. raw:: html
.. code:: python
def f(x1, x2):
    """Objective function: f(x1, x2) = x1**2 + 2 * x2**2 (minimum at the origin)."""
    return x1**2 + 2 * x2**2
def f_grad(x1, x2):
    """Gradient of the objective function: (d f/d x1, d f/d x2) = (2*x1, 4*x2)."""
    return 2 * x1, 4 * x2
def sgd(x1, x2, s1, s2, f_grad):
    """One SGD step on (x1, x2).

    s1, s2 are unused state slots kept to match the d2l.train_2d interface.
    Reads the module-level globals `eta` (base step size) and `lr`
    (learning-rate schedule callable).
    """
    g1, g2 = f_grad(x1, x2)
    # Simulate a stochastic gradient by adding standard normal noise
    # to the exact gradient.
    g1 += tf.random.normal([1], 0.0, 1)
    g2 += tf.random.normal([1], 0.0, 1)
    eta_t = eta * lr()  # effective step size at this step
    return (x1 - eta_t * g1, x2 - eta_t * g2, 0, 0)
def constant_lr():
    """Schedule that keeps the learning-rate multiplier fixed at 1."""
    return 1
# Run 50 steps of noisy SGD on f with a constant learning rate of 0.1
# and plot the optimization trace.
eta = 0.1
lr = constant_lr  # Constant learning rate
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=50, f_grad=f_grad))
.. parsed-literal::
:class: output
epoch 50, x1: -0.032286, x2: -0.192288
.. figure:: output_sgd_baca77_21_1.svg
.. raw:: html

.. raw:: html
.. code:: python
def exponential_lr():
    """Exponentially decaying schedule: increments the global step counter `t`
    and returns exp(-0.1 * t)."""
    # Global variable that is defined outside this function and updated inside
    global t
    t += 1
    return math.exp(-0.1 * t)

t = 1  # global step counter read/updated by exponential_lr
lr = exponential_lr
# 1000 steps of noisy SGD with the exponentially decaying learning rate.
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=1000, f_grad=f_grad))
.. parsed-literal::
:class: output
epoch 1000, x1: -0.820458, x2: 0.004701
.. figure:: output_sgd_baca77_27_1.svg
.. raw:: html

.. raw:: html
.. code:: python
def exponential_lr():
    """Exponentially decaying schedule: increments the global step counter `t`
    and returns exp(-0.1 * t)."""
    # Global variable that is defined outside this function and updated inside
    global t
    t += 1
    return math.exp(-0.1 * t)

t = 1  # global step counter read/updated by exponential_lr
lr = exponential_lr
# 1000 steps of noisy SGD with the exponentially decaying learning rate.
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=1000, f_grad=f_grad))
.. parsed-literal::
:class: output
epoch 1000, x1: -0.800378, x2: -0.003202
.. figure:: output_sgd_baca77_30_1.svg
.. raw:: html

.. raw:: html
.. code:: python
def exponential_lr():
    """Exponentially decaying schedule: increments the global step counter `t`
    and returns exp(-0.1 * t)."""
    # Global variable that is defined outside this function and updated inside
    global t
    t += 1
    return math.exp(-0.1 * t)

t = 1  # global step counter read/updated by exponential_lr
lr = exponential_lr
# 1000 steps of noisy SGD with the exponentially decaying learning rate.
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=1000, f_grad=f_grad))
.. parsed-literal::
:class: output
epoch 1000, x1: -0.726561, x2: 0.007033
.. figure:: output_sgd_baca77_33_1.svg
.. raw:: html

.. raw:: html
.. code:: python
def polynomial_lr():
    """Polynomially decaying schedule: increments the global step counter `t`
    and returns (1 + 0.1 * t) ** (-0.5)."""
    # Global variable that is defined outside this function and updated inside
    global t
    t += 1
    return (1 + 0.1 * t)**(-0.5)

t = 1  # global step counter read/updated by polynomial_lr
lr = polynomial_lr
# 50 steps of noisy SGD with the polynomially decaying learning rate.
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=50, f_grad=f_grad))
.. parsed-literal::
:class: output
epoch 50, x1: 0.025029, x2: 0.115820
.. figure:: output_sgd_baca77_39_1.svg
.. raw:: html

.. raw:: html
.. code:: python
def polynomial_lr():
    """Polynomially decaying schedule: increments the global step counter `t`
    and returns (1 + 0.1 * t) ** (-0.5)."""
    # Global variable that is defined outside this function and updated inside
    global t
    t += 1
    return (1 + 0.1 * t)**(-0.5)

t = 1  # global step counter read/updated by polynomial_lr
lr = polynomial_lr
# 50 steps of noisy SGD with the polynomially decaying learning rate.
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=50, f_grad=f_grad))
.. parsed-literal::
:class: output
epoch 50, x1: 0.129490, x2: -0.018863
.. figure:: output_sgd_baca77_42_1.svg
.. raw:: html

.. raw:: html
.. code:: python
def polynomial_lr():
    """Polynomially decaying schedule: increments the global step counter `t`
    and returns (1 + 0.1 * t) ** (-0.5)."""
    # Global variable that is defined outside this function and updated inside
    global t
    t += 1
    return (1 + 0.1 * t)**(-0.5)

t = 1  # global step counter read/updated by polynomial_lr
lr = polynomial_lr
# 50 steps of noisy SGD with the polynomially decaying learning rate.
d2l.show_trace_2d(f, d2l.train_2d(sgd, steps=50, f_grad=f_grad))
.. parsed-literal::
:class: output
epoch 50, x1: 0.086213, x2: 0.096072
.. figure:: output_sgd_baca77_45_1.svg
.. raw:: html

.. raw:: html
`Discussions `__
.. raw:: html

.. raw:: html
`Discussions `__
.. raw:: html

.. raw:: html
`Discussions `__
.. raw:: html

.. raw:: html