Inverted Pendulum

In this example, a mode predictive controller (MPC) is used to swing up and stabilize an inverted pendulum mounted on a moving cart.

Figure of the MPC solution to the inverted pendulum problem.

Plot of the states and the control signal of the MPC solution.

The state vector consist of the angle of the pendulum \(\theta\), its angular velocity \(\omega\), the position of the cart \(x\), and its velocity \(v\). We use the following model:

\[\begin{split}\newcommand{\pend}[1]{#1_\mathrm{pend}} \newcommand{\cart}[1]{#1_\mathrm{cart}} \begin{aligned} \pend{a} &= \pend{l} \sin(\theta)\, \omega^2 - g \cos(\theta) \sin(\theta) \\ a &= \frac{F - \cart{b}\, v + \pend{m}\, \pend{a}}{\cart{m} + \pend{m}} \\ \alpha &= \frac{g \sin(\theta) - a \cos(\theta)}{\pend{l}} \\ \begin{pmatrix} \dot \theta \\ \dot \omega \\ \dot x \\ \dot v \end{pmatrix} &= \begin{pmatrix} \omega \\ \alpha \\ v \\ a \end{pmatrix} \end{aligned}\end{split}\]

A quadratic cost function is used, applied to the sine of half the pendulum angle, to make it periodic with period \(2\pi\):

\[\ell(x, v, \theta, \omega, F) = q_\theta \sin^2(\theta/2) + q_\omega\, \omega^2 + q_x\, x^2 + q_v\, v^2 + r_F\, F^2\]

The force \(F\) exerted on the cart is limited to \(\pm 5 \mathrm{N}\).

  1# %% Model
  2
  3import casadi as cs
  4import numpy as np
  5
  6# State vector
  7θ = cs.SX.sym("θ")  # Pendulum angle                     [rad]
  8ω = cs.SX.sym("ω")  # Pendulum angular velocity          [rad/s]
  9x = cs.SX.sym("x")  # Cart position                      [m]
 10v = cs.SX.sym("v")  # Cart velocity                      [m/s]
 11state = cs.vertcat(θ, ω, x, v)  # Full state vector
 12nx = state.shape[0]  # Number of states
 13
 14# Input
 15F = cs.SX.sym("F")  # External force applied to the cart [N]
 16nu = F.shape[0]  # Number of inputs
 17
 18# Parameters
 19F_max = 5  #        Maximum force applied to the cart    [N]
 20m_cart = 0.8  #     Mass of the cart                     [kg]
 21m_pend = 0.3  #     Mass of the pendulum                 [kg]
 22b_cart = 0.1  #     Friction coefficient of the cart     [N/m/s]
 23l_pend = 0.3  #     Length of the pendulum               [m]
 24g_gravity = 9.81  # Gravitational acceleration           [m/s²]
 25Ts = 0.025  #       Simulation sampling time             [s]
 26N_horiz = 50  #     MPC horizon                          [time steps]
 27N_sim = 300  #      Simulation length                    [time steps]
 28
 29# Continous-time dynamics
 30a_pend = l_pend * cs.sin(θ) * ω**2 - g_gravity * cs.cos(θ) * cs.sin(θ)
 31a = (F - b_cart * v + m_pend * a_pend) / (m_cart + m_pend)
 32α = (g_gravity * cs.sin(θ) - a * cs.cos(θ)) / l_pend
 33f_c = cs.Function("f_c", [state, F], [cs.vertcat(ω, α, v, a)])
 34
 35# 4th order Runge-Kutta integrator
 36k1 = f_c(state, F)
 37k2 = f_c(state + Ts * k1 / 2, F)
 38k3 = f_c(state + Ts * k2 / 2, F)
 39k4 = f_c(state + Ts * k3, F)
 40
 41# Discrete-time dynamics
 42f_d_expr = state + (Ts / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
 43f_d = cs.Function("f", [state, F], [f_d_expr])
 44
 45
 46# %% Model predictive control
 47
 48# MPC inputs and states
 49mpc_x0 = cs.MX.sym("x0", nx)  # Initial state
 50mpc_u = cs.MX.sym("u", (1, N_horiz))  # Inputs
 51mpc_x = f_d.mapaccum(N_horiz)(mpc_x0, mpc_u)  # Simulated states
 52
 53# MPC cost
 54Q = cs.MX.sym("Q", nx)  # Stage state cost
 55Qf = cs.MX.sym("Qf", nx)  # Terminal state cost
 56R = cs.MX.sym("R", nu)  # Stage input cost
 57s, u = cs.MX.sym("s", nx), cs.MX.sym("u", nu)
 58sin_s = cs.vertcat(cs.sin(s[0] / 2), s[1:])  # Penalize sin(θ/2), not θ
 59stage_cost_x = cs.Function("lx", [s, Q], [cs.dot(sin_s, cs.diag(Q) @ sin_s)])
 60terminal_cost_x = cs.Function("lf", [s, Qf], [cs.dot(sin_s, cs.diag(Qf) @ sin_s)])
 61stage_cost_u = cs.Function("lu", [u, R], [cs.dot(u, cs.diag(R) @ u)])
 62
 63mpc_param = cs.vertcat(mpc_x0, Q, Qf, R)
 64mpc_y_cost = cs.sum2(stage_cost_x.map(N_horiz - 1)(mpc_x[:, :-1], Q))
 65mpc_u_cost = cs.sum2(stage_cost_u.map(N_horiz)(mpc_u, R))
 66mpc_terminal_cost = terminal_cost_x(mpc_x[:, -1], Qf)
 67mpc_cost = mpc_y_cost + mpc_u_cost + mpc_terminal_cost
 68
 69# Box constraints on the actuator force:
 70C = -F_max * np.ones(N_horiz), +F_max * np.ones(N_horiz)
 71
 72# Initial state of the system
 73state_0 = np.array([-np.pi, 0, 0.2, 0])
 74
 75# Initial parameter value
 76Q = [10, 1e-2, 1, 1e-2]
 77Qf = np.array([10, 1e-1, 1000, 1e-1])
 78R = [1e-1]
 79param_0 = np.concatenate((state_0, Q, Qf, R))
 80
 81# Compile into an alpaqa problem
 82import alpaqa
 83
 84# Generate C code for the cost function, compile it, and load it as an
 85# alpaqa problem description:
 86problem = (
 87    alpaqa.minimize(mpc_cost, cs.vec(mpc_u))  # objective and variables
 88    .subject_to_box(C)  #                       box constraints on the variables
 89    .with_param(mpc_param, param_0)  #          parameters to be changed later
 90    .with_name(f"inverted_pendulum_{N_horiz}")  # name used in generated files
 91).compile(sym=cs.MX.sym)
 92
 93from datetime import timedelta
 94
 95# Configure an alpaqa solver:
 96Solver = alpaqa.PANOCSolver
 97inner_solver = Solver(
 98    panoc_params={
 99        "max_time": timedelta(seconds=Ts),
100        "max_iter": 200,
101        "stop_crit": alpaqa.PANOCStopCrit.ProjGradUnitNorm2,
102    },
103    direction=alpaqa.StructuredLBFGSDirection(
104        lbfgs_params={"memory": 15},
105        direction_params={"hessian_vec_factor": 0},
106    ),
107)
108alm_params = {
109    "tolerance": 1e-4,
110    "initial_tolerance": 1e-4,
111}
112solver = alpaqa.ALMSolver(
113    alm_params=alm_params,
114    inner_solver=inner_solver,
115)
116
117
118# %% Controller class
119
120
121# Wrap the solver in a class that solves the optimal control problem at each
122# time step, implementing warm starting:
123class MPCController:
124    def __init__(self, problem: alpaqa.CasADiProblem):
125        self.problem = problem
126        self.tot_it = 0
127        self.tot_time = timedelta()
128        self.max_time = timedelta()
129        self.failures = 0
130        self.u = np.ones(problem.num_variables)
131
132    def __call__(self, state_n):
133        state_n = np.array(state_n).ravel()
134        # Set the current state as the initial state
135        self.problem.param[:nx] = state_n
136        # Shift over the previous solution by one time step
137        self.u = np.concatenate((self.u[nu:], self.u[-nu:]))
138        # Solve the optimal control problem
139        # (warm start using the shifted previous solution)
140        self.u, _, stats = solver(self.problem, self.u)
141        # Print some solver statistics
142        print(
143            f"{stats['status']!s:24} iter={stats['inner']['iterations']:4} "
144            f"time={stats['elapsed_time']} "
145            f"failures={stats['inner_convergence_failures']} "
146            f"tol={stats['ε']:1.3e} stepsize={stats['inner']['final_γ']:1.3e}"
147        )
148        self.tot_it += stats["inner"]["iterations"]
149        self.failures += stats["status"] != alpaqa.SolverStatus.Converged
150        self.tot_time += stats["elapsed_time"]
151        self.max_time = max(self.max_time, stats["elapsed_time"])
152        # Return the optimal control signal for the first time step
153        return self.u[:nu]
154
155
156# %% Simulate the system using the MPC controller
157
158# Simulation
159mpc_states = np.empty((nx, N_sim + 1))
160mpc_inputs = np.empty((nu, N_sim))
161mpc_states[:, 0] = state_0
162controller = MPCController(problem)
163for n in range(N_sim):
164    # Solve the optimal control problem:
165    mpc_inputs[:, n] = controller(mpc_states[:, n])
166    # Apply the first optimal control input to the system and simulate for
167    # one time step, then update the state:
168    mpc_states[:, n + 1] = f_d(mpc_states[:, n], mpc_inputs[:, n]).T
169
170print(f"{controller.tot_it} iterations, {controller.failures} failures")
171print(
172    f"time: {controller.tot_time} (total), {controller.max_time} (max), "
173    f"{controller.tot_time / N_sim} (avg)"
174)
175
176
177# %% Visualize the results
178
179import matplotlib.pyplot as plt
180import matplotlib as mpl
181from matplotlib import animation
182
183mpl.rcParams["animation.frame_format"] = "svg"
184
185# Plot the cart and pendulum
186fig, ax = plt.subplots()
187h = 0.04
188x = [state_0[2], state_0[2] + l_pend * np.sin(state_0[0])]
189y = [h, h + l_pend * np.cos(state_0[0])]
190(target_pend,) = ax.plot(x, y, "--", color="tab:blue", label="Initial state")
191state_N = [0, 0, 0, 0]
192x = [state_N[2], state_N[2] + l_pend * np.sin(state_N[0])]
193y = [h, h + l_pend * np.cos(state_N[0])]
194(target_pend,) = ax.plot(x, y, "--", color="tab:green", label="Target state")
195(pend,) = ax.plot(x, y, "-o", color="tab:orange", alpha=0.9, label="MPC")
196cart = plt.Rectangle((-2 * h, 0), 4 * h, h, color="tab:orange")
197ax.add_patch(cart)
198plt.legend()
199plt.ylim([-l_pend + h, l_pend + 2 * h])
200plt.xlim([-1.5 * l_pend, +1.5 * l_pend])
201plt.gca().set_aspect("equal", "box")
202plt.tight_layout()
203
204
205class Animation:
206    def __call__(self, n):
207        state_n = mpc_states[:, n]
208        x = [state_n[2], state_n[2] + l_pend * np.sin(state_n[0])]
209        y = [h, h + l_pend * np.cos(state_n[0])]
210        pend.set_xdata(x)
211        pend.set_ydata(y)
212        cart.set_x(state_n[2] - 2 * h)
213        return [pend, cart]
214
215
216ani = animation.FuncAnimation(
217    fig, Animation(), interval=1000 * Ts, blit=True, repeat=True, frames=N_sim
218)
219
220fig, axs = plt.subplots(5, sharex=True, figsize=(6, 9))
221ts = np.arange(N_sim + 1) * Ts
222labels = [
223    "Angle $\\theta$ [rad]",
224    "Angular velocity $\\omega$ [rad/s]",
225    "Position $x$ [m]",
226    "Velocity $v$ [m/s]",
227]
228for i, (ax, lbl) in enumerate(zip(axs[:-1], labels)):
229    ax.plot(ts, mpc_states[i, :])
230    ax.set_title(lbl)
231ax = axs[-1]
232ax.plot(ts[:N_sim], mpc_inputs.T)
233ax.set_title("Control input $F$ [N]")
234ax.set_xlabel("Simulation time $t$ [s]")
235ax.set_xlim(0, N_sim * Ts)
236plt.tight_layout()
237
238# Show the animation
239plt.show()