Inverted Pendulum#
In this example, a mode predictive controller (MPC) is used to swing up and stabilize an inverted pendulum mounted on a moving cart.
The state vector consist of the angle of the pendulum \(\theta\), its angular velocity \(\omega\), the position of the cart \(x\), and its velocity \(v\). We use the following model:
\[\begin{split}\newcommand{\pend}[1]{#1_\mathrm{pend}}
\newcommand{\cart}[1]{#1_\mathrm{cart}}
\begin{aligned}
\pend{a} &= \pend{l} \sin(\theta)\, \omega^2 - g \cos(\theta) \sin(\theta) \\
a &= \frac{F - \cart{b}\, v + \pend{m}\, \pend{a}}{\cart{m} + \pend{m}} \\
\alpha &= \frac{g \sin(\theta) - a \cos(\theta)}{\pend{l}} \\
\begin{pmatrix} \dot \theta \\ \dot \omega \\ \dot x \\ \dot v \end{pmatrix}
&= \begin{pmatrix} \omega \\ \alpha \\ v \\ a \end{pmatrix}
\end{aligned}\end{split}\]
A quadratic cost function is used, applied to the sine of half the pendulum angle, to make it periodic with period \(2\pi\):
\[\ell(x, v, \theta, \omega, F) = q_\theta \sin^2(\theta/2) + q_\omega\, \omega^2 + q_x\, x^2 + q_v\, v^2 + r_F\, F^2\]
The force \(F\) exerted on the cart is limited to \(\pm 5 \mathrm{N}\).
1# %% Model
2
3import casadi as cs
4import numpy as np
5
6# State vector
7θ = cs.SX.sym("θ") # Pendulum angle [rad]
8ω = cs.SX.sym("ω") # Pendulum angular velocity [rad/s]
9x = cs.SX.sym("x") # Cart position [m]
10v = cs.SX.sym("v") # Cart velocity [m/s]
11state = cs.vertcat(θ, ω, x, v) # Full state vector
12nx = state.shape[0] # Number of states
13
14# Input
15F = cs.SX.sym("F") # External force applied to the cart [N]
16nu = F.shape[0] # Number of inputs
17
18# Parameters
19F_max = 5 # Maximum force applied to the cart [N]
20m_cart = 0.8 # Mass of the cart [kg]
21m_pend = 0.3 # Mass of the pendulum [kg]
22b_cart = 0.1 # Friction coefficient of the cart [N/m/s]
23l_pend = 0.3 # Length of the pendulum [m]
24g_gravity = 9.81 # Gravitational acceleration [m/s²]
25Ts = 0.025 # Simulation sampling time [s]
26N_horiz = 50 # MPC horizon [time steps]
27N_sim = 300 # Simulation length [time steps]
28
29# Continous-time dynamics
30a_pend = (l_pend * cs.sin(θ) * ω**2 - g_gravity * cs.cos(θ) * cs.sin(θ))
31a = (F - b_cart * v + m_pend * a_pend) / (m_cart + m_pend)
32α = (g_gravity * cs.sin(θ) - a * cs.cos(θ)) / l_pend
33f_c = cs.Function("f_c", [state, F], [cs.vertcat(ω, α, v, a)])
34
35# 4th order Runge-Kutta integrator
36k1 = f_c(state, F)
37k2 = f_c(state + Ts * k1 / 2, F)
38k3 = f_c(state + Ts * k2 / 2, F)
39k4 = f_c(state + Ts * k3, F)
40
41# Discrete-time dynamics
42f_d_expr = state + (Ts / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
43f_d = cs.Function("f", [state, F], [f_d_expr])
44
45
46# %% Model predictive control
47
48# MPC inputs and states
49mpc_x0 = cs.MX.sym("x0", nx) # Initial state
50mpc_u = cs.MX.sym("u", (1, N_horiz)) # Inputs
51mpc_x = f_d.mapaccum(N_horiz)(mpc_x0, mpc_u) # Simulated states
52
53# MPC cost
54Q = cs.MX.sym("Q", nx) # Stage state cost
55Qf = cs.MX.sym("Qf", nx) # Terminal state cost
56R = cs.MX.sym("R", nu) # Stage input cost
57s, u = cs.MX.sym("s", nx), cs.MX.sym("u", nu)
58sin_s = cs.vertcat(cs.sin(s[0] / 2), s[1:]) # Penalize sin(θ/2), not θ
59stage_cost_x = cs.Function("lx", [s, Q], [cs.dot(sin_s, cs.diag(Q) @ sin_s)])
60terminal_cost_x = cs.Function("lf", [s, Qf], [cs.dot(sin_s, cs.diag(Qf) @ sin_s)])
61stage_cost_u = cs.Function("lu", [u, R], [cs.dot(u, cs.diag(R) @ u)])
62
63mpc_param = cs.vertcat(mpc_x0, Q, Qf, R)
64mpc_y_cost = cs.sum2(stage_cost_x.map(N_horiz - 1)(mpc_x[:, :-1], Q))
65mpc_u_cost = cs.sum2(stage_cost_u.map(N_horiz)(mpc_u, R))
66mpc_terminal_cost = terminal_cost_x(mpc_x[:, -1], Qf)
67mpc_cost_fun = cs.Function('f_mpc', [cs.vec(mpc_u), mpc_param],
68 [mpc_y_cost + mpc_u_cost + mpc_terminal_cost])
69
70# Compile into an alpaqa problem
71from alpaqa import casadi_loader as cl
72
73# Generate C code for the cost function, compile it, and load it as an
74# alpaqa problem description:
75problem = cl.generate_and_compile_casadi_problem(
76 f=mpc_cost_fun,
77 g=None,
78 sym=cs.MX.sym,
79)
80# Box constraints on the actuator force:
81problem.C.lowerbound = -F_max * np.ones((N_horiz, ))
82problem.C.upperbound = +F_max * np.ones((N_horiz, ))
83
84import alpaqa as pa
85from datetime import timedelta
86
87# Configure an alpaqa solver:
88Solver = pa.PANOCSolver
89inner_solver = Solver(
90 panoc_params={
91 'max_time': timedelta(seconds=Ts),
92 'max_iter': 200,
93 'stop_crit': pa.PANOCStopCrit.ProjGradUnitNorm2,
94 },
95 direction=pa.StructuredLBFGSDirection(
96 lbfgs_params={'memory': 15},
97 direction_params={'hessian_vec': False},
98 ),
99)
100alm_params={
101 'tolerance': 1e-4,'initial_tolerance': 1e-4,
102}
103solver = pa.ALMSolver(
104 alm_params=alm_params,
105 inner_solver=inner_solver,
106)
107
108
109# %% Controller class
110
111# Wrap the solver in a class that solves the optimal control problem at each
112# time step, implementing warm starting:
113class MPCController:
114
115 def __init__(self, problem: pa.CasADiProblem):
116 self.problem = problem
117 self.tot_it = 0
118 self.tot_time = timedelta()
119 self.max_time = timedelta()
120 self.failures = 0
121 self.u = np.ones((nu * N_horiz, ))
122
123 def __call__(self, state_n):
124 state_n = np.array(state_n).ravel()
125 # Set the current state as the initial state
126 self.problem.param[:nx] = state_n
127 # Shift over the previous solution by one time step
128 self.u = np.concatenate((self.u[nu:], self.u[-nu:]))
129 # Solve the optimal control problem
130 # (warm start using the shifted previous solution)
131 self.u, _, stats = solver(self.problem, self.u)
132 # Print some solver statistics
133 print(f"{stats['status']!s:24} {stats['inner']['iterations']:4} "
134 f"{stats['elapsed_time']} {stats['inner_convergence_failures']} "
135 f"{stats['ε']:1.3e} {stats['inner']['final_γ']:1.3e}")
136 self.tot_it += stats['inner']['iterations']
137 self.failures += stats['status'] != pa.SolverStatus.Converged
138 self.tot_time += stats['elapsed_time']
139 self.max_time = max(self.max_time, stats['elapsed_time'])
140 # Return the optimal control signal for the first time step
141 return self.u[:nu]
142
143
144# %% Simulate the system using the MPC controller
145
146state_0 = np.array([-np.pi, 0, 0.2, 0]) # Initial state of the system
147
148# Parameters
149Q = [10, 1e-2, 1, 1e-2]
150Qf = np.array([10, 1e-1, 1000, 1e-1])
151R = [1e-1]
152problem.param = np.concatenate((state_0, Q, Qf, R))
153
154# Simulation
155mpc_states = np.empty((nx, N_sim + 1))
156mpc_inputs = np.empty((nu, N_sim))
157mpc_states[:, 0] = state_0
158controller = MPCController(problem)
159for n in range(N_sim):
160 # Solve the optimal control problem:
161 mpc_inputs[:, n] = controller(mpc_states[:, n])
162 # Apply the first optimal control input to the system and simulate for
163 # one time step, then update the state:
164 mpc_states[:, n + 1] = f_d(mpc_states[:, n], mpc_inputs[:, n]).T
165
166print(f"{controller.tot_it} iterations, {controller.failures} failures")
167print(f"time: {controller.tot_time} (total), {controller.max_time} (max), "
168 f"{controller.tot_time / N_sim} (avg)")
169
170
171# %% Visualize the results
172
173import matplotlib.pyplot as plt
174import matplotlib as mpl
175from matplotlib import animation
176
177mpl.rcParams['animation.frame_format'] = 'svg'
178
179# Plot the cart and pendulum
180fig, ax = plt.subplots()
181h = 0.04
182x = [state_0[2], state_0[2] + l_pend * np.sin(state_0[0])]
183y = [h, h + l_pend * np.cos(state_0[0])]
184target_pend, = ax.plot(x, y, '--', color='tab:blue', linewidth=1, label='Initial state')
185state_N = [0, 0, 0, 0]
186x = [state_N[2], state_N[2] + l_pend * np.sin(state_N[0])]
187y = [h, h + l_pend * np.cos(state_N[0])]
188target_pend, = ax.plot(x, y, '--', color='tab:green', linewidth=1, label='Target state')
189pend, = ax.plot(x, y, '-o', color='tab:orange', alpha=0.9, label='MPC')
190cart = plt.Rectangle((-2 * h, 0), 4 * h, h, color='tab:orange')
191ax.add_patch(cart)
192plt.legend()
193plt.ylim([-l_pend + h, l_pend + 2 * h])
194plt.xlim([-1.5 * l_pend, +1.5 * l_pend])
195plt.gca().set_aspect('equal', 'box')
196plt.tight_layout()
197
198
199class Animation:
200
201 def __call__(self, n):
202 state_n = mpc_states[:, n]
203 x = [state_n[2], state_n[2] + l_pend * np.sin(state_n[0])]
204 y = [h, h + l_pend * np.cos(state_n[0])]
205 pend.set_xdata(x)
206 pend.set_ydata(y)
207 cart.set_x(state_n[2] - 2 * h)
208 return [pend, cart]
209
210
211ani = animation.FuncAnimation(fig,
212 Animation(),
213 interval=1000 * Ts,
214 blit=True,
215 repeat=True,
216 frames=N_sim)
217
218fig, axs = plt.subplots(5, sharex=True, figsize=(6, 9))
219ts = np.arange(N_sim + 1) * Ts
220labels = [
221 "Angle $\\theta$ [rad]",
222 "Angular velocity $\\omega$ [rad/s]",
223 "Position $x$ [m]", "Velocity $v$ [m/s]",
224]
225for i, (ax, lbl) in enumerate(zip(axs[:-1], labels)):
226 ax.plot(ts, mpc_states[i, :])
227 ax.set_title(lbl)
228ax = axs[-1]
229ax.plot(ts[:N_sim], mpc_inputs.T)
230ax.set_title("Control input $F$ [N]")
231ax.set_xlabel("Simulation time $t$ [s]")
232ax.set_xlim(0, N_sim * Ts)
233plt.tight_layout()
234
235# Show the animation
236plt.show()