Inverted Pendulum#
In this example, a mode predictive controller (MPC) is used to swing up and stabilize an inverted pendulum mounted on a moving cart.
The state vector consist of the angle of the pendulum \(\theta\), its angular velocity \(\omega\), the position of the cart \(x\), and its velocity \(v\). We use the following model:
\[\begin{split}\newcommand{\pend}[1]{#1_\mathrm{pend}}
\newcommand{\cart}[1]{#1_\mathrm{cart}}
\begin{aligned}
\pend{a} &= \pend{l} \sin(\theta)\, \omega^2 - g \cos(\theta) \sin(\theta) \\
a &= \frac{F - \cart{b}\, v + \pend{m}\, \pend{a}}{\cart{m} + \pend{m}} \\
\alpha &= \frac{g \sin(\theta) - a \cos(\theta)}{\pend{l}} \\
\begin{pmatrix} \dot \theta \\ \dot \omega \\ \dot x \\ \dot v \end{pmatrix}
&= \begin{pmatrix} \omega \\ \alpha \\ v \\ a \end{pmatrix}
\end{aligned}\end{split}\]
A quadratic cost function is used, applied to the sine of half the pendulum angle, to make it periodic with period \(2\pi\):
\[\ell(x, v, \theta, \omega, F) = q_\theta \sin^2(\theta/2) + q_\omega\, \omega^2 + q_x\, x^2 + q_v\, v^2 + r_F\, F^2\]
The force \(F\) exerted on the cart is limited to \(\pm 5 \mathrm{N}\).
1# %% Model
2
3import casadi as cs
4import numpy as np
5
6# State vector
7θ = cs.SX.sym("θ") # Pendulum angle [rad]
8ω = cs.SX.sym("ω") # Pendulum angular velocity [rad/s]
9x = cs.SX.sym("x") # Cart position [m]
10v = cs.SX.sym("v") # Cart velocity [m/s]
11state = cs.vertcat(θ, ω, x, v) # Full state vector
12nx = state.shape[0] # Number of states
13
14# Input
15F = cs.SX.sym("F") # External force applied to the cart [N]
16nu = F.shape[0] # Number of inputs
17
18# Parameters
19F_max = 5 # Maximum force applied to the cart [N]
20m_cart = 0.8 # Mass of the cart [kg]
21m_pend = 0.3 # Mass of the pendulum [kg]
22b_cart = 0.1 # Friction coefficient of the cart [N/m/s]
23l_pend = 0.3 # Length of the pendulum [m]
24g_gravity = 9.81 # Gravitational acceleration [m/s²]
25Ts = 0.025 # Simulation sampling time [s]
26N_horiz = 50 # MPC horizon [time steps]
27N_sim = 300 # Simulation length [time steps]
28
29# Continous-time dynamics
30a_pend = l_pend * cs.sin(θ) * ω**2 - g_gravity * cs.cos(θ) * cs.sin(θ)
31a = (F - b_cart * v + m_pend * a_pend) / (m_cart + m_pend)
32α = (g_gravity * cs.sin(θ) - a * cs.cos(θ)) / l_pend
33f_c = cs.Function("f_c", [state, F], [cs.vertcat(ω, α, v, a)])
34
35# 4th order Runge-Kutta integrator
36k1 = f_c(state, F)
37k2 = f_c(state + Ts * k1 / 2, F)
38k3 = f_c(state + Ts * k2 / 2, F)
39k4 = f_c(state + Ts * k3, F)
40
41# Discrete-time dynamics
42f_d_expr = state + (Ts / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
43f_d = cs.Function("f", [state, F], [f_d_expr])
44
45
46# %% Model predictive control
47
48# MPC inputs and states
49mpc_x0 = cs.MX.sym("x0", nx) # Initial state
50mpc_u = cs.MX.sym("u", (1, N_horiz)) # Inputs
51mpc_x = f_d.mapaccum(N_horiz)(mpc_x0, mpc_u) # Simulated states
52
53# MPC cost
54Q = cs.MX.sym("Q", nx) # Stage state cost
55Qf = cs.MX.sym("Qf", nx) # Terminal state cost
56R = cs.MX.sym("R", nu) # Stage input cost
57s, u = cs.MX.sym("s", nx), cs.MX.sym("u", nu)
58sin_s = cs.vertcat(cs.sin(s[0] / 2), s[1:]) # Penalize sin(θ/2), not θ
59stage_cost_x = cs.Function("lx", [s, Q], [cs.dot(sin_s, cs.diag(Q) @ sin_s)])
60terminal_cost_x = cs.Function("lf", [s, Qf], [cs.dot(sin_s, cs.diag(Qf) @ sin_s)])
61stage_cost_u = cs.Function("lu", [u, R], [cs.dot(u, cs.diag(R) @ u)])
62
63mpc_param = cs.vertcat(mpc_x0, Q, Qf, R)
64mpc_y_cost = cs.sum2(stage_cost_x.map(N_horiz - 1)(mpc_x[:, :-1], Q))
65mpc_u_cost = cs.sum2(stage_cost_u.map(N_horiz)(mpc_u, R))
66mpc_terminal_cost = terminal_cost_x(mpc_x[:, -1], Qf)
67mpc_cost = mpc_y_cost + mpc_u_cost + mpc_terminal_cost
68
69# Box constraints on the actuator force:
70C = -F_max * np.ones(N_horiz), +F_max * np.ones(N_horiz)
71
72# Compile into an alpaqa problem
73import alpaqa
74
75# Generate C code for the cost function, compile it, and load it as an
76# alpaqa problem description:
77problem = (
78 alpaqa.minimize(mpc_cost, cs.vec(mpc_u)) # objective and variables
79 .subject_to_box(C) # box constraints on the variables
80 .with_param(mpc_param) # parameters to be changed later
81).compile(sym=cs.MX.sym)
82
83from datetime import timedelta
84
85# Configure an alpaqa solver:
86Solver = alpaqa.PANOCSolver
87inner_solver = Solver(
88 panoc_params={
89 "max_time": timedelta(seconds=Ts),
90 "max_iter": 200,
91 "stop_crit": alpaqa.PANOCStopCrit.ProjGradUnitNorm2,
92 },
93 direction=alpaqa.StructuredLBFGSDirection(
94 lbfgs_params={"memory": 15},
95 direction_params={"hessian_vec_factor": 0},
96 ),
97)
98alm_params = {
99 "tolerance": 1e-4,
100 "initial_tolerance": 1e-4,
101}
102solver = alpaqa.ALMSolver(
103 alm_params=alm_params,
104 inner_solver=inner_solver,
105)
106
107
108# %% Controller class
109
110
111# Wrap the solver in a class that solves the optimal control problem at each
112# time step, implementing warm starting:
113class MPCController:
114 def __init__(self, problem: alpaqa.CasADiProblem):
115 self.problem = problem
116 self.tot_it = 0
117 self.tot_time = timedelta()
118 self.max_time = timedelta()
119 self.failures = 0
120 self.u = np.ones(problem.n)
121
122 def __call__(self, state_n):
123 state_n = np.array(state_n).ravel()
124 # Set the current state as the initial state
125 self.problem.param[:nx] = state_n
126 # Shift over the previous solution by one time step
127 self.u = np.concatenate((self.u[nu:], self.u[-nu:]))
128 # Solve the optimal control problem
129 # (warm start using the shifted previous solution)
130 self.u, _, stats = solver(self.problem, self.u)
131 # Print some solver statistics
132 print(
133 f"{stats['status']!s:24} iter={stats['inner']['iterations']:4} "
134 f"time={stats['elapsed_time']} "
135 f"failures={stats['inner_convergence_failures']} "
136 f"tol={stats['ε']:1.3e} stepsize={stats['inner']['final_γ']:1.3e}"
137 )
138 self.tot_it += stats["inner"]["iterations"]
139 self.failures += stats["status"] != alpaqa.SolverStatus.Converged
140 self.tot_time += stats["elapsed_time"]
141 self.max_time = max(self.max_time, stats["elapsed_time"])
142 # Return the optimal control signal for the first time step
143 return self.u[:nu]
144
145
146# %% Simulate the system using the MPC controller
147
148state_0 = np.array([-np.pi, 0, 0.2, 0]) # Initial state of the system
149
150# Parameters
151Q = [10, 1e-2, 1, 1e-2]
152Qf = np.array([10, 1e-1, 1000, 1e-1])
153R = [1e-1]
154problem.param = np.concatenate((state_0, Q, Qf, R))
155
156# Simulation
157mpc_states = np.empty((nx, N_sim + 1))
158mpc_inputs = np.empty((nu, N_sim))
159mpc_states[:, 0] = state_0
160controller = MPCController(problem)
161for n in range(N_sim):
162 # Solve the optimal control problem:
163 mpc_inputs[:, n] = controller(mpc_states[:, n])
164 # Apply the first optimal control input to the system and simulate for
165 # one time step, then update the state:
166 mpc_states[:, n + 1] = f_d(mpc_states[:, n], mpc_inputs[:, n]).T
167
168print(f"{controller.tot_it} iterations, {controller.failures} failures")
169print(
170 f"time: {controller.tot_time} (total), {controller.max_time} (max), "
171 f"{controller.tot_time / N_sim} (avg)"
172)
173
174
175# %% Visualize the results
176
177import matplotlib.pyplot as plt
178import matplotlib as mpl
179from matplotlib import animation
180
181mpl.rcParams["animation.frame_format"] = "svg"
182
183# Plot the cart and pendulum
184fig, ax = plt.subplots()
185h = 0.04
186x = [state_0[2], state_0[2] + l_pend * np.sin(state_0[0])]
187y = [h, h + l_pend * np.cos(state_0[0])]
188(target_pend,) = ax.plot(x, y, "--", color="tab:blue", label="Initial state")
189state_N = [0, 0, 0, 0]
190x = [state_N[2], state_N[2] + l_pend * np.sin(state_N[0])]
191y = [h, h + l_pend * np.cos(state_N[0])]
192(target_pend,) = ax.plot(x, y, "--", color="tab:green", label="Target state")
193(pend,) = ax.plot(x, y, "-o", color="tab:orange", alpha=0.9, label="MPC")
194cart = plt.Rectangle((-2 * h, 0), 4 * h, h, color="tab:orange")
195ax.add_patch(cart)
196plt.legend()
197plt.ylim([-l_pend + h, l_pend + 2 * h])
198plt.xlim([-1.5 * l_pend, +1.5 * l_pend])
199plt.gca().set_aspect("equal", "box")
200plt.tight_layout()
201
202
203class Animation:
204 def __call__(self, n):
205 state_n = mpc_states[:, n]
206 x = [state_n[2], state_n[2] + l_pend * np.sin(state_n[0])]
207 y = [h, h + l_pend * np.cos(state_n[0])]
208 pend.set_xdata(x)
209 pend.set_ydata(y)
210 cart.set_x(state_n[2] - 2 * h)
211 return [pend, cart]
212
213
214ani = animation.FuncAnimation(
215 fig, Animation(), interval=1000 * Ts, blit=True, repeat=True, frames=N_sim
216)
217
218fig, axs = plt.subplots(5, sharex=True, figsize=(6, 9))
219ts = np.arange(N_sim + 1) * Ts
220labels = [
221 "Angle $\\theta$ [rad]",
222 "Angular velocity $\\omega$ [rad/s]",
223 "Position $x$ [m]",
224 "Velocity $v$ [m/s]",
225]
226for i, (ax, lbl) in enumerate(zip(axs[:-1], labels)):
227 ax.plot(ts, mpc_states[i, :])
228 ax.set_title(lbl)
229ax = axs[-1]
230ax.plot(ts[:N_sim], mpc_inputs.T)
231ax.set_title("Control input $F$ [N]")
232ax.set_xlabel("Simulation time $t$ [s]")
233ax.set_xlim(0, N_sim * Ts)
234plt.tight_layout()
235
236# Show the animation
237plt.show()