Inverted Pendulum#
In this example, a mode predictive controller (MPC) is used to swing up and stabilize an inverted pendulum mounted on a moving cart.
The state vector consist of the angle of the pendulum \(\theta\), its angular velocity \(\omega\), the position of the cart \(x\), and its velocity \(v\). We use the following model:
\[\begin{split}\newcommand{\pend}[1]{#1_\mathrm{pend}}
\newcommand{\cart}[1]{#1_\mathrm{cart}}
\begin{aligned}
\pend{a} &= \pend{l} \sin(\theta)\, \omega^2 - g \cos(\theta) \sin(\theta) \\
a &= \frac{F - \cart{b}\, v + \pend{m}\, \pend{a}}{\cart{m} + \pend{m}} \\
\alpha &= \frac{g \sin(\theta) - a \cos(\theta)}{\pend{l}} \\
\begin{pmatrix} \dot \theta \\ \dot \omega \\ \dot x \\ \dot v \end{pmatrix}
&= \begin{pmatrix} \omega \\ \alpha \\ v \\ a \end{pmatrix}
\end{aligned}\end{split}\]
A quadratic cost function is used, applied to the sine of half the pendulum angle, to make it periodic with period \(2\pi\):
\[\ell(x, v, \theta, \omega, F) = q_\theta \sin^2(\theta/2) + q_\omega\, \omega^2 + q_x\, x^2 + q_v\, v^2 + r_F\, F^2\]
The force \(F\) exerted on the cart is limited to \(\pm 5 \mathrm{N}\).
1# %% Model
2
3import casadi as cs
4import numpy as np
5
6# State vector
7θ = cs.SX.sym("θ") # Pendulum angle [rad]
8ω = cs.SX.sym("ω") # Pendulum angular velocity [rad/s]
9x = cs.SX.sym("x") # Cart position [m]
10v = cs.SX.sym("v") # Cart velocity [m/s]
11state = cs.vertcat(θ, ω, x, v) # Full state vector
12nx = state.shape[0] # Number of states
13
14# Input
15F = cs.SX.sym("F") # External force applied to the cart [N]
16nu = F.shape[0] # Number of inputs
17
18# Parameters
19F_max = 5 # Maximum force applied to the cart [N]
20m_cart = 0.8 # Mass of the cart [kg]
21m_pend = 0.3 # Mass of the pendulum [kg]
22b_cart = 0.1 # Friction coefficient of the cart [N/m/s]
23l_pend = 0.3 # Length of the pendulum [m]
24g_gravity = 9.81 # Gravitational acceleration [m/s²]
25Ts = 0.025 # Simulation sampling time [s]
26N_horiz = 50 # MPC horizon [time steps]
27N_sim = 300 # Simulation length [time steps]
28
29# Continous-time dynamics
30a_pend = l_pend * cs.sin(θ) * ω**2 - g_gravity * cs.cos(θ) * cs.sin(θ)
31a = (F - b_cart * v + m_pend * a_pend) / (m_cart + m_pend)
32α = (g_gravity * cs.sin(θ) - a * cs.cos(θ)) / l_pend
33f_c = cs.Function("f_c", [state, F], [cs.vertcat(ω, α, v, a)])
34
35# 4th order Runge-Kutta integrator
36k1 = f_c(state, F)
37k2 = f_c(state + Ts * k1 / 2, F)
38k3 = f_c(state + Ts * k2 / 2, F)
39k4 = f_c(state + Ts * k3, F)
40
41# Discrete-time dynamics
42f_d_expr = state + (Ts / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
43f_d = cs.Function("f", [state, F], [f_d_expr])
44
45
46# %% Model predictive control
47
48# MPC inputs and states
49mpc_x0 = cs.MX.sym("x0", nx) # Initial state
50mpc_u = cs.MX.sym("u", (1, N_horiz)) # Inputs
51mpc_x = f_d.mapaccum(N_horiz)(mpc_x0, mpc_u) # Simulated states
52
53# MPC cost
54Q = cs.MX.sym("Q", nx) # Stage state cost
55Qf = cs.MX.sym("Qf", nx) # Terminal state cost
56R = cs.MX.sym("R", nu) # Stage input cost
57s, u = cs.MX.sym("s", nx), cs.MX.sym("u", nu)
58sin_s = cs.vertcat(cs.sin(s[0] / 2), s[1:]) # Penalize sin(θ/2), not θ
59stage_cost_x = cs.Function("lx", [s, Q], [cs.dot(sin_s, cs.diag(Q) @ sin_s)])
60terminal_cost_x = cs.Function("lf", [s, Qf], [cs.dot(sin_s, cs.diag(Qf) @ sin_s)])
61stage_cost_u = cs.Function("lu", [u, R], [cs.dot(u, cs.diag(R) @ u)])
62
63mpc_param = cs.vertcat(mpc_x0, Q, Qf, R)
64mpc_y_cost = cs.sum2(stage_cost_x.map(N_horiz - 1)(mpc_x[:, :-1], Q))
65mpc_u_cost = cs.sum2(stage_cost_u.map(N_horiz)(mpc_u, R))
66mpc_terminal_cost = terminal_cost_x(mpc_x[:, -1], Qf)
67mpc_cost = mpc_y_cost + mpc_u_cost + mpc_terminal_cost
68
69# Box constraints on the actuator force:
70C = -F_max * np.ones(N_horiz), +F_max * np.ones(N_horiz)
71
72# Initial state of the system
73state_0 = np.array([-np.pi, 0, 0.2, 0])
74
75# Initial parameter value
76Q = [10, 1e-2, 1, 1e-2]
77Qf = np.array([10, 1e-1, 1000, 1e-1])
78R = [1e-1]
79param_0 = np.concatenate((state_0, Q, Qf, R))
80
81# Compile into an alpaqa problem
82import alpaqa
83
84# Generate C code for the cost function, compile it, and load it as an
85# alpaqa problem description:
86problem = (
87 alpaqa.minimize(mpc_cost, cs.vec(mpc_u)) # objective and variables
88 .subject_to_box(C) # box constraints on the variables
89 .with_param(mpc_param, param_0) # parameters to be changed later
90 .with_name(f"inverted_pendulum_{N_horiz}") # name used in generated files
91).compile(sym=cs.MX.sym)
92
93from datetime import timedelta
94
95# Configure an alpaqa solver:
96Solver = alpaqa.PANOCSolver
97inner_solver = Solver(
98 panoc_params={
99 "max_time": timedelta(seconds=Ts),
100 "max_iter": 200,
101 "stop_crit": alpaqa.PANOCStopCrit.ProjGradUnitNorm2,
102 },
103 direction=alpaqa.StructuredLBFGSDirection(
104 lbfgs_params={"memory": 15},
105 direction_params={"hessian_vec_factor": 0},
106 ),
107)
108alm_params = {
109 "tolerance": 1e-4,
110 "initial_tolerance": 1e-4,
111}
112solver = alpaqa.ALMSolver(
113 alm_params=alm_params,
114 inner_solver=inner_solver,
115)
116
117
118# %% Controller class
119
120
121# Wrap the solver in a class that solves the optimal control problem at each
122# time step, implementing warm starting:
123class MPCController:
124 def __init__(self, problem: alpaqa.CasADiProblem):
125 self.problem = problem
126 self.tot_it = 0
127 self.tot_time = timedelta()
128 self.max_time = timedelta()
129 self.failures = 0
130 self.u = np.ones(problem.n)
131
132 def __call__(self, state_n):
133 state_n = np.array(state_n).ravel()
134 # Set the current state as the initial state
135 self.problem.param[:nx] = state_n
136 # Shift over the previous solution by one time step
137 self.u = np.concatenate((self.u[nu:], self.u[-nu:]))
138 # Solve the optimal control problem
139 # (warm start using the shifted previous solution)
140 self.u, _, stats = solver(self.problem, self.u)
141 # Print some solver statistics
142 print(
143 f"{stats['status']!s:24} iter={stats['inner']['iterations']:4} "
144 f"time={stats['elapsed_time']} "
145 f"failures={stats['inner_convergence_failures']} "
146 f"tol={stats['ε']:1.3e} stepsize={stats['inner']['final_γ']:1.3e}"
147 )
148 self.tot_it += stats["inner"]["iterations"]
149 self.failures += stats["status"] != alpaqa.SolverStatus.Converged
150 self.tot_time += stats["elapsed_time"]
151 self.max_time = max(self.max_time, stats["elapsed_time"])
152 # Return the optimal control signal for the first time step
153 return self.u[:nu]
154
155
156# %% Simulate the system using the MPC controller
157
158# Simulation
159mpc_states = np.empty((nx, N_sim + 1))
160mpc_inputs = np.empty((nu, N_sim))
161mpc_states[:, 0] = state_0
162controller = MPCController(problem)
163for n in range(N_sim):
164 # Solve the optimal control problem:
165 mpc_inputs[:, n] = controller(mpc_states[:, n])
166 # Apply the first optimal control input to the system and simulate for
167 # one time step, then update the state:
168 mpc_states[:, n + 1] = f_d(mpc_states[:, n], mpc_inputs[:, n]).T
169
170print(f"{controller.tot_it} iterations, {controller.failures} failures")
171print(
172 f"time: {controller.tot_time} (total), {controller.max_time} (max), "
173 f"{controller.tot_time / N_sim} (avg)"
174)
175
176
177# %% Visualize the results
178
179import matplotlib.pyplot as plt
180import matplotlib as mpl
181from matplotlib import animation
182
183mpl.rcParams["animation.frame_format"] = "svg"
184
185# Plot the cart and pendulum
186fig, ax = plt.subplots()
187h = 0.04
188x = [state_0[2], state_0[2] + l_pend * np.sin(state_0[0])]
189y = [h, h + l_pend * np.cos(state_0[0])]
190(target_pend,) = ax.plot(x, y, "--", color="tab:blue", label="Initial state")
191state_N = [0, 0, 0, 0]
192x = [state_N[2], state_N[2] + l_pend * np.sin(state_N[0])]
193y = [h, h + l_pend * np.cos(state_N[0])]
194(target_pend,) = ax.plot(x, y, "--", color="tab:green", label="Target state")
195(pend,) = ax.plot(x, y, "-o", color="tab:orange", alpha=0.9, label="MPC")
196cart = plt.Rectangle((-2 * h, 0), 4 * h, h, color="tab:orange")
197ax.add_patch(cart)
198plt.legend()
199plt.ylim([-l_pend + h, l_pend + 2 * h])
200plt.xlim([-1.5 * l_pend, +1.5 * l_pend])
201plt.gca().set_aspect("equal", "box")
202plt.tight_layout()
203
204
205class Animation:
206 def __call__(self, n):
207 state_n = mpc_states[:, n]
208 x = [state_n[2], state_n[2] + l_pend * np.sin(state_n[0])]
209 y = [h, h + l_pend * np.cos(state_n[0])]
210 pend.set_xdata(x)
211 pend.set_ydata(y)
212 cart.set_x(state_n[2] - 2 * h)
213 return [pend, cart]
214
215
216ani = animation.FuncAnimation(
217 fig, Animation(), interval=1000 * Ts, blit=True, repeat=True, frames=N_sim
218)
219
220fig, axs = plt.subplots(5, sharex=True, figsize=(6, 9))
221ts = np.arange(N_sim + 1) * Ts
222labels = [
223 "Angle $\\theta$ [rad]",
224 "Angular velocity $\\omega$ [rad/s]",
225 "Position $x$ [m]",
226 "Velocity $v$ [m/s]",
227]
228for i, (ax, lbl) in enumerate(zip(axs[:-1], labels)):
229 ax.plot(ts, mpc_states[i, :])
230 ax.set_title(lbl)
231ax = axs[-1]
232ax.plot(ts[:N_sim], mpc_inputs.T)
233ax.set_title("Control input $F$ [N]")
234ax.set_xlabel("Simulation time $t$ [s]")
235ax.set_xlim(0, N_sim * Ts)
236plt.tight_layout()
237
238# Show the animation
239plt.show()