Inverted Pendulum#
In this example, a mode predictive controller (MPC) is used to swing up and stabilize an inverted pendulum mounted on a moving cart.
The state vector consist of the angle of the pendulum \(\theta\), its angular velocity \(\omega\), the position of the cart \(x\), and its velocity \(v\). We use the following model:
\[\begin{split}\newcommand{\pend}[1]{#1_\mathrm{pend}}
\newcommand{\cart}[1]{#1_\mathrm{cart}}
\begin{aligned}
\pend{a} &= \pend{l} \sin(\theta)\, \omega^2 - g \cos(\theta) \sin(\theta) \\
a &= \frac{F - \cart{b}\, v + \pend{m}\, \pend{a}}{\cart{m} + \pend{m}} \\
\alpha &= \frac{g \sin(\theta) - a \cos(\theta)}{\pend{l}} \\
\begin{pmatrix} \dot \theta \\ \dot \omega \\ \dot x \\ \dot v \end{pmatrix}
&= \begin{pmatrix} \omega \\ \alpha \\ v \\ a \end{pmatrix}
\end{aligned}\end{split}\]
A quadratic cost function is used, applied to the sine of half the pendulum angle, to make it periodic with period \(2\pi\):
\[\ell(x, v, \theta, \omega, F) = q_\theta \sin^2(\theta/2) + q_\omega\, \omega^2 + q_x\, x^2 + q_v\, v^2 + r_F\, F^2\]
The force \(F\) exerted on the cart is limited to \(\pm 5 \mathrm{N}\).
1# %% Model
2
3import casadi as cs
4import numpy as np
5
6# State vector
7θ = cs.SX.sym("θ") # Pendulum angle [rad]
8ω = cs.SX.sym("ω") # Pendulum angular velocity [rad/s]
9x = cs.SX.sym("x") # Cart position [m]
10v = cs.SX.sym("v") # Cart velocity [m/s]
11state = cs.vertcat(θ, ω, x, v) # Full state vector
12nx = state.shape[0] # Number of states
13
14# Input
15F = cs.SX.sym("F") # External force applied to the cart [N]
16nu = F.shape[0] # Number of inputs
17
18# Parameters
19F_max = 5 # Maximum force applied to the cart [N]
20m_cart = 0.8 # Mass of the cart [kg]
21m_pend = 0.3 # Mass of the pendulum [kg]
22b_cart = 0.1 # Friction coefficient of the cart [N/m/s]
23l_pend = 0.3 # Length of the pendulum [m]
24g_gravity = 9.81 # Gravitational acceleration [m/s²]
25Ts = 0.025 # Simulation sampling time [s]
26N_horiz = 50 # MPC horizon [time steps]
27N_sim = 300 # Simulation length [time steps]
28
29# Continous-time dynamics
30a_pend = l_pend * cs.sin(θ) * ω**2 - g_gravity * cs.cos(θ) * cs.sin(θ)
31a = (F - b_cart * v + m_pend * a_pend) / (m_cart + m_pend)
32α = (g_gravity * cs.sin(θ) - a * cs.cos(θ)) / l_pend
33f_c = cs.Function("f_c", [state, F], [cs.vertcat(ω, α, v, a)])
34
35# 4th order Runge-Kutta integrator
36k1 = f_c(state, F)
37k2 = f_c(state + Ts * k1 / 2, F)
38k3 = f_c(state + Ts * k2 / 2, F)
39k4 = f_c(state + Ts * k3, F)
40
41# Discrete-time dynamics
42f_d_expr = state + (Ts / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
43f_d = cs.Function("f", [state, F], [f_d_expr])
44
45
46# %% Model predictive control
47
48# MPC inputs and states
49mpc_x0 = cs.MX.sym("x0", nx) # Initial state
50mpc_u = cs.MX.sym("u", (1, N_horiz)) # Inputs
51mpc_x = f_d.mapaccum(N_horiz)(mpc_x0, mpc_u) # Simulated states
52
53# MPC cost
54Q = cs.MX.sym("Q", nx) # Stage state cost
55Qf = cs.MX.sym("Qf", nx) # Terminal state cost
56R = cs.MX.sym("R", nu) # Stage input cost
57s, u = cs.MX.sym("s", nx), cs.MX.sym("u", nu)
58sin_s = cs.vertcat(cs.sin(s[0] / 2), s[1:]) # Penalize sin(θ/2), not θ
59stage_cost_x = cs.Function("lx", [s, Q], [cs.dot(sin_s, cs.diag(Q) @ sin_s)])
60terminal_cost_x = cs.Function("lf", [s, Qf], [cs.dot(sin_s, cs.diag(Qf) @ sin_s)])
61stage_cost_u = cs.Function("lu", [u, R], [cs.dot(u, cs.diag(R) @ u)])
62
63mpc_param = cs.vertcat(mpc_x0, Q, Qf, R)
64mpc_y_cost = cs.sum2(stage_cost_x.map(N_horiz - 1)(mpc_x[:, :-1], Q))
65mpc_u_cost = cs.sum2(stage_cost_u.map(N_horiz)(mpc_u, R))
66mpc_terminal_cost = terminal_cost_x(mpc_x[:, -1], Qf)
67mpc_cost = mpc_y_cost + mpc_u_cost + mpc_terminal_cost
68
69# Box constraints on the actuator force:
70C = -F_max * np.ones(N_horiz), +F_max * np.ones(N_horiz)
71
72# Initial state of the system
73state_0 = np.array([-np.pi, 0, 0.2, 0])
74
75# Initial parameter value
76Q = [10, 1e-2, 1, 1e-2]
77Qf = np.array([10, 1e-1, 1000, 1e-1])
78R = [1e-1]
79param_0 = np.concatenate((state_0, Q, Qf, R))
80
81# Compile into an alpaqa problem
82import alpaqa
83
84# Generate C code for the cost function, compile it, and load it as an
85# alpaqa problem description:
86problem = (
87 alpaqa.minimize(mpc_cost, cs.vec(mpc_u)) # objective and variables
88 .subject_to_box(C) # box constraints on the variables
89 .with_param(mpc_param, param_0) # parameters to be changed later
90).compile(sym=cs.MX.sym)
91
92from datetime import timedelta
93
94# Configure an alpaqa solver:
95Solver = alpaqa.PANOCSolver
96inner_solver = Solver(
97 panoc_params={
98 "max_time": timedelta(seconds=Ts),
99 "max_iter": 200,
100 "stop_crit": alpaqa.PANOCStopCrit.ProjGradUnitNorm2,
101 },
102 direction=alpaqa.StructuredLBFGSDirection(
103 lbfgs_params={"memory": 15},
104 direction_params={"hessian_vec_factor": 0},
105 ),
106)
107alm_params = {
108 "tolerance": 1e-4,
109 "initial_tolerance": 1e-4,
110}
111solver = alpaqa.ALMSolver(
112 alm_params=alm_params,
113 inner_solver=inner_solver,
114)
115
116
117# %% Controller class
118
119
120# Wrap the solver in a class that solves the optimal control problem at each
121# time step, implementing warm starting:
122class MPCController:
123 def __init__(self, problem: alpaqa.CasADiProblem):
124 self.problem = problem
125 self.tot_it = 0
126 self.tot_time = timedelta()
127 self.max_time = timedelta()
128 self.failures = 0
129 self.u = np.ones(problem.n)
130
131 def __call__(self, state_n):
132 state_n = np.array(state_n).ravel()
133 # Set the current state as the initial state
134 self.problem.param[:nx] = state_n
135 # Shift over the previous solution by one time step
136 self.u = np.concatenate((self.u[nu:], self.u[-nu:]))
137 # Solve the optimal control problem
138 # (warm start using the shifted previous solution)
139 self.u, _, stats = solver(self.problem, self.u)
140 # Print some solver statistics
141 print(
142 f"{stats['status']!s:24} iter={stats['inner']['iterations']:4} "
143 f"time={stats['elapsed_time']} "
144 f"failures={stats['inner_convergence_failures']} "
145 f"tol={stats['ε']:1.3e} stepsize={stats['inner']['final_γ']:1.3e}"
146 )
147 self.tot_it += stats["inner"]["iterations"]
148 self.failures += stats["status"] != alpaqa.SolverStatus.Converged
149 self.tot_time += stats["elapsed_time"]
150 self.max_time = max(self.max_time, stats["elapsed_time"])
151 # Return the optimal control signal for the first time step
152 return self.u[:nu]
153
154
155# %% Simulate the system using the MPC controller
156
157# Simulation
158mpc_states = np.empty((nx, N_sim + 1))
159mpc_inputs = np.empty((nu, N_sim))
160mpc_states[:, 0] = state_0
161controller = MPCController(problem)
162for n in range(N_sim):
163 # Solve the optimal control problem:
164 mpc_inputs[:, n] = controller(mpc_states[:, n])
165 # Apply the first optimal control input to the system and simulate for
166 # one time step, then update the state:
167 mpc_states[:, n + 1] = f_d(mpc_states[:, n], mpc_inputs[:, n]).T
168
169print(f"{controller.tot_it} iterations, {controller.failures} failures")
170print(
171 f"time: {controller.tot_time} (total), {controller.max_time} (max), "
172 f"{controller.tot_time / N_sim} (avg)"
173)
174
175
176# %% Visualize the results
177
178import matplotlib.pyplot as plt
179import matplotlib as mpl
180from matplotlib import animation
181
182mpl.rcParams["animation.frame_format"] = "svg"
183
184# Plot the cart and pendulum
185fig, ax = plt.subplots()
186h = 0.04
187x = [state_0[2], state_0[2] + l_pend * np.sin(state_0[0])]
188y = [h, h + l_pend * np.cos(state_0[0])]
189(target_pend,) = ax.plot(x, y, "--", color="tab:blue", label="Initial state")
190state_N = [0, 0, 0, 0]
191x = [state_N[2], state_N[2] + l_pend * np.sin(state_N[0])]
192y = [h, h + l_pend * np.cos(state_N[0])]
193(target_pend,) = ax.plot(x, y, "--", color="tab:green", label="Target state")
194(pend,) = ax.plot(x, y, "-o", color="tab:orange", alpha=0.9, label="MPC")
195cart = plt.Rectangle((-2 * h, 0), 4 * h, h, color="tab:orange")
196ax.add_patch(cart)
197plt.legend()
198plt.ylim([-l_pend + h, l_pend + 2 * h])
199plt.xlim([-1.5 * l_pend, +1.5 * l_pend])
200plt.gca().set_aspect("equal", "box")
201plt.tight_layout()
202
203
204class Animation:
205 def __call__(self, n):
206 state_n = mpc_states[:, n]
207 x = [state_n[2], state_n[2] + l_pend * np.sin(state_n[0])]
208 y = [h, h + l_pend * np.cos(state_n[0])]
209 pend.set_xdata(x)
210 pend.set_ydata(y)
211 cart.set_x(state_n[2] - 2 * h)
212 return [pend, cart]
213
214
215ani = animation.FuncAnimation(
216 fig, Animation(), interval=1000 * Ts, blit=True, repeat=True, frames=N_sim
217)
218
219fig, axs = plt.subplots(5, sharex=True, figsize=(6, 9))
220ts = np.arange(N_sim + 1) * Ts
221labels = [
222 "Angle $\\theta$ [rad]",
223 "Angular velocity $\\omega$ [rad/s]",
224 "Position $x$ [m]",
225 "Velocity $v$ [m/s]",
226]
227for i, (ax, lbl) in enumerate(zip(axs[:-1], labels)):
228 ax.plot(ts, mpc_states[i, :])
229 ax.set_title(lbl)
230ax = axs[-1]
231ax.plot(ts[:N_sim], mpc_inputs.T)
232ax.set_title("Control input $F$ [N]")
233ax.set_xlabel("Simulation time $t$ [s]")
234ax.set_xlim(0, N_sim * Ts)
235plt.tight_layout()
236
237# Show the animation
238plt.show()