Inverted Pendulum#
In this example, a mode predictive controller (MPC) is used to swing up and stabilize an inverted pendulum mounted on a moving cart.
Plot of the states and the control signal of the MPC solution.#
The state vector consist of the angle of the pendulum
A quadratic cost function is used, applied to the sine of half the pendulum angle,
to make it periodic with period
The force
1# %% Model
2
3import casadi as cs
4import numpy as np
5
6# State vector
7θ = cs.SX.sym("θ") # Pendulum angle [rad]
8ω = cs.SX.sym("ω") # Pendulum angular velocity [rad/s]
9x = cs.SX.sym("x") # Cart position [m]
10v = cs.SX.sym("v") # Cart velocity [m/s]
11state = cs.vertcat(θ, ω, x, v) # Full state vector
12nx = state.shape[0] # Number of states
13
14# Input
15F = cs.SX.sym("F") # External force applied to the cart [N]
16nu = F.shape[0] # Number of inputs
17
18# Parameters
19F_max = 5 # Maximum force applied to the cart [N]
20m_cart = 0.8 # Mass of the cart [kg]
21m_pend = 0.3 # Mass of the pendulum [kg]
22b_cart = 0.1 # Friction coefficient of the cart [N/m/s]
23l_pend = 0.3 # Length of the pendulum [m]
24g_gravity = 9.81 # Gravitational acceleration [m/s²]
25Ts = 0.025 # Simulation sampling time [s]
26N_horiz = 50 # MPC horizon [time steps]
27N_sim = 300 # Simulation length [time steps]
28
29# Continous-time dynamics
30a_pend = l_pend * cs.sin(θ) * ω**2 - g_gravity * cs.cos(θ) * cs.sin(θ)
31a = (F - b_cart * v + m_pend * a_pend) / (m_cart + m_pend)
32α = (g_gravity * cs.sin(θ) - a * cs.cos(θ)) / l_pend
33f_c = cs.Function("f_c", [state, F], [cs.vertcat(ω, α, v, a)])
34
35# 4th order Runge-Kutta integrator
36k1 = f_c(state, F)
37k2 = f_c(state + Ts * k1 / 2, F)
38k3 = f_c(state + Ts * k2 / 2, F)
39k4 = f_c(state + Ts * k3, F)
40
41# Discrete-time dynamics
42f_d_expr = state + (Ts / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
43f_d = cs.Function("f", [state, F], [f_d_expr])
44
45
46# %% Model predictive control
47
48# MPC inputs and states
49mpc_x0 = cs.MX.sym("x0", nx) # Initial state
50mpc_u = cs.MX.sym("u", (1, N_horiz)) # Inputs
51mpc_x = f_d.mapaccum(N_horiz)(mpc_x0, mpc_u) # Simulated states
52
53# MPC cost
54Q = cs.MX.sym("Q", nx) # Stage state cost
55Qf = cs.MX.sym("Qf", nx) # Terminal state cost
56R = cs.MX.sym("R", nu) # Stage input cost
57s, u = cs.MX.sym("s", nx), cs.MX.sym("u", nu)
58sin_s = cs.vertcat(cs.sin(s[0] / 2), s[1:]) # Penalize sin(θ/2), not θ
59stage_cost_x = cs.Function("lx", [s, Q], [cs.dot(sin_s, cs.diag(Q) @ sin_s)])
60terminal_cost_x = cs.Function("lf", [s, Qf], [cs.dot(sin_s, cs.diag(Qf) @ sin_s)])
61stage_cost_u = cs.Function("lu", [u, R], [cs.dot(u, cs.diag(R) @ u)])
62
63mpc_param = cs.vertcat(mpc_x0, Q, Qf, R)
64mpc_y_cost = cs.sum2(stage_cost_x.map(N_horiz - 1)(mpc_x[:, :-1], Q))
65mpc_u_cost = cs.sum2(stage_cost_u.map(N_horiz)(mpc_u, R))
66mpc_terminal_cost = terminal_cost_x(mpc_x[:, -1], Qf)
67mpc_cost = mpc_y_cost + mpc_u_cost + mpc_terminal_cost
68
69# Box constraints on the actuator force:
70C = -F_max * np.ones(N_horiz), +F_max * np.ones(N_horiz)
71
72# Initial state of the system
73state_0 = np.array([-np.pi, 0, 0.2, 0])
74
75# Initial parameter value
76Q = [10, 1e-2, 1, 1e-2]
77Qf = np.array([10, 1e-1, 1000, 1e-1])
78R = [1e-1]
79param_0 = np.concatenate((state_0, Q, Qf, R))
80
81# Compile into an alpaqa problem
82import alpaqa
83
84# Generate C code for the cost function, compile it, and load it as an
85# alpaqa problem description:
86problem = (
87 alpaqa.minimize(mpc_cost, cs.vec(mpc_u)) # objective and variables
88 .subject_to_box(C) # box constraints on the variables
89 .with_param(mpc_param, param_0) # parameters to be changed later
90 .with_name(f"inverted_pendulum_{N_horiz}") # name used in generated files
91).compile(sym=cs.MX.sym)
92
93from datetime import timedelta
94
95# Configure an alpaqa solver:
96Solver = alpaqa.PANOCSolver
97inner_solver = Solver(
98 panoc_params={
99 "max_time": timedelta(seconds=Ts),
100 "max_iter": 200,
101 "stop_crit": alpaqa.PANOCStopCrit.ProjGradUnitNorm2,
102 },
103 direction=alpaqa.StructuredLBFGSDirection(
104 lbfgs_params={"memory": 15},
105 direction_params={"hessian_vec_factor": 0},
106 ),
107)
108alm_params = {
109 "tolerance": 1e-4,
110 "initial_tolerance": 1e-4,
111}
112solver = alpaqa.ALMSolver(
113 alm_params=alm_params,
114 inner_solver=inner_solver,
115)
116
117
118# %% Controller class
119
120
121# Wrap the solver in a class that solves the optimal control problem at each
122# time step, implementing warm starting:
123class MPCController:
124 def __init__(self, problem: alpaqa.CasADiProblem):
125 self.problem = problem
126 self.tot_it = 0
127 self.tot_time = timedelta()
128 self.max_time = timedelta()
129 self.failures = 0
130 self.u = np.ones(problem.n)
131
132 def __call__(self, state_n):
133 state_n = np.array(state_n).ravel()
134 # Set the current state as the initial state
135 self.problem.param[:nx] = state_n
136 # Shift over the previous solution by one time step
137 self.u = np.concatenate((self.u[nu:], self.u[-nu:]))
138 # Solve the optimal control problem
139 # (warm start using the shifted previous solution)
140 self.u, _, stats = solver(self.problem, self.u)
141 # Print some solver statistics
142 print(
143 f"{stats['status']!s:24} iter={stats['inner']['iterations']:4} "
144 f"time={stats['elapsed_time']} "
145 f"failures={stats['inner_convergence_failures']} "
146 f"tol={stats['ε']:1.3e} stepsize={stats['inner']['final_γ']:1.3e}"
147 )
148 self.tot_it += stats["inner"]["iterations"]
149 self.failures += stats["status"] != alpaqa.SolverStatus.Converged
150 self.tot_time += stats["elapsed_time"]
151 self.max_time = max(self.max_time, stats["elapsed_time"])
152 # Return the optimal control signal for the first time step
153 return self.u[:nu]
154
155
156# %% Simulate the system using the MPC controller
157
158# Simulation
159mpc_states = np.empty((nx, N_sim + 1))
160mpc_inputs = np.empty((nu, N_sim))
161mpc_states[:, 0] = state_0
162controller = MPCController(problem)
163for n in range(N_sim):
164 # Solve the optimal control problem:
165 mpc_inputs[:, n] = controller(mpc_states[:, n])
166 # Apply the first optimal control input to the system and simulate for
167 # one time step, then update the state:
168 mpc_states[:, n + 1] = f_d(mpc_states[:, n], mpc_inputs[:, n]).T
169
170print(f"{controller.tot_it} iterations, {controller.failures} failures")
171print(
172 f"time: {controller.tot_time} (total), {controller.max_time} (max), "
173 f"{controller.tot_time / N_sim} (avg)"
174)
175
176
177# %% Visualize the results
178
179import matplotlib.pyplot as plt
180import matplotlib as mpl
181from matplotlib import animation
182
183mpl.rcParams["animation.frame_format"] = "svg"
184
185# Plot the cart and pendulum
186fig, ax = plt.subplots()
187h = 0.04
188x = [state_0[2], state_0[2] + l_pend * np.sin(state_0[0])]
189y = [h, h + l_pend * np.cos(state_0[0])]
190(target_pend,) = ax.plot(x, y, "--", color="tab:blue", label="Initial state")
191state_N = [0, 0, 0, 0]
192x = [state_N[2], state_N[2] + l_pend * np.sin(state_N[0])]
193y = [h, h + l_pend * np.cos(state_N[0])]
194(target_pend,) = ax.plot(x, y, "--", color="tab:green", label="Target state")
195(pend,) = ax.plot(x, y, "-o", color="tab:orange", alpha=0.9, label="MPC")
196cart = plt.Rectangle((-2 * h, 0), 4 * h, h, color="tab:orange")
197ax.add_patch(cart)
198plt.legend()
199plt.ylim([-l_pend + h, l_pend + 2 * h])
200plt.xlim([-1.5 * l_pend, +1.5 * l_pend])
201plt.gca().set_aspect("equal", "box")
202plt.tight_layout()
203
204
205class Animation:
206 def __call__(self, n):
207 state_n = mpc_states[:, n]
208 x = [state_n[2], state_n[2] + l_pend * np.sin(state_n[0])]
209 y = [h, h + l_pend * np.cos(state_n[0])]
210 pend.set_xdata(x)
211 pend.set_ydata(y)
212 cart.set_x(state_n[2] - 2 * h)
213 return [pend, cart]
214
215
216ani = animation.FuncAnimation(
217 fig, Animation(), interval=1000 * Ts, blit=True, repeat=True, frames=N_sim
218)
219
220fig, axs = plt.subplots(5, sharex=True, figsize=(6, 9))
221ts = np.arange(N_sim + 1) * Ts
222labels = [
223 "Angle $\\theta$ [rad]",
224 "Angular velocity $\\omega$ [rad/s]",
225 "Position $x$ [m]",
226 "Velocity $v$ [m/s]",
227]
228for i, (ax, lbl) in enumerate(zip(axs[:-1], labels)):
229 ax.plot(ts, mpc_states[i, :])
230 ax.set_title(lbl)
231ax = axs[-1]
232ax.plot(ts[:N_sim], mpc_inputs.T)
233ax.set_title("Control input $F$ [N]")
234ax.set_xlabel("Simulation time $t$ [s]")
235ax.set_xlim(0, N_sim * Ts)
236plt.tight_layout()
237
238# Show the animation
239plt.show()