Inverted Pendulum#

In this example, a mode predictive controller (MPC) is used to swing up and stabilize an inverted pendulum mounted on a moving cart.

Figure of the MPC solution to the inverted pendulum problem.

Plot of the states and the control signal of the MPC solution.#

The state vector consist of the angle of the pendulum \(\theta\), its angular velocity \(\omega\), the position of the cart \(x\), and its velocity \(v\). We use the following model:

\[\begin{split}\newcommand{\pend}[1]{#1_\mathrm{pend}} \newcommand{\cart}[1]{#1_\mathrm{cart}} \begin{aligned} \pend{a} &= \pend{l} \sin(\theta)\, \omega^2 - g \cos(\theta) \sin(\theta) \\ a &= \frac{F - \cart{b}\, v + \pend{m}\, \pend{a}}{\cart{m} + \pend{m}} \\ \alpha &= \frac{g \sin(\theta) - a \cos(\theta)}{\pend{l}} \\ \begin{pmatrix} \dot \theta \\ \dot \omega \\ \dot x \\ \dot v \end{pmatrix} &= \begin{pmatrix} \omega \\ \alpha \\ v \\ a \end{pmatrix} \end{aligned}\end{split}\]

A quadratic cost function is used, applied to the sine of half the pendulum angle, to make it periodic with period \(2\pi\):

\[\ell(x, v, \theta, \omega, F) = q_\theta \sin^2(\theta/2) + q_\omega\, \omega^2 + q_x\, x^2 + q_v\, v^2 + r_F\, F^2\]

The force \(F\) exerted on the cart is limited to \(\pm 5 \mathrm{N}\).

  1# %% Model
  2
  3import casadi as cs
  4import numpy as np
  5
  6# State vector
  7θ = cs.SX.sym("θ")  # Pendulum angle                     [rad]
  8ω = cs.SX.sym("ω")  # Pendulum angular velocity          [rad/s]
  9x = cs.SX.sym("x")  # Cart position                      [m]
 10v = cs.SX.sym("v")  # Cart velocity                      [m/s]
 11state = cs.vertcat(θ, ω, x, v)  # Full state vector
 12nx = state.shape[0]  # Number of states
 13
 14# Input
 15F = cs.SX.sym("F")  # External force applied to the cart [N]
 16nu = F.shape[0]  # Number of inputs
 17
 18# Parameters
 19F_max = 5  #        Maximum force applied to the cart    [N]
 20m_cart = 0.8  #     Mass of the cart                     [kg]
 21m_pend = 0.3  #     Mass of the pendulum                 [kg]
 22b_cart = 0.1  #     Friction coefficient of the cart     [N/m/s]
 23l_pend = 0.3  #     Length of the pendulum               [m]
 24g_gravity = 9.81  # Gravitational acceleration           [m/s²]
 25Ts = 0.025  #       Simulation sampling time             [s]
 26N_horiz = 50  #     MPC horizon                          [time steps]
 27N_sim = 300  #      Simulation length                    [time steps]
 28
 29# Continous-time dynamics
 30a_pend = l_pend * cs.sin(θ) * ω**2 - g_gravity * cs.cos(θ) * cs.sin(θ)
 31a = (F - b_cart * v + m_pend * a_pend) / (m_cart + m_pend)
 32α = (g_gravity * cs.sin(θ) - a * cs.cos(θ)) / l_pend
 33f_c = cs.Function("f_c", [state, F], [cs.vertcat(ω, α, v, a)])
 34
 35# 4th order Runge-Kutta integrator
 36k1 = f_c(state, F)
 37k2 = f_c(state + Ts * k1 / 2, F)
 38k3 = f_c(state + Ts * k2 / 2, F)
 39k4 = f_c(state + Ts * k3, F)
 40
 41# Discrete-time dynamics
 42f_d_expr = state + (Ts / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
 43f_d = cs.Function("f", [state, F], [f_d_expr])
 44
 45
 46# %% Model predictive control
 47
 48# MPC inputs and states
 49mpc_x0 = cs.MX.sym("x0", nx)  # Initial state
 50mpc_u = cs.MX.sym("u", (1, N_horiz))  # Inputs
 51mpc_x = f_d.mapaccum(N_horiz)(mpc_x0, mpc_u)  # Simulated states
 52
 53# MPC cost
 54Q = cs.MX.sym("Q", nx)  # Stage state cost
 55Qf = cs.MX.sym("Qf", nx)  # Terminal state cost
 56R = cs.MX.sym("R", nu)  # Stage input cost
 57s, u = cs.MX.sym("s", nx), cs.MX.sym("u", nu)
 58sin_s = cs.vertcat(cs.sin(s[0] / 2), s[1:])  # Penalize sin(θ/2), not θ
 59stage_cost_x = cs.Function("lx", [s, Q], [cs.dot(sin_s, cs.diag(Q) @ sin_s)])
 60terminal_cost_x = cs.Function("lf", [s, Qf], [cs.dot(sin_s, cs.diag(Qf) @ sin_s)])
 61stage_cost_u = cs.Function("lu", [u, R], [cs.dot(u, cs.diag(R) @ u)])
 62
 63mpc_param = cs.vertcat(mpc_x0, Q, Qf, R)
 64mpc_y_cost = cs.sum2(stage_cost_x.map(N_horiz - 1)(mpc_x[:, :-1], Q))
 65mpc_u_cost = cs.sum2(stage_cost_u.map(N_horiz)(mpc_u, R))
 66mpc_terminal_cost = terminal_cost_x(mpc_x[:, -1], Qf)
 67mpc_cost = mpc_y_cost + mpc_u_cost + mpc_terminal_cost
 68
 69# Box constraints on the actuator force:
 70C = -F_max * np.ones(N_horiz), +F_max * np.ones(N_horiz)
 71
 72# Compile into an alpaqa problem
 73import alpaqa
 74
 75# Generate C code for the cost function, compile it, and load it as an
 76# alpaqa problem description:
 77problem = (
 78    alpaqa.minimize(mpc_cost, cs.vec(mpc_u))  # objective and variables
 79    .subject_to_box(C)  #                       box constraints on the variables
 80    .with_param(mpc_param)  #                   parameters to be changed later
 81).compile(sym=cs.MX.sym)
 82
 83from datetime import timedelta
 84
 85# Configure an alpaqa solver:
 86Solver = alpaqa.PANOCSolver
 87inner_solver = Solver(
 88    panoc_params={
 89        "max_time": timedelta(seconds=Ts),
 90        "max_iter": 200,
 91        "stop_crit": alpaqa.PANOCStopCrit.ProjGradUnitNorm2,
 92    },
 93    direction=alpaqa.StructuredLBFGSDirection(
 94        lbfgs_params={"memory": 15},
 95        direction_params={"hessian_vec_factor": 0},
 96    ),
 97)
 98alm_params = {
 99    "tolerance": 1e-4,
100    "initial_tolerance": 1e-4,
101}
102solver = alpaqa.ALMSolver(
103    alm_params=alm_params,
104    inner_solver=inner_solver,
105)
106
107
108# %% Controller class
109
110
111# Wrap the solver in a class that solves the optimal control problem at each
112# time step, implementing warm starting:
113class MPCController:
114    def __init__(self, problem: alpaqa.CasADiProblem):
115        self.problem = problem
116        self.tot_it = 0
117        self.tot_time = timedelta()
118        self.max_time = timedelta()
119        self.failures = 0
120        self.u = np.ones(problem.n)
121
122    def __call__(self, state_n):
123        state_n = np.array(state_n).ravel()
124        # Set the current state as the initial state
125        self.problem.param[:nx] = state_n
126        # Shift over the previous solution by one time step
127        self.u = np.concatenate((self.u[nu:], self.u[-nu:]))
128        # Solve the optimal control problem
129        # (warm start using the shifted previous solution)
130        self.u, _, stats = solver(self.problem, self.u)
131        # Print some solver statistics
132        print(
133            f"{stats['status']!s:24} iter={stats['inner']['iterations']:4} "
134            f"time={stats['elapsed_time']} "
135            f"failures={stats['inner_convergence_failures']} "
136            f"tol={stats['ε']:1.3e} stepsize={stats['inner']['final_γ']:1.3e}"
137        )
138        self.tot_it += stats["inner"]["iterations"]
139        self.failures += stats["status"] != alpaqa.SolverStatus.Converged
140        self.tot_time += stats["elapsed_time"]
141        self.max_time = max(self.max_time, stats["elapsed_time"])
142        # Return the optimal control signal for the first time step
143        return self.u[:nu]
144
145
146# %% Simulate the system using the MPC controller
147
148state_0 = np.array([-np.pi, 0, 0.2, 0])  # Initial state of the system
149
150# Parameters
151Q = [10, 1e-2, 1, 1e-2]
152Qf = np.array([10, 1e-1, 1000, 1e-1])
153R = [1e-1]
154problem.param = np.concatenate((state_0, Q, Qf, R))
155
156# Simulation
157mpc_states = np.empty((nx, N_sim + 1))
158mpc_inputs = np.empty((nu, N_sim))
159mpc_states[:, 0] = state_0
160controller = MPCController(problem)
161for n in range(N_sim):
162    # Solve the optimal control problem:
163    mpc_inputs[:, n] = controller(mpc_states[:, n])
164    # Apply the first optimal control input to the system and simulate for
165    # one time step, then update the state:
166    mpc_states[:, n + 1] = f_d(mpc_states[:, n], mpc_inputs[:, n]).T
167
168print(f"{controller.tot_it} iterations, {controller.failures} failures")
169print(
170    f"time: {controller.tot_time} (total), {controller.max_time} (max), "
171    f"{controller.tot_time / N_sim} (avg)"
172)
173
174
175# %% Visualize the results
176
177import matplotlib.pyplot as plt
178import matplotlib as mpl
179from matplotlib import animation
180
181mpl.rcParams["animation.frame_format"] = "svg"
182
183# Plot the cart and pendulum
184fig, ax = plt.subplots()
185h = 0.04
186x = [state_0[2], state_0[2] + l_pend * np.sin(state_0[0])]
187y = [h, h + l_pend * np.cos(state_0[0])]
188(target_pend,) = ax.plot(x, y, "--", color="tab:blue", label="Initial state")
189state_N = [0, 0, 0, 0]
190x = [state_N[2], state_N[2] + l_pend * np.sin(state_N[0])]
191y = [h, h + l_pend * np.cos(state_N[0])]
192(target_pend,) = ax.plot(x, y, "--", color="tab:green", label="Target state")
193(pend,) = ax.plot(x, y, "-o", color="tab:orange", alpha=0.9, label="MPC")
194cart = plt.Rectangle((-2 * h, 0), 4 * h, h, color="tab:orange")
195ax.add_patch(cart)
196plt.legend()
197plt.ylim([-l_pend + h, l_pend + 2 * h])
198plt.xlim([-1.5 * l_pend, +1.5 * l_pend])
199plt.gca().set_aspect("equal", "box")
200plt.tight_layout()
201
202
203class Animation:
204    def __call__(self, n):
205        state_n = mpc_states[:, n]
206        x = [state_n[2], state_n[2] + l_pend * np.sin(state_n[0])]
207        y = [h, h + l_pend * np.cos(state_n[0])]
208        pend.set_xdata(x)
209        pend.set_ydata(y)
210        cart.set_x(state_n[2] - 2 * h)
211        return [pend, cart]
212
213
214ani = animation.FuncAnimation(
215    fig, Animation(), interval=1000 * Ts, blit=True, repeat=True, frames=N_sim
216)
217
218fig, axs = plt.subplots(5, sharex=True, figsize=(6, 9))
219ts = np.arange(N_sim + 1) * Ts
220labels = [
221    "Angle $\\theta$ [rad]",
222    "Angular velocity $\\omega$ [rad/s]",
223    "Position $x$ [m]",
224    "Velocity $v$ [m/s]",
225]
226for i, (ax, lbl) in enumerate(zip(axs[:-1], labels)):
227    ax.plot(ts, mpc_states[i, :])
228    ax.set_title(lbl)
229ax = axs[-1]
230ax.plot(ts[:N_sim], mpc_inputs.T)
231ax.set_title("Control input $F$ [N]")
232ax.set_xlabel("Simulation time $t$ [s]")
233ax.set_xlim(0, N_sim * Ts)
234plt.tight_layout()
235
236# Show the animation
237plt.show()