MathematicalAnalysis/14多变量函数的微分学.tex

\chapter{多变量函数的微分学}
\section{方向导数和偏导数}
\begin{definition}[方向导数]
    设开集$D \subset \ndreal$，$f: D \to \realnum$，$\bvec{u} \in \realnum^n$且$\norm{\bvec{u}} = 1$，此时称$\bvec{u}$为一个方向，$\bvec{x}_0 \in D$。如果极限
    \[\tolim{t}{0} \frac{f(\bvec{x}_0 + t\bvec{u}) - f(\bvec{x}_0)}{t}\]
    存在且有限，那么称这个极限是函数$f$在点$\bvec{x}_0$处沿方向$\bvec{u}$的方向导数，记为$\dfrac{\partial f}{\partial \bvec{u}} (\bvec{x}_0)$。
\end{definition}

\begin{remark}
    记$\phi(t) = f(\bvec{a} + t \bvec{u})$，则显然$\deriv{\phi}(0) = \dfrac{\partial f}{\partial \bvec{u}} (\bvec{a})$。
\end{remark}

\begin{definition}[偏导数]
    讨论下列单位坐标向量
    \begin{align*}
        \bvec{e}_1 & = (1, 0, 0, \dots, 0)\\
        \bvec{e}_2 & = (0, 1, 0, \dots, 0)\\
        & \quad \dots\\
        \bvec{e}_n & = (0, 0, \dots, 0, 1)
    \end{align*}
    称函数$f$在点$\bvec{x}_0$处沿方向$\bvec{e}_i$的方向导数为$f$在$\bvec{x}_0$处的第$i$个一阶偏导数，记作
    \[\frac{\partial f}{\partial x_i}(\bvec{x}_0)\]
    或
    \[D_i f(\bvec{x}_0)\]
    并称$D_i = \dfrac{\partial}{\partial x_i}$为第$i$个偏微分算子，$i = 1, 2, \dots, n$。
\end{definition}

\section{多变量函数的微分}
我们希望与一元函数的情形类似，用一个切平面来线性近似一个曲面在某一点附近的值，即如果我们已知某空间曲面$S$的函数表示为$z = f(x, y)$，那么给定$S$上一点$P = (x_0, y_0, z_0)$，考察曲面在该点处的切平面的方程。首先切平面过$P$，因此其方程应为
\[z = z_0 + a(x - x_0) + b(y - y_0)\]
其次作为切平面应该有$z_0 = f(x_0, y_0)$，同时
\[f(x, y) - z_0 - a(x - x_0) - b(y - y_0) = o\left(\sqrt{(x - x_0)^2 + (y - y_0)^2}\right)\]
即
\[f(x, y) - f(x_0, y_0) = a(x - x_0) + b(y - y_0) + o \left(\sqrt{(x - x_0)^2 + (y - y_0)^2}\right)\]

再进一步，我们希望线性地近似一个多元函数。假设我们已知函数$u = f(x, y, z)$。那么给定一点$P = (x_0, y_0, z_0)$，考察函数在该点附近的线性近似
\[u = u_0 + a(x - x_0) + b(y - y_0) + c(z - z_0)\]
如果它是已知函数在$P$的线性近似，那么$u_0 = f(x_0, y_0, z_0)$且
\[f(x, y, z) - f(x_0, y_0, z_0) = a\Delta x + b \Delta y + c \Delta z + o\left(\sqrt{\Delta x^2 + \Delta y^2 + \Delta z^2}\right)\]
其中
\[\Delta x = x - x_0, \Delta y = y - y_0, \Delta z = z - z_0\]

\begin{definition}[函数的微分]
    设$D \subset \ndreal$，$f: D \to \realnum$。取定一点$\bvec{x}_0 \in D\interior$。如果存在$n$维向量$\bvec{A} = (\lambda_1, \lambda_2, \dots, \lambda_n)$，满足
    \[f(\bvec{x}_0 + \Delta \bvec{x}) - f(\bvec{x}_0) = \brak{\bvec{A}, \Delta \bvec{x}} + o(\norm{\Delta \bvec{x}})\]
    那么称函数$f$在点$\bvec{x}_0$处可微，并称$\brak{\bvec{A}, \Delta \bvec{x}}$为$f$在$\bvec{x}_0$处的微分，记作
    \[\dif f(\bvec{x}_0) = \brak{\bvec{A}, \Delta \bvec{x}}\]
    其中$\bvec{A}$称为微分系数。
\end{definition}

设$f$在$\bvec{a}$点可微，$\dif f(\bvec{a}) = \brak{A, \Delta \bvec{x}}$，$A = (\lambda_1, \lambda_2, \dots, \lambda_n)$。因此
\[\tolim{\norm{\Delta \bvec{x}}}{0} \frac{\abs{f(\bvec{a} + \Delta \bvec{x}) - f(\bvec{a}) - \brak{A, \Delta \bvec{x}}}}{\norm{\Delta \bvec{x}}} = 0\]
记
\begin{align*}
    \bvec{u}_1 & = (1, 0, 0, \dots, 0)\\
    \bvec{u}_2 & = (0, 1, 0, \dots, 0)\\
    & \quad \dots\\
    \bvec{u}_n & = (0, 0, \dots, 0, 1)
\end{align*}

为确定微分系数，取$\Delta \bvec{x} = t \bvec{u}_i$，那么
\[\norm{\Delta \bvec{x}} = \abs{t}, \brak{A, \Delta \bvec{x}} = \brak{A, t\bvec{u}_i} = t\lambda_i\]
代入上式
\[\tolim{t}{0} \abs{\frac{f(\bvec{a} + t \bvec{u}_i) - f(\bvec{a}) - t\lambda_i}{t}} = 0\]
因此
\[\lambda_i = \tolim{t}{0} \frac{f(\bvec{a} + t\bvec{u}_i) - f(\bvec{a})}{t} = \frac{\partial f}{\partial x_i} (\bvec{a}), i = 1, 2, \dots, n\eqper\]

\begin{corollary}
    设$f:D \to \realnum$，$\bvec{a} \in D\interior$。如果$f$在$\bvec{a}$点可微，则在$\bvec{a}$点的偏导数存在，且
    \[\dif f(\bvec{a}) = \frac{\partial f}{\partial x_1}(\bvec{a}) \Delta x_1 + \frac{\partial f}{\partial x_2}(\bvec{a}) \Delta x_2 + \dots + \frac{\partial f}{\partial x_n}(\bvec{a}) \Delta x_n\eqper\]
\end{corollary}

\begin{corollary}
    设$f:D \to \realnum$，$\bvec{a} \in D\interior$。如果$f$在$\bvec{a}$点可微，则$f$在$\bvec{a}$点连续。
\end{corollary}

\begin{proof}
    \begin{align*}
        \abs{f(\bvec{a} + \Delta \bvec{x}) - f(\bvec{a})} & = \abs{\brak{A, \Delta \bvec{x}} + o(\norm{\Delta \bvec{x}})}\\
        & \leq \norm{A} \cdot \norm{\Delta \bvec{x}} + \abs{o(\norm{\Delta \bvec{x}})} \to 0\qedhere
    \end{align*}
\end{proof}

\begin{definition}
    令
    \[Jf(\bvec{x}) = (D_1 f(\bvec{x}), D_2f(\bvec{x}), \dots, D_n f(\bvec{x}))\]
    并称它为函数$f$在点$\bvec{x}$处的Jacobian。函数的Jacobian也常记为$\gra f$或$\nabla f$，即
    \[\gra f(\bvec{x}) = J f(\bvec{x})\]
    称之为数量函数$f$的梯度。
\end{definition}

\begin{proposition}
    如果$f$在$\bvec{a}$点可微，则对于任意方向$\bvec{u} \in \ndreal$，$\norm{\bvec{u}} = 1$，有
    \[D_{\bvec{u}} f(\bvec{a}) = \frac{\partial f}{\partial \bvec{u}} (\bvec{a}) = \brak{\gra f(\bvec{a}), \bvec{u}}\eqper\]
\end{proposition}

\begin{corollary}
    对于任意方向$\bvec{u}$，$\abs{D_{\bvec{u}} f(\bvec{a})} \leq \norm{\gra f(\bvec{a})}$。

    若$\gra f(\bvec{a}) \neq 0$，$\bvec{u} = \frac{\gra f(\bvec{a})}{\norm{\gra f(\bvec{a})}}$，则$D_{\bvec{u}} f(\bvec{a}) = \norm{\gra f(\bvec{a})}$。

    这说明，$f$在$\bvec{a}$的梯度向量的方向是$f$值增加最快的方向，大小是$f$在该点所有方向导数的最大值。
\end{corollary}

下面的命题给出了一个函数可微的必要条件。
\begin{proposition}
    若函数$f$在$\bvec{a}$点可微，则存在
    \[\gra f(\bvec{a}) = (D_1 f(\bvec{a}), \dots, D_n f(\bvec{a}))\]
    从而在该点的所有方向导数都存在。
\end{proposition}

下面的命题则给出了一个函数可微的充分条件。
\begin{proposition}
    如果$f$的每个偏导数$D_i f(\bvec{x}), i = 1, 2, \dots, n$在$\bvec{a}$点的某个邻域内都存在，且在$\bvec{x} = \bvec{a}$点连续，则$f$在$\bvec{a}$点可微。
\end{proposition}

\begin{proof}
    以$n = 2$为例。在$P = (a, b)$点附近考虑函数$f(x, y)$：
    \[f(a + \Delta x, b + \Delta y) - f(a, b) = f(a + \Delta x, b + \Delta y) - f(a + \Delta x, b) + f(a + \Delta x, b) - f(a, b)\]
    应用一元函数中值定理，存在$\eta, \theta \in (0, 1)$满足
    \begin{align*}
        f(a + \Delta x, b + \Delta y) - f(a + \Delta x, b) & = D_y f(a + \Delta x, b + \eta \Delta y) \Delta y\\
        f(a + \Delta x, b) - f(a, b) & = D_x f(a + \theta \Delta x, b)\Delta x
    \end{align*}
        可以将上式凑配为
    \begin{align*}
        f(a + \Delta x, b + \Delta y) - f(a + \Delta x, b) & = D_y f(a, b) \Delta y + [D_y f(a + \Delta x, b + \eta \Delta y) - D_y f(a, b)] \Delta y\\
        f(a + \Delta x, b) - f(a, b) & = D_x f(a, b) \Delta x + [D_x f(a + \theta \Delta x, b) - D_x f(a, b)] \Delta x
    \end{align*}
    记
    \begin{align*}
        [\alpha] & = D_x f(a + \theta \Delta x, b) - D_x f(a, b)\\
        [\beta] & = D_y f(a + \Delta x, b + \eta \Delta y) - D_y f(a, b)
    \end{align*}
    那么
    \[f(a + \Delta x, b + \Delta y) - f(a, b) = D_x f(a, b) \Delta x + D_y f(a, b) \Delta y + [\alpha] \Delta x + [\beta] \Delta y\]
    于是我们只需证明$[\alpha] \Delta x + [\beta] \Delta y = o\left(\sqrt{\Delta x^2 + \Delta y^2}\right)$。我们已知$D_x f(x, y), D_y f(x, y)$在$P = (a, b)$点连续，因此
    \[\frac{\abs{[\alpha]\Delta x + [\beta] \Delta y}}{\sqrt{\Delta x^2 + \Delta y^2}} \leq \abs{[\alpha]} + \abs{[\beta]} \to 0\]
    综上，
    \[f(a + \Delta x, b + \Delta y) - f(a, b) = D_x f(a, b) \Delta x + D_y f(a, b) \Delta y + o\left(\sqrt{\Delta x^2 + \Delta y^2}\right) \eqper \qedhere\]
\end{proof}

总结起来，偏导数在$\bvec{a}$点都连续可以推出函数在$\bvec{a}$点可微，进而可以推出函数在$\bvec{a}$点连续，也可以推出函数在$\bvec{a}$点所有方向导数都存在。

\section{向量值函数的微分}
\begin{definition}
    如果映射\boldf 满足存在Jacobian $J \boldf (\bvec{x}_0)$且满足
    \[\boldf (\bvec{x}_0 + \Delta \bvec{x}) - \boldf (\bvec{x}_0) = J \boldf (\bvec{x}_0) \Delta \bvec{x} + o\left(\norm{\Delta \bvec{x}}\right)\]
    其中
    \[J\boldf (\bvec{x}_0) =
    \begin{bmatrix}
        D_1 f_1(\bvec{x}_0) & \cdots & D_n f_1(\bvec{x}_0)\\
        \vdots & \ddots & \vdots\\
        D_1 f_m(\bvec{x}_0) & \cdots & D_n f_m(\bvec{x}_0)
    \end{bmatrix}
    =
    \begin{bmatrix}
        \gra f_1 (\bvec{x}_0)\\
        \vdots\\
        \gra f_m (\bvec{x}_0)
    \end{bmatrix}\]
    那么称映射\boldf 在$\bvec{x}_0$点可微，此时\boldf 在$\bvec{x}_0$点的微分记为
    \[\dif \boldf (\bvec{x}_0) = J \boldf (\bvec{x}_0) \Delta \bvec{x} \eqper\]
\end{definition}

\begin{theorem}
    若映射\boldf 在开集$D$上存在Jacobian $J \boldf$，且$J \boldf$的各元素在点$\bvec{x}_0$处都连续，则映射\boldf 在点$\bvec{x}_0$处可微。
\end{theorem}

\section{复合求导}
\begin{theorem}
    设$D \subset \ndreal$，$\bvec{f}: D \to \realnum^m$，$\bvec{g}: \Omega \to \realnum^k$，$\bvec{f}(D) \subset \Omega \subset \realnum^m$。如果\boldf 在$\bvec{x}_0 \in D\interior$处可微，$\bvec{g}$在$\boldf(\bvec{x}_0)$处可微，那么复合映射$\bvec{g} \circ \boldf$在点$\bvec{x}_0$处可微，且
    \[J(\bvec{g} \circ \bvec{f})(\bvec{x}_0) = J \bvec{g}(\boldf (\bvec{x}_0)) J \boldf(\bvec{x}_0)\eqper\]
\end{theorem}

如果我们记$\bvec{u} = \bvec{g}(\bvec{y}), \bvec{y} = \bvec{f}(\bvec{x})$，那么$\bvec{g} \circ \bvec{f}$的Jacobian可以写为
\[\begin{bmatrix}
    \dfrac{\partial u_1}{\partial x_1} & \dfrac{\partial u_1}{\partial x_2} & \cdots & \dfrac{\partial u_1}{\partial x_n}\\[1em]
    \dfrac{\partial u_2}{\partial x_1} & \dfrac{\partial u_2}{\partial x_2} & \cdots & \dfrac{\partial u_2}{\partial x_n}\\[1ex]
    \vdots & \vdots & \ddots & \vdots\\
    \dfrac{\partial u_k}{\partial x_1} & \dfrac{\partial u_k}{\partial x_2} & \cdots & \dfrac{\partial u_k}{\partial x_n}
\end{bmatrix}
=
\begin{bmatrix}
    \dfrac{\partial u_1}{\partial y_1} & \dfrac{\partial u_1}{\partial y_2} & \cdots & \dfrac{\partial u_1}{\partial y_m}\\[1em]
    \dfrac{\partial u_2}{\partial y_1} & \dfrac{\partial u_2}{\partial y_2} & \cdots & \dfrac{\partial u_2}{\partial y_m}\\[1ex]
    \vdots & \vdots & \ddots & \vdots\\
    \dfrac{\partial u_k}{\partial y_1} & \dfrac{\partial u_k}{\partial y_2} & \cdots & \dfrac{\partial u_k}{\partial y_m}
\end{bmatrix}
\begin{bmatrix}
    \dfrac{\partial y_1}{\partial x_1} & \dfrac{\partial y_1}{\partial x_2} & \cdots & \dfrac{\partial y_1}{\partial x_n}\\[1em]
    \dfrac{\partial y_2}{\partial x_1} & \dfrac{\partial y_2}{\partial x_2} & \cdots & \dfrac{\partial y_2}{\partial x_n}\\[1ex]
    \vdots & \vdots & \ddots & \vdots\\
    \dfrac{\partial y_m}{\partial x_1} & \dfrac{\partial y_m}{\partial x_2} & \cdots & \dfrac{\partial y_m}{\partial x_n}
\end{bmatrix}\eqper\]

\section{隐函数定理}
\begin{theorem}[隐函数定理]
    设开集$D \subset \realnum^2$，函数$F: D \to \realnum$满足条件：
    \begin{enumerate}[label=(\roman{*})]
        \item $F \in C^1(D)$；
        \item 点$(x_0, y_0) \in D$使得$F(x_0, y_0) = 0$；
        \item $\dfrac{\partial F(x_0, y_0)}{\partial y} \neq 0$，
    \end{enumerate}
    则存在$\delta, \eta > 0$以及唯一的函数$f: (x_0 - \delta, x_0 + \delta) \to (y_0 - \eta, y_0 + \eta)$具有性质
    \begin{enumerate}
        \item $f(x_0) = y_0$，且对任意满足$\abs{x - x_0} < \delta$的$x$，有$F(x, f(x)) = 0$；
        \item $f \in C^1(x_0 - \delta, x_0 + \delta)$；
        \item 对$x \in (x_0 - \delta, x_0 + \delta)$，$y = f(x)$，有
        \[\deriv{f}(x) = -\frac{\dfrac{\partial F}{\partial x}(x, y)}{\dfrac{\partial F}{\partial y}(x, y)}\eqper\]
    \end{enumerate}
\end{theorem}

\begin{theorem}
    设开集$D \subset \realnum^{n + 1}$，$F: D \to \realnum$，满足条件：
    \begin{enumerate}[label=(\roman{*})]
        \item $F \in C^1(D)$；
        \item 点$(\bvec{x}_0, y_0) \in D$使得$F(\bvec{x}_0, y_0) = 0$；
        \item $\dfrac{\partial F(\bvec{x}_0, y_0)}{\partial y} \neq 0$，
    \end{enumerate}
    则存在$\delta, \eta > 0$以及唯一的函数$f: B_\delta (\bvec{x}_0) \to (y_0 - \eta, y_0 + \eta)$具有性质
    \begin{enumerate}
        \item $f(\bvec{x}_0) = y_0$，且对任意满足$\norm{\bvec{x} - \bvec{x}_0} < \delta$的$\bvec{x}$，有$F(\bvec{x}, f(\bvec{x})) = 0$；
        \item $f \in C^1 (B_\delta (\bvec{x}_0))$；
        \item 对$\bvec{x} \in B_\delta (\bvec{x}_0)$，$y = f(\bvec{x})$，有
        \[D_i f(\bvec{x}) = -\frac{\dfrac{\partial F}{\partial x_i}(\bvec{x}, y)}{\dfrac{\partial F}{\partial y}(\bvec{x}, y)}, i = 1, 2, \dots, n\eqper\]
    \end{enumerate}
\end{theorem}

\section{隐映射定理}
我们先引入几个记号。设想有$m$个方程形成的方程组
\[\begin{cases}
    F_1(x_1, \dots, x_n, y_1, \dots, y_m) = 0,\\
    \qquad \dots\dots\\
    F_m(x_1, \dots, x_n, y_1, \dots, y_m) = 0
\end{cases}\label{隐映射定理1}\tag{1}\]
如果这个方程组是一个合适的约束，那么我们可以期望从中解出$y_1, \dots, y_m$，使得其中的每一个都是$x_1, \dots, x_n$的函数，即
\[\begin{cases}
    y_1 = f_1(x_1, \dots, x_n)\\
    \qquad \dots\dots\\
    y_m = f_m(x_1, \dots, x_n)
\end{cases}\label{隐映射定理2}\tag{2}\]
为了缩短记号，可令
\[\bvec{F} = \begin{bmatrix}
    F_1\\ \vdots\\ F_m
\end{bmatrix},
\boldf = \begin{bmatrix}
    f_1\\ \vdots\\ f_m
\end{bmatrix}\]
那么\eqref{隐映射定理1}式可以写为
\[\bvec{F}(\bvec{x}, \bvec{y}) = \bvec{0}\]
\eqref{隐映射定理2}式可以写为
\[\bvec{y} = \boldf (\bvec{x})\eqper\]

我们设$\bvec{F}$定义在开集$D \subset \realnum^{m + n}$，那么在$m \times (n + m)$矩阵
\[J \bvec{F} = \begin{bmatrix}
    \dfrac{\partial F_1}{\partial x_1} & \cdots & \dfrac{\partial F_1}{\partial x_n} & \dfrac{\partial F_1}{\partial y_1} & \cdots & \dfrac{\partial F_1}{\partial y_m}\\[1ex]
    \vdots & \ddots & \vdots & \vdots & \ddots & \vdots\\
    \dfrac{\partial F_m}{\partial x_1} & \cdots & \dfrac{\partial F_m}{\partial x_n} & \dfrac{\partial F_m}{\partial y_1} & \cdots & \dfrac{\partial F_m}{\partial y_m}
\end{bmatrix}\]
中作分块$J\bvec{F} = \begin{bmatrix}
    J_x \bvec{F} & J_y \bvec{F}
\end{bmatrix}$，
其中
\[J_x \bvec{F} = \begin{bmatrix}
    \dfrac{\partial F_1}{\partial x_1} & \cdots & \dfrac{\partial F_1}{\partial x_n}\\
    \vdots & \ddots & \vdots\\
    \dfrac{\partial F_m}{\partial x_1} & \cdots & \dfrac{\partial F_m}{\partial x_n}
\end{bmatrix},
J_y \bvec{F} = \begin{bmatrix}
    \dfrac{\partial F_1}{\partial y_1} & \cdots & \dfrac{\partial F_1}{\partial y_m}\\
    \vdots & \ddots & \vdots\\
    \dfrac{\partial F_m}{\partial y_1} & \cdots & \dfrac{\partial F_m}{\partial y_m}
\end{bmatrix}\]
其中$J_y \bvec{F}$是$m$阶方阵。

\begin{theorem}[隐映射定理]
    设开集$D \subset \realnum^{n + m}$，映射$\bvec{F}: D \to \realnum^m$，满足下列条件：
    \begin{enumerate}[label=(\roman{*})]
        \item $\bvec{F} \in C^1(D)$；
        \item 点$(\bvec{x}_0, \bvec{y}_0) \in D$使得$\bvec{F}(\bvec{x}_0, \bvec{y}_0) = \bvec{0}$；
        \item $\det[J_y \bvec{F}(\bvec{x}_0, \bvec{y}_0)] \neq 0$，
    \end{enumerate}
    则存在$\delta, \eta > 0$以及唯一的函数$\boldf: B_\delta (\bvec{x}_0) \to B_\eta (\bvec{y}_0)$具有性质
    \begin{enumerate}
        \item $\bvec{f}(\bvec{x}_0) = \bvec{y}_0$，且对任意满足$\norm{\bvec{x} - \bvec{x}_0} < \delta$的$\bvec{x}$，有$\bvec{F}(\bvec{x}, \bvec{f}(\bvec{x})) = \bvec{0}$；
        \item $\bvec{f} \in C^1 (B_\delta (\bvec{x}_0), \realnum^m)$；
        \item 对$\bvec{x} \in B_\delta (\bvec{x}_0)$，$\bvec{y} = \bvec{f}(\bvec{x})$，有
        \[J\bvec{f}(\bvec{x}) = -(J_y \bvec{F}(\bvec{x}, \bvec{y}))^{-1} J_x \bvec{F}(\bvec{x}, \bvec{y})\eqper\]
    \end{enumerate}
\end{theorem}