% Files
% MathematicalAnalysis/14多变量函数的微分学.tex
% 2023-04-22 11:59:26 +08:00
%
% 548 lines
% 32 KiB
% TeX
% Raw Blame History
%
% This file contains ambiguous Unicode characters
% This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
\chapter{多变量函数的微分学}
\section{方向导数和偏导数}
\begin{definition}[方向导数]
设开集$D \subset \ndreal$,$f: D \to \realnum$,$\bvec{u} \in \realnum^n$,$\norm{\bvec{u}} = 1$,此时称$\bvec{u}$为一个方向,$\bvec{x}_0 \in D$。如果极限
\[\tolim{t}{0} \frac{f(\bvec{x}_0 + t\bvec{u}) - f(\bvec{x}_0)}{t}\]
存在且有限,那么称这个极限是函数$f$在点$\bvec{x}_0$处沿方向$\bvec{u}$的方向导数,记为$\dfrac{\partial f}{\partial \bvec{u}} (\bvec{x}_0)$。
\end{definition}
\begin{remark}
令$\phi(t) = f(\bvec{a} + t \bvec{u})$,则显然$\deriv{\phi}(0) = \dfrac{\partial f}{\partial \bvec{u}} (\bvec{a})$。
\end{remark}
\begin{definition}[偏导数]
讨论下列单位坐标向量
\begin{align*}
\bvec{e}_1 & = (1, 0, 0, \dots, 0)\\
\bvec{e}_2 & = (0, 1, 0, \dots, 0)\\
& \quad \dots\\
\bvec{e}_n & = (0, 0, \dots, 0, 1)
\end{align*}
称函数$f$在点$\bvec{x}_0$处沿方向$\bvec{e}_i$的方向导数为$f$在$\bvec{x}_0$处的第$i$个一阶偏导数,记作
\[\frac{\partial f}{\partial x_i}(\bvec{x}_0)\]
或
\[D_i f(\bvec{x}_0)\]
并称$D_i = \dfrac{\partial}{\partial x_i}$为第$i$个偏微分算子,$i = 1, 2, \dots, n$。
\end{definition}
\section{多变量函数的微分}
我们希望与一维函数时类似,用一个切平面来线性近似一个曲面在某一点附近的值,即如果我们已知某空间曲面$S$的函数表示为$z = f(x, y)$,那么给定$S$上一点$P = (x_0, y_0, z_0)$,考察曲面上该点上的切平面的方程。首先其方程过$P$,因此应为
\[z = z_0 + a(x - x_0) + b(y - y_0)\]
其次作为切平面应该有$z_0 = f(x_0, y_0)$,同时
\[f(x, y) - z_0 - a(x - x_0) - b(y - y_0) = o\left(\sqrt{(x - x_0)^2 + (y - y_0)^2}\right)\]
即
\[f(x, y) - f(x_0, y_0) = a(x - x_0) + b(y - y_0) + o \left(\sqrt{(x - x_0)^2 + (y - y_0)^2}\right)\]
再进一步,我们希望线性地近似一个多元函数。假设我们已知函数$u = f(x, y, z)$。那么给定一点$P = (x_0, y_0, z_0)$,考察函数在该点附近的线性近似
\[u = u_0 + a(x - x_0) + b(y - y_0) + c(z - z_0)\]
如果它是已知函数在$P$的线性近似,那么$u_0 = f(x_0, y_0, z_0)$,且
\[f(x, y, z) - f(x_0, y_0, z_0) = a\Delta x + b \Delta y + c \Delta z + o\left(\sqrt{\Delta x^2 + \Delta y^2 + \Delta z^2}\right)\]
其中
\[\Delta x = x - x_0, \Delta y = y - y_0, \Delta z = z - z_0\]
\begin{definition}[函数的微分]
设$D \subset \ndreal$,$f: D \to \realnum$。取定一点$\bvec{x}_0 \in D\interior$。如果存在$n$维向量$\bvec{A} = (\lambda_1, \lambda_2, \dots, \lambda_n)$,满足
\[f(\bvec{x}_0 + \Delta \bvec{x}) - f(\bvec{x}_0) = \brak{\bvec{A}, \Delta \bvec{x}} + o(\norm{\Delta \bvec{x}})\]
那么称函数$f$在点$\bvec{x}_0$处可微,并称$\brak{\bvec{A}, \Delta \bvec{x}}$为$f$在$\bvec{x}_0$处的微分,记作
\[\dif f(\bvec{x}_0) = \brak{\bvec{A}, \Delta \bvec{x}}\]
其中$\bvec{A}$称为微分系数。
\end{definition}
设$f$在$\bvec{a}$点可微,$\dif f(\bvec{a}) = \brak{A, \Delta \bvec{x}}$,$A = (\lambda_1, \lambda_2, \dots, \lambda_n)$。因此
\[\tolim{\norm{\Delta \bvec{x}}}{0} \frac{\abs{f(\bvec{a} + \Delta \bvec{x}) - f(\bvec{a}) - \brak{A, \Delta \bvec{x}}}}{\norm{\Delta \bvec{x}}} = 0\]
记
\begin{align*}
\bvec{u}_1 & = (1, 0, 0, \dots, 0)\\
\bvec{u}_2 & = (0, 1, 0, \dots, 0)\\
& \quad \dots\\
\bvec{u}_n & = (0, 0, \dots, 0, 1)
\end{align*}
为确定微分系数,取$\Delta \bvec{x} = t \bvec{u}_i$,那么
\[\norm{\Delta \bvec{x}} = \abs{t}, \brak{A, \Delta \bvec{x}} = \brak{A, t\bvec{u}_i} = t\lambda_i\]
代入上式
\[\tolim{t}{0} \abs{\frac{f(\bvec{a} + t \bvec{u}_i) - f(\bvec{a}) - t\lambda_i}{t}} = 0\]
因此
\[\lambda_i = \tolim{t}{0} \frac{f(\bvec{a} + t\bvec{u}_i) - f(\bvec{a})}{t} = \frac{\partial f}{\partial x_i} (\bvec{a}), i = 1, 2, \dots, n\eqper\]
\begin{corollary}
设$f:D \to \realnum$,$\bvec{a} \in D\interior$。如果$f$在$\bvec{a}$点可微,则$f$在$\bvec{a}$点的偏导数存在,且
\[\dif f(\bvec{a}) = \frac{\partial f}{\partial x_1}(\bvec{a}) \Delta x_1 + \frac{\partial f}{\partial x_2}(\bvec{a}) \Delta x_2 + \dots + \frac{\partial f}{\partial x_n}(\bvec{a}) \Delta x_n\eqper\]
\end{corollary}
\begin{corollary}
设$f:D \to \realnum$,$\bvec{a} \in D\interior$。如果$f$在$\bvec{a}$点可微,则$f$在$\bvec{a}$点连续。
\end{corollary}
\begin{proof}
\begin{align*}
\abs{f(\bvec{a} + \Delta \bvec{x}) - f(\bvec{a})} & = \abs{\brak{A, \Delta \bvec{x}} + o(\norm{\Delta \bvec{x}})}\\
& \leq \norm{A} \cdot \norm{\Delta \bvec{x}} + \abs{o(\norm{\Delta \bvec{x}})} \to 0\qedhere
\end{align*}
\end{proof}
\begin{definition}
记
\[Jf(\bvec{x}) = (D_1 f(\bvec{x}), D_2f(\bvec{x}), \dots, D_n f(\bvec{x}))\]
并称它为函数$f$在点$\bvec{x}$处的Jacobian。函数的Jacobian也常记为$\gra f$或$\nabla f$,即
\[\gra f(\bvec{x}) = J f(\bvec{x})\]
称之为数量函数$f$的梯度。
\end{definition}
\begin{proposition}
如果$f$在$\bvec{a}$点可微,则对于任意方向$\bvec{u} \in \ndreal$,$\norm{\bvec{u}} = 1$,有
\[D_{\bvec{u}} f(\bvec{a}) = \frac{\partial f}{\partial \bvec{u}} (\bvec{a}) = \brak{\gra f(\bvec{a}), \bvec{u}}\eqper\]
\end{proposition}
\begin{corollary}
对于任意方向$\bvec{u}$,$\abs{D_{\bvec{u}} f(\bvec{a})} \leq \norm{\gra f(\bvec{a})}$。
若$\gra f(\bvec{a}) \neq \bvec{0}$,取$\bvec{u} = \frac{\gra f(\bvec{a})}{\norm{\gra f(\bvec{a})}}$,则$D_{\bvec{u}} f(\bvec{a}) = \norm{\gra f(\bvec{a})}$。
这说明,$f$在$\bvec{a}$的梯度向量的方向是$f$值增加最快的方向,大小是$f$在该点所有方向导数的最大值。
\end{corollary}
下面的命题给出了一个函数可微的必要条件。
\begin{proposition}
若函数$f$在$\bvec{a}$点可微,则存在
\[\gra f(\bvec{a}) = (D_1 f(\bvec{a}), \dots, D_n f(\bvec{a}))\]
从而在该点的所有方向导数都存在。
\end{proposition}
下面的命题则给出了一个函数可微的充分条件。
\begin{proposition}
如果$f$的每个偏导数$D_i f(\bvec{x}), i = 1, 2, \dots, n$在$\bvec{a}$点的某个邻域内都存在,且在$\bvec{x} = \bvec{a}$点连续,则$f$在$\bvec{a}$点可微。
\end{proposition}
\begin{proof}
以$n = 2$为例。在$P = (a, b)$点附近考虑函数$f(x, y)$:
\[f(a + \Delta x, b + \Delta y) - f(a, b) = f(a + \Delta x, b + \Delta y) - f(a + \Delta x, b) + f(a + \Delta x, b) - f(a, b)\]
应用一元函数中值定理,存在$\eta, \theta \in (0, 1)$满足
\begin{align*}
f(a + \Delta x, b + \Delta y) - f(a + \Delta x, b) & = D_y f(a + \Delta x, b + \eta \Delta y) \Delta y\\
f(a + \Delta x, b) - f(a, b) & = D_x f(a + \theta \Delta x, b)\Delta x
\end{align*}
可以将上式凑配为
\begin{align*}
f(a + \Delta x, b + \Delta y) - f(a + \Delta x, b) & = D_y f(a, b) \Delta y + [D_y f(a + \Delta x, b + \eta \Delta y) - D_y f(a, b)] \Delta y\\
f(a + \Delta x, b) - f(a, b) & = D_x f(a, b) \Delta x + [D_x f(a + \theta \Delta x, b) - D_x f(a, b)] \Delta x
\end{align*}
记
\begin{align*}
[\alpha] & = D_x f(a + \theta \Delta x, b) - D_x f(a, b)\\
[\beta] & = D_y f(a + \Delta x, b + \eta \Delta y) - D_y f(a, b)
\end{align*}
那么
\[f(a + \Delta x, b + \Delta y) - f(a, b) = D_x f(a, b) \Delta x + D_y f(a, b) \Delta y + [\alpha] \Delta x + [\beta] \Delta y\]
于是我们只需证明$[\alpha] \Delta x + [\beta] \Delta y = o\left(\sqrt{\Delta x^2 + \Delta y^2}\right)$。我们已知$D_x f(x, y), D_y f(x, y)$在$P = (a, b)$点连续,因此
\[\frac{\abs{[\alpha]\Delta x + [\beta] \Delta y}}{\sqrt{\Delta x^2 + \Delta y^2}} \leq \abs{[\alpha]} + \abs{[\beta]} \to 0\]
综上,
\[f(a + \Delta x, b + \Delta y) - f(a, b) = D_x f(a, b) \Delta x + D_y f(a, b) \Delta y + o\left[\sqrt{\Delta x^2 + \Delta y^2}\right] \eqper \qedhere\]
\end{proof}
总结起来,偏导数在$\bvec{a}$点都连续可以推出函数在$\bvec{a}$点可微,进而可以推出函数在$\bvec{a}$点连续,也可以推出函数在$\bvec{a}$点所有方向导数都存在。
\section{向量值函数的微分}
\begin{definition}
如果映射\boldf 满足存在Jacobian $J \boldf (\bvec{x}_0)$且满足
\[\boldf (\bvec{x}_0 + \Delta \bvec{x}) - \boldf (\bvec{x}_0) = J \boldf (\bvec{x}_0) \Delta \bvec{x} + o\left(\norm{\Delta \bvec{x}}\right)\]
其中
\[J\boldf (\bvec{x}_0) =
\begin{bmatrix}
D_1 f_1(\bvec{x}_0) & \cdots & D_n f_1(\bvec{x}_0)\\
\vdots & \ddots & \vdots\\
D_1 f_m(\bvec{x}_0) & \cdots & D_n f_m(\bvec{x}_0)
\end{bmatrix}
=
\begin{bmatrix}
\gra f_1 (\bvec{x}_0)\\
\vdots\\
\gra f_m (\bvec{x}_0)
\end{bmatrix}\]
则称映射\boldf 在$\bvec{x}_0$点可微,此时\boldf 在$\bvec{x}_0$点的微分记为
\[\dif \boldf (\bvec{x}_0) = J \boldf (\bvec{x}_0) \Delta \bvec{x} \eqper\]
\end{definition}
\begin{theorem}
若映射\boldf 在开集$D$上存在Jacobian $J \boldf$,且$J \boldf$的各元素在点$\bvec{x}_0$处都连续,则映射\boldf 在点$\bvec{x}_0$处可微。
\end{theorem}
\section{复合求导}
\begin{theorem}
设$D \subset \ndreal$,$\bvec{f}: D \to \realnum^m$,$\bvec{g}: \Omega \to \realnum^k$,$\bvec{f}(D) \subset \Omega \subset \realnum^m$。如果\boldf 在$\bvec{x}_0 \in D\interior$处可微,$\bvec{g}$在$\boldf(\bvec{x}_0)$处可微,那么复合映射$\bvec{g} \circ \boldf$在点$\bvec{x}_0$处可微,且
\[J(\bvec{g} \circ \bvec{f})(\bvec{x}_0) = J \bvec{g}(\boldf (\bvec{x}_0)) J \boldf(\bvec{x}_0)\eqper\]
\end{theorem}
如果我们记$\bvec{u} = \bvec{g}(\bvec{y}), \bvec{y} = \bvec{f}(\bvec{x})$,那么$\bvec{g} \circ \bvec{f}$的Jacobian可以写为
\[\begin{bmatrix}
\dfrac{\partial u_1}{\partial x_1} & \dfrac{\partial u_1}{\partial x_2} & \cdots & \dfrac{\partial u_1}{\partial x_n}\\[1em]
\dfrac{\partial u_2}{\partial x_1} & \dfrac{\partial u_2}{\partial x_2} & \cdots & \dfrac{\partial u_2}{\partial x_n}\\[1ex]
\vdots & \vdots & \ddots & \vdots\\
\dfrac{\partial u_k}{\partial x_1} & \dfrac{\partial u_k}{\partial x_2} & \cdots & \dfrac{\partial u_k}{\partial x_n}
\end{bmatrix}
=
\begin{bmatrix}
\dfrac{\partial u_1}{\partial y_1} & \dfrac{\partial u_1}{\partial y_2} & \cdots & \dfrac{\partial u_1}{\partial y_m}\\[1em]
\dfrac{\partial u_2}{\partial y_1} & \dfrac{\partial u_2}{\partial y_2} & \cdots & \dfrac{\partial u_2}{\partial y_m}\\[1ex]
\vdots & \vdots & \ddots & \vdots\\
\dfrac{\partial u_k}{\partial y_1} & \dfrac{\partial u_k}{\partial y_2} & \cdots & \dfrac{\partial u_k}{\partial y_m}
\end{bmatrix}
\begin{bmatrix}
\dfrac{\partial y_1}{\partial x_1} & \dfrac{\partial y_1}{\partial x_2} & \cdots & \dfrac{\partial y_1}{\partial x_n}\\[1em]
\dfrac{\partial y_2}{\partial x_1} & \dfrac{\partial y_2}{\partial x_2} & \cdots & \dfrac{\partial y_2}{\partial x_n}\\[1ex]
\vdots & \vdots & \ddots & \vdots\\
\dfrac{\partial y_m}{\partial x_1} & \dfrac{\partial y_m}{\partial x_2} & \cdots & \dfrac{\partial y_m}{\partial x_n}
\end{bmatrix}\eqper\]
\section{隐函数定理}
\begin{theorem}[隐函数定理]
设开集$D \subset \realnum^2$,函数$F: D \to \realnum$满足条件:
\begin{enumerate}[label=(\roman{*})]
\item $F \in C^1(D)$
\item 点$(x_0, y_0) \in D$使得$F(x_0, y_0) = 0$
\item $\dfrac{\partial F(x_0, y_0)}{\partial y} \neq 0$
\end{enumerate}
则存在$\delta, \eta > 0$以及唯一的函数$f: (x_0 - \delta, x_0 + \delta) \to (y_0 - \eta, y_0 + \eta)$具有性质
\begin{enumerate}
\item $f(x_0) = y_0$,且对任意满足$\abs{x - x_0} < \delta$的$x$,有$F(x, f(x)) = 0$
\item $f \in C^1(x_0 - \delta, x_0 + \delta)$
\item 对$x \in (x_0 - \delta, x_0 + \delta)$,$y = f(x)$,有
\[\deriv{f}(x) = -\frac{\dfrac{\partial F}{\partial x}(x, y)}{\dfrac{\partial F}{\partial y}(x, y)}\eqper\]
\end{enumerate}
\end{theorem}
\begin{theorem}
设开集$D \subset \realnum^{n + 1}$,$F: D \to \realnum$,满足条件:
\begin{enumerate}[label=(\roman{*})]
\item $F \in C^1(D)$
\item 点$(\bvec{x}_0, y_0) \in D$使得$F(\bvec{x}_0, y_0) = 0$
\item $\dfrac{\partial F(\bvec{x}_0, y_0)}{\partial y} \neq 0$
\end{enumerate}
则存在$\delta, \eta > 0$以及唯一的函数$f: B_\delta (\bvec{x}_0) \to (y_0 - \eta, y_0 + \eta)$具有性质
\begin{enumerate}
\item $f(\bvec{x}_0) = y_0$,且对任意满足$\norm{\bvec{x} - \bvec{x}_0} < \delta$的$\bvec{x}$,有$F(\bvec{x}, f(\bvec{x})) = 0$
\item $f \in C^1 (B_\delta (\bvec{x}_0))$
\item 对$\bvec{x} \in B_\delta (\bvec{x}_0)$,$y = f(\bvec{x})$,有
\[D_i f(\bvec{x}) = -\frac{\dfrac{\partial F}{\partial x_i}(\bvec{x}, y)}{\dfrac{\partial F}{\partial y}(\bvec{x}, y)}, i = 1, 2, \dots, n\eqper\]
\end{enumerate}
\end{theorem}
\section{隐映射定理}
我们先引入几个记号。设想有$m$个方程形成的方程组
\[\begin{cases}
F_1(x_1, \dots, x_n, y_1, \dots, y_m) = 0,\\
\qquad \dots\dots\\
F_m(x_1, \dots, x_n, y_1, \dots, y_m) = 0
\end{cases}\label{隐映射定理1}\tag{1}\]
如果这个方程组是一个合适的约束,那么我们可以期望从中解出$y_1, \dots, y_m$,使得其中的每一个都是$x_1, \dots, x_n$的函数,即
\[\begin{cases}
y_1 = f_1(x_1, \dots, x_n)\\
\qquad \dots\dots\\
y_m = f_m(x_1, \dots, x_n)
\end{cases}\label{隐映射定理2}\tag{2}\]
为了缩短记号,可令
\[\bvec{F} = \begin{bmatrix}
F_1\\ \vdots\\ F_m
\end{bmatrix},
\boldf = \begin{bmatrix}
f_1\\ \vdots\\ f_m
\end{bmatrix}\]
那么\eqref{隐映射定理1}式可以写为
\[\bvec{F}(\bvec{x}, \bvec{y}) = \bvec{0}\]
\eqref{隐映射定理2}式可以写为
\[\bvec{y} = \boldf (\bvec{x})\eqper\]
我们设$\bvec{F}$定义在开集$D \subset \realnum^{m + n}$,那么在$m \times (n + m)$矩阵
\[J \bvec{F} = \begin{bmatrix}
\dfrac{\partial F_1}{\partial x_1} & \cdots & \dfrac{\partial F_1}{\partial x_n} & \dfrac{\partial F_1}{\partial y_1} & \cdots & \dfrac{\partial F_1}{\partial y_m}\\[1ex]
\vdots & \ddots & \vdots & \vdots & \ddots & \vdots\\
\dfrac{\partial F_m}{\partial x_1} & \cdots & \dfrac{\partial F_m}{\partial x_n} & \dfrac{\partial F_m}{\partial y_1} & \cdots & \dfrac{\partial F_m}{\partial y_m}
\end{bmatrix}\]
中作分块$J\bvec{F} = \begin{bmatrix}
J_x \bvec{F} & J_y \bvec{F}
\end{bmatrix}$
其中
\[J_x \bvec{F} = \begin{bmatrix}
\dfrac{\partial F_1}{\partial x_1} & \cdots & \dfrac{\partial F_1}{\partial x_n}\\
\vdots & \ddots & \vdots\\
\dfrac{\partial F_m}{\partial x_1} & \cdots & \dfrac{\partial F_m}{\partial x_n}
\end{bmatrix},
J_y \bvec{F} = \begin{bmatrix}
\dfrac{\partial F_1}{\partial y_1} & \cdots & \dfrac{\partial F_1}{\partial y_m}\\
\vdots & \ddots & \vdots\\
\dfrac{\partial F_m}{\partial y_1} & \cdots & \dfrac{\partial F_m}{\partial y_m}
\end{bmatrix}\]
其中$J_y \bvec{F}$是$m$阶方阵。
\begin{theorem}[隐映射定理]
设开集$D \subset \realnum^{n + m}$,映射$\bvec{F}: D \to \realnum^m$,满足下列条件:
\begin{enumerate}[label=(\roman{*})]
\item $\bvec{F} \in C^1(D)$
\item 点$(\bvec{x}_0, \bvec{y}_0) \in D$使得$\bvec{F}(\bvec{x}_0, \bvec{y}_0) = \bvec{0}$
\item $\det[J_y \bvec{F}(\bvec{x}_0, \bvec{y}_0)] \neq 0$
\end{enumerate}
则存在$\delta, \eta > 0$以及唯一的函数$\boldf: B_\delta (\bvec{x}_0) \to B_\eta (\bvec{y}_0)$具有性质
\begin{enumerate}
\item $\bvec{f}(\bvec{x}_0) = \bvec{y}_0$,且对任意满足$\norm{\bvec{x} - \bvec{x}_0} < \delta$的$\bvec{x}$,有$\bvec{F}(\bvec{x}, \bvec{f}(\bvec{x})) = \bvec{0}$
\item $\bvec{f} \in C^1 (B_\delta (\bvec{x}_0), \realnum^m)$
\item 对$\bvec{x} \in B_\delta (\bvec{x}_0)$,$\bvec{y} = \bvec{f}(\bvec{x})$,有
\[J\bvec{f}(\bvec{x}) = -(J_y \bvec{F}(\bvec{x}, \bvec{y}))^{-1} J_x \bvec{F}(\bvec{x}, \bvec{y})\eqper\]
\end{enumerate}
\end{theorem}
\section{逆映射定理}
给定$\boldf: D \to \ndreal$,$D \subset \ndreal$。考察\boldf 的反函数及其性质:$\bvec{y} = \boldf^{-1}(\bvec{x})$。
考虑应用隐映射定理。$\bvec{y} = \boldf^{-1}(\bvec{x})$意味着$\bvec{x} = \boldf(\bvec{y})$。因此定义$\bvec{F}:\tilde{D} \to \ndreal$,满足
\[\bvec{F}(\bvec{x}, \bvec{y}) = \bvec{x} - \boldf(\bvec{y}), (\bvec{x}, \bvec{y}) \in \ndreal \times D = \tilde{D} \subset \realnum^{2n}\]
再取$\bvec{y}_0 \in D\interior$,$\bvec{x}_0 = \boldf(\bvec{y}_0)$,那么$(\bvec{x}_0, \bvec{y}_0) \in \tilde{D}\interior$,$\bvec{F}(\bvec{x}_0, \bvec{y}_0) = \bvec{0}$。
如果我们假设$\bvec{f} \in C^1$,那么
\[J_{\bvec{y}} \bvec{F}(\bvec{x}, \bvec{y}) = -J \bvec{f}(\bvec{y})\]
\[J_{\bvec{x}} \bvec{F}(\bvec{x}, \bvec{y}) = J\bvec{x} = \bvec{I}_n \eqper\]
\begin{theorem}[逆映射定理(局部)]
设$\bvec{f} \in C^1(D, \ndreal), D \subset \ndreal, \bvec{y}_0 \in D\interior$满足$\det(J\bvec{f}(\bvec{y}_0)) \neq 0$,那么存在$\delta, \eta > 0$以及函数$\bvec{g}: B_\delta (\bvec{x}_0) \to B_\eta(\bvec{y}_0)$,其中$\bvec{x}_0 = \bvec{f}(\bvec{y}_0)$,满足以下性质:
\begin{enumerate}
\item $\bvec{g}(\bvec{x}_0) = \bvec{y}_0$,且对任意满足$\norm{\bvec{x} - \bvec{x}_0} < \delta$的$\bvec{x}$,有$\bvec{f}(\bvec{g}(\bvec{x})) = \bvec{x}$
\item $\bvec{g} \in C^1(B_\delta(\bvec{x}_0), \ndreal)$
\item $J\bvec{g}(\bvec{x}) = [J\bvec{f}(\bvec{y})]^{-1}$,其中$\bvec{y} = \bvec{g}(\bvec{x})$
\end{enumerate}
\end{theorem}
\begin{theorem}[逆映射定理]
设$\bvec{f} \in C^1(D, \ndreal), D \subset\ndreal$为开集,且$\bvec{f}:D \to \ndreal$为单射,对任意的$\bvec{y} \in D$有$\det(J\bvec{f}(\bvec{y})) \neq 0$。记$\Omega = \bvec{f}(D)$,则存在$\bvec{f}$的反函数$\bvec{f}^{-1} \in C^1 (\Omega, \ndreal)$,且对任意的$\bvec{x} \in \Omega$有
\[J\bvec{f}^{-1}(\bvec{x}) = (J\bvec{f}(\bvec{y}))^{-1}, \bvec{y} = \bvec{f}^{-1}(\bvec{x})\eqper\]
\end{theorem}
\section{高阶偏导数}
\begin{definition}
设在开集$D$上的每一点,函数$f$存在偏导数
\[D_i f(\bvec{x}) = \frac{\partial f}{\partial x_i}(\bvec{x}), i = 1, 2, \dots, n\]
称它们为$f$的一阶偏导数。如果对这些偏导函数又可取偏导数,得出的就是$f$的二阶偏导函数,仿照此可以定义三阶偏导函数乃至更高阶的偏导数。将一阶偏导数$\dfrac{\partial f}{\partial x_j}$再对$x_i$求偏导数时,我们把$\dfrac{\partial}{\partial x_i}\left(\dfrac{\partial f}{\partial x_j}\right)$记作$\dfrac{\partial^2 f}{\partial x_i \partial x_j}$,如果$i = j$,那么把$\dfrac{\partial^2 f}{\partial x_i \partial x_i}$记作$\dfrac{\partial^2 f}{\partial x_i^2}$。
\end{definition}
\begin{theorem}[Clairaut定理]
设$f:D \to \realnum$,$D \subset \realnum^2$是开集,$P = (x_0, y_0) \in D$。若$\dfrac{\partial^2 f}{\partial x \partial y}$和$\dfrac{\partial^2 f}{\partial y \partial x}$在$D$内存在且在$P$点连续,那么二者在该点相等。
\end{theorem}
\begin{corollary}
设$f: D \to \realnum$,$D \subset \ndreal$是开集。若$f$在$D$内所有$k$阶偏导数都存在且连续,则$k$阶偏导数的值与关于自变量的求导次序无关。
\end{corollary}
\section{拟微分平均值定理}
下面这个定理主要阐述的是多元数值函数的中值定理。
\begin{theorem}
设定义在凸区域$D \subset \ndreal$上的函数$f$可微,则对任何两点$\bvec{a}, \bvec{b} \in D$,在由$\bvec{a}$和$\bvec{b}$确定的直线段上有一点$\bvec{\xi}$使得
\[f(\bvec{b}) - f(\bvec{a}) = Jf(\bvec{\xi})(\bvec{b} - \bvec{a})\eqper\]
\end{theorem}
对于向量值函数,中值定理不一定成立,而有下面的拟微分平均值定理:
\begin{theorem}[拟微分平均值定理]
设凸区域$D \subset \ndreal$,$\boldf: D \to \realnum^m$,映射\boldf 在$D$上可微,则对于任何$\bvec{a}, \bvec{b} \in D$,在由$\bvec{a}, \bvec{b}$确定的线段上必有一点$\bvec{\xi}$使得
\[\norm{\boldf(\bvec{b}) - \boldf(\bvec{a})} \leq \norm{J\boldf(\bvec{\xi})} \norm{\bvec{b} - \bvec{a}}\eqper\]
\end{theorem}
\begin{corollary}
设区域$D \subset \ndreal$,$\bvec{f}:D \to \realnum^m$,如果$J\bvec{f} = \bvec{0}$在$D$上成立,则\boldf 在$D$上为一常向量。
\end{corollary}
\section{Taylor公式}
考虑用多项式近似一个多元函数,即对在$\bvec{a}$点有$m + 1$阶连续偏导数的$n$元函数$f(x)$,是否有$m$次多项式$P_m(\bvec{x})$,使得
\[f(\bvec{a} + \Delta \bvec{x}) = P_m (\Delta \bvec{x}) + o(\norm{\Delta \bvec{x}}^m)\]
成立?
我们设$f \in C^{m + 1}(B_r(\bvec{a})), \bvec{a} \in \ndreal, r > 0$,那么对任意满足$\norm{\Delta \bvec{x}} < r$的$\Delta \bvec{x}$,定义
\[\varphi(t) = f(\bvec{a} + t\Delta \bvec{x}) \in C^{m + 1}[0, 1]\]
应用一元函数的Taylor公式,对任意的$t \in [0, 1]$,都存在$\theta \in (0, 1)$满足
\[\varphi(t) = \sum_{k = 0}^m \frac{\varphi^{(k)}(0)}{k!}t^k + \frac{\varphi^{(m + 1)}(\theta t)}{(m + 1)!} t^{m + 1}\]
特别取$t = 1$得到
\[\varphi(1) = \sum_{k = 0}^m \frac{\varphi^{(k)}(0)}{k!} + \frac{\varphi^{(m + 1)}(\theta)}{(m + 1)!}\]
将引入的一元函数的表达式代入(特别注意求导项的代入):
计算
\begin{align*}
& \deriv{\varphi} (t) = \sum_{i = 1}^n \frac{\partial f(\bvec{a} + t\Delta \bvec{x})}{\partial x_i} \Delta x_i, \deriv{\varphi}(0) = \sum_{i = 1}^n \frac{\partial f(\bvec{a})}{\partial x_i}\Delta x_i\\
& \varphi^{\prime \prime} (t) = \sum_{i, j = 1}^n \frac{\partial^2 f(\bvec{a} + t\Delta \bvec{x})}{\partial x_i \partial x_j} \Delta x_i \Delta x_j, \varphi^{\prime \prime} (0) = \sum_{i, j = 1}^n \frac{\partial ^2 f(\bvec{a})}{\partial x_i \partial x_j}\Delta x_i \Delta x_j\\
& \dots \dots
\end{align*}
我们引入记号$\alpha = (\alpha_1, \dots, \alpha_n)$,其中每个$\alpha_i$都是非负整数,记
\[\abs{\alpha} = \alpha_1 + \dots + \alpha_n, \alpha! = \alpha_1! \dots \alpha_n!\]
如果$\bvec{x} = (x_1, \dots, x_n)$,那么记$x^\alpha = x_1^{\alpha_1} \dots x_n^{\alpha_n}$
对于多重指标$\alpha = (\alpha_1, \dots, \alpha_n)$,我们还引进记号
\[D^\alpha f(\bvec{a}) = \frac{\partial^{\alpha_1 + \dots + \alpha_n}f}{\partial x_1^{\alpha_1} \dots \partial x_n^{\alpha_n}} (\bvec{a})\eqper\]
于是我们可以叙述多元函数Taylor公式
\begin{theorem}[多元函数Taylor公式]\label{多元函数Taylor公式}
设$D \subset \ndreal$是一个凸区域,$f \in C^{m + 1}(D)$,$\bvec{a} = (a_1, \dots, a_n)$和$\bvec{a} + \bvec{h} = (a_1 + h_1, \dots, a_n + h_n)$是$D$中两点,则必存在$\theta \in (0, 1)$使得
\[f(\bvec{a} + \bvec{h}) = \sum_{k = 0}^m \sum_{\abs{\alpha} = k} \frac{D^\alpha f(\bvec{a})}{\alpha!} h^\alpha + R_m\]
其中
\[R_m = \sum_{\abs{\alpha} = m + 1} \frac{D^\alpha f(\bvec{a} + \theta \bvec{h})}{\alpha!} h^\alpha\]
称为Lagrange余项。
\end{theorem}
这个定理中的和式的意思是,对每个$\alpha$,都求$f$的$\alpha$阶偏导函数$D^\alpha f$在$\bvec{a}$处的值,并且对$x_i$求了$\alpha_i$次偏导就要在后面乘上$\dfrac{h_i^{\alpha_i}}{\alpha_i!}$。
我们再引入一个高阶微分的记号。对$\bvec{h} = (h_1, h_2, \dots, h_n)$,我们定义
\begin{align*}
& \qquad\left(h_1 \frac{\partial}{\partial x_1} + \dots + h_n \frac{\partial}{\partial x_n}\right)^k f(\bvec{a})\\
& = \sum_{\abs{\alpha} = k} \frac{k!}{\alpha!} \frac{\partial^{\alpha_1}}{\partial x_1^{\alpha_1}}\dots \frac{\partial^{\alpha_n}}{\partial x_n^{\alpha_n}}f(\bvec{a}) \bvec{h}^\alpha\\
& = \sum_{\abs{\alpha} = k} \frac{k!}{\alpha!}D^\alpha f(\bvec{a}) h^\alpha
\end{align*}
特别考虑二元函数的情况。如果设$x, y$的改变量为$h, k$,那么
\begin{align*}
\left(h \frac{\partial}{\partial x} + k \frac{\partial}{\partial y}\right) f(x, y) & = \frac{\partial f(x, y)}{\partial x}h + \frac{\partial f(x, y)}{\partial y}k\\
\left(h \frac{\partial}{\partial x} + k \frac{\partial}{\partial y}\right)^2 f(x, y) & = \frac{\partial^2 f(x, y)}{\partial x^2}h^2 + 2 \frac{\partial^2 f(x, y)}{\partial x \partial y}hk + \frac{\partial^2 f(x, y)}{\partial y^2}k^2\\
\left(h \frac{\partial}{\partial x} + k \frac{\partial}{\partial y}\right)^m f(x, y) & = \sum_{i = 0}^m \binom{m}{i} \frac{\partial^m f(x, y)}{\partial x^i \partial y^{m - i}} h^i k^{m - i}, m = 1, \dots, n + 1
\end{align*}
在一般的应用中特别重要的是Taylor公式的前三项。把它们具体写出来
\[f(\bvec{a} + \bvec{h}) = f(\bvec{a}) + \sum_{i = 1}^n \frac{\partial f}{\partial x_i}(\bvec{a})h_i + \frac{1}{2} \sum_{i, j = 1}^n \frac{\partial^2 f}{\partial x_i \partial x_j} (\bvec{a}) h_i h_j + \cdots\]
如果记
\[Hf(\bvec{a}) = \begin{bmatrix}
\dfrac{\partial^2 f}{\partial x_1^2}(\bvec{a}) & \cdots & \dfrac{\partial^2 f}{\partial x_1 \partial x_n}(\bvec{a})\\[1em]
\vdots & \ddots & \vdots\\[1ex]
\dfrac{\partial^2 f}{\partial x_n \partial x_1}(\bvec{a}) & \cdots & \dfrac{\partial^2 f}{\partial x_n^2}(\bvec{a})
\end{bmatrix}\]
进一步简记为
\[Hf(\bvec{a}) = \begin{bmatrix}
D_{11} f(\bvec{a}) & \cdots & D_{1n} f(\bvec{a})\\
\vdots & \ddots & \vdots\\
D_{n1} f(\bvec{a}) & \cdots & D_{nn} f(\bvec{a})
\end{bmatrix}\eqper\]
$Hf$称为$f$的Hessian,它是一个$n$阶对称方阵。
那么前面的Taylor公式可以写成
\[f(\bvec{a} + \bvec{h}) = f(\bvec{a}) + Jf(\bvec{a})\bvec{h} + \frac{1}{2} \bvec{h}^{\mathrm{T}} Hf(\bvec{a}) \bvec{h} + \cdots\]
\begin{theorem}
在定理\ref{多元函数Taylor公式}的条件下,
\[R_m = O(\norm{\bvec{h}}^{m + 1})\eqper\]
于是我们可以把Taylor公式写成Peano余项的形式
\[f(\bvec{a} + \bvec{h}) = f(\bvec{a}) + Jf(\bvec{a}) \bvec{h} + \frac{1}{2} \bvec{h}^{\mathrm{T}} Hf(\bvec{a}) \bvec{h} + o(\norm{\bvec{h}}^2)\]
\end{theorem}
\section{极值}
\begin{definition}
设$D \subset \ndreal$,函数$f:D \to \realnum$,点$\bvec{p}_0 \in D\interior$,如果存在一个球$B_r(\bvec{p}_0) \subset D\interior$,使得$f(\bvec{p}) \geq f(\bvec{p}_0)$($f(\bvec{p}) > f(\bvec{p}_0)$)对一切$\bvec{p} \in B_r(\bvec{p}_0) \setminus \{\bvec{p}_0\}$成立,那么$\bvec{p}_0$称为$f$的一个(严格)极小值点,而$f(\bvec{p}_0)$称为函数$f$的一个(严格)极小值。
同样地可以定义(严格)极大值点和(严格)极大值。极小值和极大值统称极值。
\end{definition}
类似于Fermat引理我们可以得到极值点的必要条件
\begin{theorem}
设$n$元函数$f$在$\bvec{p}_0$取得极值,且$Jf(\bvec{p}_0)$存在,那么必须有$Jf(\bvec{p}_0) = \bvec{0}$。
若$\bvec{u}$是任意方向向量,且$f$在$\bvec{p}_0$处沿$\bvec{u}$的方向导数存在,则$D_{\bvec{u}} f(\bvec{p}_0) = 0$。
\end{theorem}
\begin{definition}
$D$中使得$Jf(\bvec{p}) = \bvec{0}$的一切内点称为函数$f$的驻点。偏导数存在的极值点一定是驻点,而驻点未必是极值点。
\end{definition}
\begin{definition}
设\(A = \begin{bmatrix}
a_{ij}
\end{bmatrix}\)
是一个$n$阶对称方阵。设
\[\bvec{x} = \begin{bmatrix}
x_1\\ x_2\\ \vdots\\ x_n
\end{bmatrix}\]
称
\[Q(\bvec{x}) = \bvec{x}^\mathrm{T} A \bvec{x} = \sum_{i, j = 1}^n a_{ij}x_i x_j\]
为$x_1, x_2, \dots, x_n$的一个二次型,方阵$A$称为二次型$Q$的系数方阵。
如果对任意$\bvec{x} \neq \bvec{0}$都有$Q(\bvec{x}) \geq 0$($\leq 0$),则称二次型$Q$是正(负)定的,其系数方阵$A$相应地称为正(负)定方阵。
如果对任意$\bvec{x} \neq \bvec{0}$都有$Q(\bvec{x}) > 0$($< 0$),则称二次型$Q$是严格正(负)定的,其系数方阵$A$相应地称为严格正(负)定方阵。
如果存在$\bvec{p}, \bvec{q} \in \ndreal$,使得$Q(\bvec{p}) < 0 < Q(\bvec{q})$,就称二次型$Q$是不定的,其系数方阵$A$相应地称为不定方阵。
\end{definition}
\begin{theorem}
设\(A = \begin{bmatrix}
a_{ij}
\end{bmatrix}\)
是一个$n$阶对称方阵。方阵$A$为严格正定的一个必要充分条件是它的各级顺序主子式均大于零。
\end{theorem}
欲证明一个方阵$A$负定,只需证明$-A$是正定的即可。
\begin{theorem}
设二阶对称方阵
\[A = \begin{bmatrix}
a_{11} & a_{12}\\
a_{21} & a_{22}
\end{bmatrix}\]
$A$为严格正(负)定的一个必要充分条件是
\[a_{11} > 0(a_{11} < 0),\]
\[\begin{vmatrix}
a_{11} & a_{12}\\
a_{21} & a_{22}
\end{vmatrix} > 0\]
$A$为不定矩阵的一个充分必要条件是
\[\begin{vmatrix}
a_{11} & a_{12}\\
a_{21} & a_{22}
\end{vmatrix} < 0\eqper\]
\end{theorem}
\begin{theorem}
设$\bvec{x}_0$是函数$f$的一个驻点,函数$f$在$\bvec{x}_0$的某一邻域内有连续的二阶偏导数。
\begin{enumerate}
\item 如果Hessian $Hf(\bvec{x}_0)$是严格正(负)定方阵,那么$\bvec{x}_0$是$f$的一个严格极小(大)值点。
\item 如果Hessian $Hf(\bvec{x}_0)$是不定方阵,那么$\bvec{x}_0$不是$f$的极值点。
\end{enumerate}
\end{theorem}
\section{条件极值}
首先我们引入一个问题。设$f: D \to \realnum$,$\Phi: D \to \realnum^m$,$D \subset \realnum^{n + m}$是开集,$(\bvec{x}, \bvec{y}) \in \realnum^{n + m}$。求在满足$\Phi(\bvec{x}, \bvec{y}) = \bvec{0}$的条件下$f(\bvec{x}, \bvec{y})$的最大/最小值,记为
\[\begin{cases}
\max f(\bvec{x}, \bvec{y})\\
\Phi(\bvec{x}, \bvec{y}) = 0
\end{cases}
\text{或}
\begin{cases}
\min f(\bvec{x}, \bvec{y})\\
\Phi(\bvec{x}, \bvec{y}) = 0
\end{cases}\]
其中$f(\bvec{x}, \bvec{y})$称为目标函数,$\Phi(\bvec{x}, \bvec{y}) = \bvec{0}$称为约束条件。
设在约束$\varphi(\bvec{x}, y) = 0$下$f(\bvec{x}, y)$在$P = (\bvec{x}_0, y_0) \in D$点达到极值,且$\dfrac{\partial \varphi}{\partial y}(\bvec{x}_0, y_0) \neq 0$,应用隐函数定理:在$P$点附近,方程$\varphi(\bvec{x}, y) = 0$确定了隐函数$y = y(\bvec{x})$。函数$F(\bvec{x}) = f(\bvec{x}, y(\bvec{x}))$在$\bvec{x}_0$达到极值,因此$J F(\bvec{x}_0) = \bvec{0}$。
因此
\[\frac{\partial F}{\partial x_i} (\bvec{x}_0) = \frac{\partial f}{\partial x_i}(\bvec{x}_0, y_0) + \frac{\partial f}{\partial y}(\bvec{x}_0, y_0) \frac{\partial y}{\partial x_i}(\bvec{x}_0) = 0, i = 1, 2, \cdots, n\]
而根据隐函数定理,
\[\frac{\partial y}{\partial x_i}(\bvec{x}_0) = -\frac{\dfrac{\partial \varphi}{\partial x_i}(\bvec{x}_0, y_0)}{\dfrac{\partial \varphi}{\partial y}(\bvec{x}_0, y_0)}\]
将它代入上式,
\[\frac{\partial f}{\partial x_i}(\bvec{x}_0, y_0) - \frac{\dfrac{\partial f}{\partial y}{(\bvec{x}_0, y_0)} \dfrac{\partial \varphi}{\partial x_i}(\bvec{x}_0, y_0)}{\dfrac{\partial \varphi}{\partial y}(\bvec{x}_0, y_0)} = 0\]
引入参数
\[\lambda = -\frac{\dfrac{\partial f}{\partial y}(\bvec{x}_0, y_0)}{\dfrac{\partial \varphi}{\partial y}(\bvec{x}_0, y_0)}\]
那么
\[\frac{\partial f}{\partial x_i}(\bvec{x}_0, y_0) + \lambda \frac{\partial \varphi}{\partial x_i}(\bvec{x}_0, y_0) = 0, i = 1, 2, \cdots, n\]
因此我们得到
\[Jf(\bvec{x}_0, y_0) + \lambda J\varphi(\bvec{x}_0, y_0) = \bvec{0}\]
是条件极值的一个必要条件。
\begin{theorem}[条件极值的必要条件]
设$f \in C^1(D)$,$\Phi \in C^1 (D, \realnum^m)$,$D \subset \realnum^{n + m}$是开集。记$P = (\bvec{x}_0, \bvec{y}_0) \in D$,又设$\det(J_{\bvec{y}}\Phi(\bvec{x}_0, \bvec{y}_0)) \neq 0$。如果$f(\bvec{x}, \bvec{y})$在约束$\Phi(\bvec{x}, \bvec{y}) = \bvec{0}$下在$P$点达到极值,则存在$\bvec{\Lambda} = (\lambda_1, \dots, \lambda_m)$使得
\[Jf(\bvec{x}_0, \bvec{y}_0) + \bvec{\Lambda} J \Phi(\bvec{x}_0, \bvec{y}_0) = \bvec{0}\eqper\]
这也就是说$(\bvec{x}_0, \bvec{y}_0)$满足方程组
\begin{align*}
& \frac{\partial f}{\partial x_k}(\bvec{x}_0, \bvec{y}_0) + \sum_{i = 1}^m \lambda_i \frac{\partial \Phi_i}{\partial x_k}(\bvec{x}_0, \bvec{y}_0) = 0, k = 1, \cdots, n\\
& \frac{\partial f}{\partial y_j}(\bvec{x}_0, \bvec{y}_0) + \sum_{i = 1}^m \lambda_i \frac{\partial \Phi_i}{\partial y_j}(\bvec{x}_0, \bvec{y}_0) = 0, j = 1, \cdots, m
\end{align*}
\end{theorem}
根据这个定理我们可以引入Lagrange乘数法。定义函数$L: D \times \realnum^m \to \realnum$
\[L(\bvec{z}, \bvec{\Lambda}) = f(\bvec{z}) + \bvec{\Lambda} \Phi(\bvec{z}), (\bvec{z}, \bvec{\Lambda}) \in D \times \realnum^m\]
$L$称为条件极值问题的Lagrange函数,$\bvec{\Lambda}$称为Lagrange乘数/乘子。根据条件极值的必要条件,在条件极值点$\bvec{z}_0 \in D$,存在$\bvec{\Lambda} \in \realnum^m$满足
\[J_{\bvec{z}} L (\bvec{z}_0, \bvec{\Lambda}) = J_{\bvec{z}} f(\bvec{z}_0) + \bvec{\Lambda} J_{\bvec{z}}\Phi(\bvec{z}_0) = \bvec{0}\]
此外
\[J_{\bvec{\Lambda}} L(\bvec{z}_0, \bvec{\Lambda}) = \Phi(\bvec{z}_0) = \bvec{0}\]
因此
\[JL(\bvec{z}_0, \bvec{\Lambda}) = (J_{\bvec{z}} L(\bvec{z}_0, \bvec{\Lambda}), J_{\bvec{\Lambda}} L(\bvec{z}_0, \bvec{\Lambda})) = \bvec{0} \eqper\]