548 lines
32 KiB
TeX
548 lines
32 KiB
TeX
\chapter{多变量函数的微分学}
|
||
\section{方向导数和偏导数}
|
||
\begin{definition}[方向导数]
|
||
设开集$D \subset \ndreal$,$f: D \to \realnum$,$\bvec{u} \in \realnum^n$且$\norm{\bvec{u}} = 1$,此时称$\bvec{u}$为一个方向,$\bvec{x}_0 \in D$。如果极限
|
||
\[\tolim{t}{0} \frac{f(\bvec{x}_0 + t\bvec{u}) - f(\bvec{x}_0)}{t}\]
|
||
存在且有限,那么称这个极限是函数$f$在点$\bvec{x}_0$处沿方向$\bvec{u}$的方向导数,记为$\dfrac{\partial f}{\partial \bvec{u}} (\bvec{x}_0)$。
|
||
\end{definition}
|
||
|
||
\begin{remark}
|
||
记$\phi(t) = f(\bvec{a} + t \bvec{u})$,则显然$\deriv{\phi}(0) = \dfrac{\partial f}{\partial \bvec{u}} (\bvec{a})$。
|
||
\end{remark}
|
||
|
||
\begin{definition}[偏导数]
|
||
讨论下列单位坐标向量
|
||
\begin{align*}
|
||
\bvec{e}_1 & = (1, 0, 0, \dots, 0)\\
|
||
\bvec{e}_2 & = (0, 1, 0, \dots, 0)\\
|
||
& \quad \dots\\
|
||
\bvec{e}_n & = (0, 0, \dots, 0, 1)
|
||
\end{align*}
|
||
称函数$f$在点$\bvec{x}_0$处沿方向$\bvec{e}_i$的方向导数为$f$在$\bvec{x}_0$处的第$i$个一阶偏导数,记作
|
||
\[\frac{\partial f}{\partial x_i}(\bvec{x}_0)\]
|
||
或
|
||
\[D_i f(\bvec{x}_0)\]
|
||
并称$D_i = \dfrac{\partial}{\partial x_i}$为第$i$个偏微分算子,$i = 1, 2, \dots, n$。
|
||
\end{definition}
|
||
|
||
\section{多变量函数的微分}
|
||
我们希望与一元函数时类似,用一个切平面来线性近似一个曲面在某一点附近的值,即如果我们已知某空间曲面$S$的函数表示为$z = f(x, y)$,那么给定$S$上一点$P = (x_0, y_0, z_0)$,考察曲面在该点处的切平面的方程。首先该平面过$P$,因此其方程应为
|
||
\[z = z_0 + a(x - x_0) + b(y - y_0)\]
|
||
其次作为切平面应该有$z_0 = f(x_0, y_0)$,同时
|
||
\[f(x, y) - z_0 - a(x - x_0) - b(y - y_0) = o\left(\sqrt{(x - x_0)^2 + (y - y_0)^2}\right)\]
|
||
即
|
||
\[f(x, y) - f(x_0, y_0) = a(x - x_0) + b(y - y_0) + o \left(\sqrt{(x - x_0)^2 + (y - y_0)^2}\right)\]
|
||
|
||
再进一步,我们希望线性地近似一个多元函数。假设我们已知函数$u = f(x, y, z)$。那么给定一点$P = (x_0, y_0, z_0)$,考察函数在该点附近的线性近似
|
||
\[u = u_0 + a(x - x_0) + b(y - y_0) + c(z - z_0)\]
|
||
如果它是已知函数在$P$的线性近似,那么$u_0 = f(x_0, y_0, z_0)$且
|
||
\[f(x, y, z) - f(x_0, y_0, z_0) = a\Delta x + b \Delta y + c \Delta z + o\left(\sqrt{\Delta x^2 + \Delta y^2 + \Delta z^2}\right)\]
|
||
其中
|
||
\[\Delta x = x - x_0, \Delta y = y - y_0, \Delta z = z - z_0\]
|
||
|
||
\begin{definition}[函数的微分]
|
||
设$D \subset \ndreal$,$f: D \to \realnum$。取定一点$\bvec{x}_0 \in D\interior$。如果存在$n$维向量$\bvec{A} = (\lambda_1, \lambda_2, \dots, \lambda_n)$,满足
|
||
\[f(\bvec{x}_0 + \Delta \bvec{x}) - f(\bvec{x}_0) = \brak{\bvec{A}, \Delta \bvec{x}} + o(\norm{\Delta \bvec{x}})\]
|
||
那么称函数$f$在点$\bvec{x}_0$处可微,并称$\brak{\bvec{A}, \Delta \bvec{x}}$为$f$在$\bvec{x}_0$处的微分,记作
|
||
\[\dif f(\bvec{x}_0) = \brak{\bvec{A}, \Delta \bvec{x}}\]
|
||
其中$\bvec{A}$称为微分系数。
|
||
\end{definition}
|
||
|
||
设$f$在$\bvec{a}$点可微,$\dif f(\bvec{a}) = \brak{A, \Delta \bvec{x}}$,$A = (\lambda_1, \lambda_2, \dots, \lambda_n)$。因此
|
||
\[\tolim{\norm{\Delta \bvec{x}}}{0} \frac{\abs{f(\bvec{a} + \Delta \bvec{x}) - f(\bvec{a}) - \brak{A, \Delta \bvec{x}}}}{\norm{\Delta \bvec{x}}} = 0\]
|
||
记
|
||
\begin{align*}
|
||
\bvec{u}_1 & = (1, 0, 0, \dots, 0)\\
|
||
\bvec{u}_2 & = (0, 1, 0, \dots, 0)\\
|
||
& \quad \dots\\
|
||
\bvec{u}_n & = (0, 0, \dots, 0, 1)
|
||
\end{align*}
|
||
|
||
为确定微分系数,取$\Delta \bvec{x} = t \bvec{u}_i$,那么
|
||
\[\norm{\Delta \bvec{x}} = \abs{t}, \brak{A, \Delta \bvec{x}} = \brak{A, t\bvec{u}_i} = t\lambda_i\]
|
||
代入上式
|
||
\[\tolim{t}{0} \abs{\frac{f(\bvec{a} + t \bvec{u}_i) - f(\bvec{a}) - t\lambda_i}{t}} = 0\]
|
||
因此
|
||
\[\lambda_i = \tolim{t}{0} \frac{f(\bvec{a} + t\bvec{u}_i) - f(\bvec{a})}{t} = \frac{\partial f}{\partial x_i} (\bvec{a}), i = 1, 2, \dots, n\eqper\]
|
||
|
||
\begin{corollary}
|
||
设$f:D \to \realnum$,$\bvec{a} \in D\interior$。如果$f$在$\bvec{a}$点可微,则在$\bvec{a}$点的偏导数存在,且
|
||
\[\dif f(\bvec{a}) = \frac{\partial f}{\partial x_1}(\bvec{a}) \Delta x_1 + \frac{\partial f}{\partial x_2}(\bvec{a}) \Delta x_2 + \dots + \frac{\partial f}{\partial x_n}(\bvec{a}) \Delta x_n\eqper\]
|
||
\end{corollary}
|
||
|
||
\begin{corollary}
|
||
设$f:D \to \realnum$,$\bvec{a} \in D\interior$。如果$f$在$\bvec{a}$点可微,则$f$在$\bvec{a}$点连续。
|
||
\end{corollary}
|
||
|
||
\begin{proof}
|
||
\begin{align*}
|
||
\abs{f(\bvec{a} + \Delta \bvec{x}) - f(\bvec{a})} & = \abs{\brak{A, \Delta \bvec{x}} + o(\norm{\Delta \bvec{x}})}\\
|
||
& \leq \norm{A} \cdot \norm{\Delta \bvec{x}} + \abs{o(\norm{\Delta \bvec{x}})} \to 0\qedhere
|
||
\end{align*}
|
||
\end{proof}
|
||
|
||
\begin{definition}
|
||
令
|
||
\[Jf(\bvec{x}) = (D_1 f(\bvec{x}), D_2f(\bvec{x}), \dots, D_n f(\bvec{x}))\]
|
||
并称它为函数$f$在点$\bvec{x}$处的Jacobian。函数的Jacobian也常记为$\gra f$或$\nabla f$,即
|
||
\[\gra f(\bvec{x}) = J f(\bvec{x})\]
|
||
称之为数量函数$f$的梯度。
|
||
\end{definition}
|
||
|
||
\begin{proposition}
|
||
如果$f$在$\bvec{a}$点可微,则对于任意方向$\bvec{u} \in \ndreal$,$\norm{\bvec{u}} = 1$,都有
|
||
\[D_{\bvec{u}} f(\bvec{a}) = \frac{\partial f}{\partial \bvec{u}} (\bvec{a}) = \brak{\gra f(\bvec{a}), \bvec{u}}\eqper\]
|
||
\end{proposition}
|
||
|
||
\begin{corollary}
|
||
对于任意方向$\bvec{u}$,$\abs{D_{\bvec{u}} f(\bvec{a})} \leq \norm{\gra f(\bvec{a})}$。
|
||
|
||
若$\gra f(\bvec{a}) \neq 0$,$\bvec{u} = \frac{\gra f(\bvec{a})}{\norm{\gra f(\bvec{a})}}$,则$D_{\bvec{u}} f(\bvec{a}) = \norm{\gra f(\bvec{a})}$。
|
||
|
||
这说明,$f$在$\bvec{a}$的梯度向量的方向是$f$值增加最快的方向,大小是$f$在该点所有方向导数的最大值。
|
||
\end{corollary}
|
||
|
||
下面的命题给出了一个函数可微的必要条件。
|
||
\begin{proposition}
|
||
若函数$f$在$\bvec{a}$点可微,则存在
|
||
\[\gra f(\bvec{a}) = (D_1 f(\bvec{a}), \dots, D_n f(\bvec{a}))\]
|
||
从而在该点的所有方向导数都存在。
|
||
\end{proposition}
|
||
|
||
下面的命题则给出了一个函数可微的充分条件。
|
||
\begin{proposition}
|
||
如果$f$的每个偏导数$D_i f(\bvec{x}), i = 1, 2, \dots, n$在$\bvec{a}$点的某个邻域内都存在,且在$\bvec{x} = \bvec{a}$点连续,则$f$在$\bvec{a}$点可微。
|
||
\end{proposition}
|
||
|
||
\begin{proof}
|
||
以$n = 2$为例。在$P = (a, b)$点附近考虑函数$f(x, y)$:
|
||
\[f(a + \Delta x, b + \Delta y) - f(a, b) = f(a + \Delta x, b + \Delta y) - f(a + \Delta x, b) + f(a + \Delta x, b) - f(a, b)\]
|
||
应用一元函数中值定理,存在$\eta, \theta \in (0, 1)$满足
|
||
\begin{align*}
|
||
f(a + \Delta x, b + \Delta y) - f(a + \Delta x, b) & = D_y f(a + \Delta x, b + \eta \Delta y) \Delta y\\
|
||
f(a + \Delta x, b) - f(a, b) & = D_x f(a + \theta \Delta x, b)\Delta x
|
||
\end{align*}
|
||
可以将上式凑配为
|
||
\begin{align*}
|
||
f(a + \Delta x, b + \Delta y) - f(a + \Delta x, b) & = D_y f(a, b) \Delta y + [D_y f(a + \Delta x, b + \eta \Delta y) - D_y f(a, b)] \Delta y\\
|
||
f(a + \Delta x, b) - f(a, b) & = D_x f(a, b) \Delta x + [D_x f(a + \theta \Delta x, b) - D_x f(a, b)] \Delta x
|
||
\end{align*}
|
||
记
|
||
\begin{align*}
|
||
[\alpha] & = D_x f(a + \theta \Delta x, b) - D_x f(a, b)\\
[\beta] & = D_y f(a + \Delta x, b + \eta \Delta y) - D_y f(a, b)
|
||
\end{align*}
|
||
那么
|
||
\[f(a + \Delta x, b + \Delta y) - f(a, b) = D_x f(a, b) \Delta x + D_y f(a, b) \Delta y + [\alpha] \Delta x + [\beta] \Delta y\]
|
||
于是我们只需证明$[\alpha] \Delta x + [\beta] \Delta y = o\left(\sqrt{\Delta x^2 + \Delta y^2}\right)$。我们已知$D_x f(x, y), D_y f(x, y)$在$P = (a, b)$点连续,因此
|
||
\[\frac{\abs{[\alpha]\Delta x + [\beta] \Delta y}}{\sqrt{\Delta x^2 + \Delta y^2}} \leq \abs{[\alpha]} + \abs{[\beta]} \to 0\]
|
||
综上,
|
||
\[f(a + \Delta x, b + \Delta y) - f(a, b) = D_x f(a, b) \Delta x + D_y f(a, b) \Delta y + o\left[\sqrt{\Delta x^2 + \Delta y^2}\right] \eqper \qedhere\]
|
||
\end{proof}
|
||
|
||
总结起来,偏导数在$\bvec{a}$点都连续可以推出函数在$\bvec{a}$点可微,进而可以推出函数在$\bvec{a}$点连续,也可以推出函数在$\bvec{a}$点所有方向导数都存在。
|
||
|
||
\section{向量值函数的微分}
|
||
\begin{definition}
|
||
如果映射\boldf 满足存在Jacobian $J \boldf (\bvec{x}_0)$且满足
|
||
\[\boldf (\bvec{x}_0 + \Delta \bvec{x}) - \boldf (\bvec{x}_0) = J \boldf (\bvec{x}_0) \Delta \bvec{x} + o\left(\norm{\Delta \bvec{x}}\right)\]
|
||
其中
|
||
\[J\boldf (\bvec{x}_0) =
|
||
\begin{bmatrix}
|
||
D_1 f_1(\bvec{x}_0) & \cdots & D_n f_1(\bvec{x}_0)\\
|
||
\vdots & \ddots & \vdots\\
|
||
D_1 f_m(\bvec{x}_0) & \cdots & D_n f_m(\bvec{x}_0)
|
||
\end{bmatrix}
|
||
=
|
||
\begin{bmatrix}
|
||
\gra f_1 (\bvec{x}_0)\\
|
||
\vdots\\
|
||
\gra f_m (\bvec{x}_0)
|
||
\end{bmatrix}\]
|
||
那么称映射\boldf 在$\bvec{x}_0$点可微,此时\boldf 在$\bvec{x}_0$点的微分记为
|
||
\[\dif \boldf (\bvec{x}_0) = J \boldf (\bvec{x}_0) \Delta \bvec{x} \eqper\]
|
||
\end{definition}
|
||
|
||
\begin{theorem}
|
||
若映射\boldf 在开集$D$上存在Jacobian $J \boldf$,且$J \boldf$的各元素在点$\bvec{x}_0$处都连续,则映射\boldf 在点$\bvec{x}_0$处可微。
|
||
\end{theorem}
|
||
|
||
\section{复合求导}
|
||
\begin{theorem}
|
||
设$D \subset \ndreal$,$\bvec{f}: D \to \realnum^m$,$\bvec{g}: \Omega \to \realnum^k$,$\bvec{f}(D) \subset \Omega \subset \realnum^m$。如果\boldf 在$\bvec{x}_0 \in D\interior$上可微,$\bvec{g}$在$\boldf(\bvec{x}_0)$上可微,那么复合映射$\bvec{g} \circ \boldf$在点$\bvec{x}_0$处可微,且
|
||
\[J(\bvec{g} \circ \bvec{f})(\bvec{x}_0) = J \bvec{g}(\boldf (\bvec{x}_0)) J \boldf(\bvec{x}_0)\eqper\]
|
||
\end{theorem}
|
||
|
||
如果我们记$\bvec{u} = \bvec{g}(\bvec{y}), \bvec{y} = \bvec{f}(\bvec{x})$,那么$\bvec{g} \circ \bvec{f}$的Jacobian可以写为
|
||
\[\begin{bmatrix}
|
||
\dfrac{\partial u_1}{\partial x_1} & \dfrac{\partial u_1}{\partial x_2} & \cdots & \dfrac{\partial u_1}{\partial x_n}\\[1em]
|
||
\dfrac{\partial u_2}{\partial x_1} & \dfrac{\partial u_2}{\partial x_2} & \cdots & \dfrac{\partial u_2}{\partial x_n}\\[1ex]
|
||
\vdots & \vdots & \ddots & \vdots\\
|
||
\dfrac{\partial u_k}{\partial x_1} & \dfrac{\partial u_k}{\partial x_2} & \cdots & \dfrac{\partial u_k}{\partial x_n}
|
||
\end{bmatrix}
|
||
=
|
||
\begin{bmatrix}
|
||
\dfrac{\partial u_1}{\partial y_1} & \dfrac{\partial u_1}{\partial y_2} & \cdots & \dfrac{\partial u_1}{\partial y_m}\\[1em]
|
||
\dfrac{\partial u_2}{\partial y_1} & \dfrac{\partial u_2}{\partial y_2} & \cdots & \dfrac{\partial u_2}{\partial y_m}\\[1ex]
|
||
\vdots & \vdots & \ddots & \vdots\\
|
||
\dfrac{\partial u_k}{\partial y_1} & \dfrac{\partial u_k}{\partial y_2} & \cdots & \dfrac{\partial u_k}{\partial y_m}
|
||
\end{bmatrix}
|
||
\begin{bmatrix}
|
||
\dfrac{\partial y_1}{\partial x_1} & \dfrac{\partial y_1}{\partial x_2} & \cdots & \dfrac{\partial y_1}{\partial x_n}\\[1em]
|
||
\dfrac{\partial y_2}{\partial x_1} & \dfrac{\partial y_2}{\partial x_2} & \cdots & \dfrac{\partial y_2}{\partial x_n}\\[1ex]
|
||
\vdots & \vdots & \ddots & \vdots\\
|
||
\dfrac{\partial y_m}{\partial x_1} & \dfrac{\partial y_m}{\partial x_2} & \cdots & \dfrac{\partial y_m}{\partial x_n}
|
||
\end{bmatrix}\eqper\]
|
||
|
||
\section{隐函数定理}
|
||
\begin{theorem}[隐函数定理]
|
||
设开集$D \subset \realnum^2$,函数$F: D \to \realnum$满足条件:
|
||
\begin{enumerate}[label=(\roman{*})]
|
||
\item $F \in C^1(D)$;
|
||
\item 点$(x_0, y_0) \in D$使得$F(x_0, y_0) = 0$;
|
||
\item $\dfrac{\partial F(x_0, y_0)}{\partial y} \neq 0$,
|
||
\end{enumerate}
|
||
则存在$\delta, \eta > 0$以及唯一的函数$f: (x_0 - \delta, x_0 + \delta) \to (y_0 - \eta, y_0 + \eta)$具有性质
|
||
\begin{enumerate}
|
||
\item $f(x_0) = y_0$,且对任意满足$\abs{x - x_0} < \delta$的$x$,有$F(x, f(x)) = 0$;
|
||
\item $f \in C^1(x_0 - \delta, x_0 + \delta)$;
|
||
\item 对$x \in (x_0 - \delta, x_0 + \delta)$,$y = f(x)$,有
|
||
\[\deriv{f}(x) = -\frac{\dfrac{\partial F}{\partial x}(x, y)}{\dfrac{\partial F}{\partial y}(x, y)}\eqper\]
|
||
\end{enumerate}
|
||
\end{theorem}
|
||
|
||
\begin{theorem}
|
||
设开集$D \subset \realnum^{n + 1}$,$F: D \to \realnum$,满足条件:
|
||
\begin{enumerate}[label=(\roman{*})]
|
||
\item $F \in C^1(D)$;
|
||
\item 点$(\bvec{x}_0, y_0) \in D$使得$F(\bvec{x}_0, y_0) = 0$;
|
||
\item $\dfrac{\partial F(\bvec{x}_0, y_0)}{\partial y} \neq 0$,
|
||
\end{enumerate}
|
||
则存在$\delta, \eta > 0$以及唯一的函数$f: B_\delta (\bvec{x}_0) \to (y_0 - \eta, y_0 + \eta)$具有性质
|
||
\begin{enumerate}
|
||
\item $f(\bvec{x}_0) = y_0$,且对任意满足$\norm{\bvec{x} - \bvec{x}_0} < \delta$的$\bvec{x}$,有$F(\bvec{x}, f(\bvec{x})) = 0$;
|
||
\item $f \in C^1 (B_\delta (\bvec{x}_0))$;
|
||
\item 对$\bvec{x} \in B_\delta (\bvec{x}_0)$,$y = f(\bvec{x})$,有
|
||
\[D_i f(\bvec{x}) = -\frac{\dfrac{\partial F}{\partial x_i}(\bvec{x}, y)}{\dfrac{\partial F}{\partial y}(\bvec{x}, y)}, i = 1, 2, \dots, n\eqper\]
|
||
\end{enumerate}
|
||
\end{theorem}
|
||
|
||
\section{隐映射定理}
|
||
我们先引入几个记号。设想有$m$个方程形成的方程组
|
||
\[\begin{cases}
|
||
F_1(x_1, \dots, x_n, y_1, \dots, y_m) = 0,\\
|
||
\qquad \dots\dots\\
|
||
F_m(x_1, \dots, x_n, y_1, \dots, y_m) = 0
|
||
\end{cases}\label{隐映射定理1}\tag{1}\]
|
||
如果这个方程组是一个合适的约束,那么我们可以期望从中解出$y_1, \dots, y_m$,使得其中的每一个都是$x_1, \dots, x_n$的函数,即
|
||
\[\begin{cases}
|
||
y_1 = f_1(x_1, \dots, x_n)\\
|
||
\qquad \dots\dots\\
|
||
y_m = f_m(x_1, \dots, x_n)
|
||
\end{cases}\label{隐映射定理2}\tag{2}\]
|
||
为了缩短记号,可令
|
||
\[\bvec{F} = \begin{bmatrix}
|
||
F_1\\ \vdots\\ F_m
|
||
\end{bmatrix},
|
||
\boldf = \begin{bmatrix}
|
||
f_1\\ \vdots\\ f_m
|
||
\end{bmatrix}\]
|
||
那么\eqref{隐映射定理1}式可以写为
|
||
\[\bvec{F}(\bvec{x}, \bvec{y}) = \bvec{0}\]
|
||
\eqref{隐映射定理2}式可以写为
|
||
\[\bvec{y} = \boldf (\bvec{x})\eqper\]
|
||
|
||
我们设$\bvec{F}$定义在开集$D \subset \realnum^{m + n}$,那么在$m \times (n + m)$矩阵
|
||
\[J \bvec{F} = \begin{bmatrix}
|
||
\dfrac{\partial F_1}{\partial x_1} & \cdots & \dfrac{\partial F_1}{\partial x_n} & \dfrac{\partial F_1}{\partial y_1} & \cdots & \dfrac{\partial F_1}{\partial y_m}\\[1ex]
|
||
\vdots & \ddots & \vdots & \vdots & \ddots & \vdots\\
|
||
\dfrac{\partial F_m}{\partial x_1} & \cdots & \dfrac{\partial F_m}{\partial x_n} & \dfrac{\partial F_m}{\partial y_1} & \cdots & \dfrac{\partial F_m}{\partial y_m}
|
||
\end{bmatrix}\]
|
||
中作分块$J\bvec{F} = \begin{bmatrix}
|
||
J_x \bvec{F} & J_y \bvec{F}
|
||
\end{bmatrix}$,
|
||
其中
|
||
\[J_x \bvec{F} = \begin{bmatrix}
|
||
\dfrac{\partial F_1}{\partial x_1} & \cdots & \dfrac{\partial F_1}{\partial x_n}\\
|
||
\vdots & \ddots & \vdots\\
|
||
\dfrac{\partial F_m}{\partial x_1} & \cdots & \dfrac{\partial F_m}{\partial x_n}
|
||
\end{bmatrix},
|
||
J_y \bvec{F} = \begin{bmatrix}
|
||
\dfrac{\partial F_1}{\partial y_1} & \cdots & \dfrac{\partial F_1}{\partial y_m}\\
|
||
\vdots & \ddots & \vdots\\
|
||
\dfrac{\partial F_m}{\partial y_1} & \cdots & \dfrac{\partial F_m}{\partial y_m}
|
||
\end{bmatrix}\]
|
||
其中$J_y \bvec{F}$是$m$阶方阵。
|
||
|
||
\begin{theorem}[隐映射定理]
|
||
设开集$D \subset \realnum^{n + m}$,映射$\bvec{F}: D \to \realnum^m$,满足下列条件:
|
||
\begin{enumerate}[label=(\roman{*})]
|
||
\item $\bvec{F} \in C^1(D)$;
|
||
\item 点$(\bvec{x}_0, \bvec{y}_0) \in D$使得$\bvec{F}(\bvec{x}_0, \bvec{y}_0) = \bvec{0}$;
|
||
\item $\det[J_y \bvec{F}(\bvec{x}_0, \bvec{y}_0)] \neq 0$,
|
||
\end{enumerate}
|
||
则存在$\delta, \eta > 0$以及唯一的函数$\boldf: B_\delta (\bvec{x}_0) \to B_\eta (\bvec{y}_0)$具有性质
|
||
\begin{enumerate}
|
||
\item $\bvec{f}(\bvec{x}_0) = \bvec{y}_0$,且对任意满足$\norm{\bvec{x} - \bvec{x}_0} < \delta$的$\bvec{x}$,有$\bvec{F}(\bvec{x}, \bvec{f}(\bvec{x})) = \bvec{0}$;
|
||
\item $\bvec{f} \in C^1 (B_\delta (\bvec{x}_0), \realnum^m)$;
|
||
\item 对$\bvec{x} \in B_\delta (\bvec{x}_0)$,$\bvec{y} = \bvec{f}(\bvec{x})$,有
|
||
\[J\bvec{f}(\bvec{x}) = -(J_y \bvec{F}(\bvec{x}, \bvec{y}))^{-1} J_x \bvec{F}(\bvec{x}, \bvec{y})\eqper\]
|
||
\end{enumerate}
|
||
\end{theorem}
|
||
|
||
\section{逆映射定理}
|
||
给定$\boldf: D \to \ndreal$,$D \subset \ndreal$。考察\boldf 的反函数及其性质:$\bvec{y} = \boldf^{-1}(\bvec{x})$。
|
||
|
||
考虑应用隐映射定理。$\bvec{y} = \boldf^{-1}(\bvec{x})$意味着$\bvec{x} = \boldf(\bvec{y})$。因此定义$\bvec{F}:\tilde{D} \to \ndreal$,满足
|
||
\[\bvec{F}(\bvec{x}, \bvec{y}) = \bvec{x} - \boldf(\bvec{y}), (\bvec{x}, \bvec{y}) \in \ndreal \times D = \tilde{D} \subset \realnum^{2n}\]
|
||
再取$\bvec{y}_0 \in D\interior$,$\bvec{x}_0 = \boldf(\bvec{y}_0)$,那么$(\bvec{x}_0, \bvec{y}_0) \in \tilde{D}\interior$,$\bvec{F}(\bvec{x}_0, \bvec{y}_0) = \bvec{0}$。
|
||
|
||
如果我们假设$\bvec{f} \in C^1$,那么
|
||
\[J_{\bvec{y}} \bvec{F}(\bvec{x}, \bvec{y}) = -J \bvec{f}(\bvec{y})\]
|
||
且
|
||
\[J_{\bvec{x}} \bvec{F}(\bvec{x}, \bvec{y}) = J\bvec{x} = \bvec{I}_n \eqper\]
|
||
|
||
\begin{theorem}[逆映射定理(局部)]
|
||
设$\bvec{f} \in C^1(D, \ndreal), D \subset \ndreal, \bvec{y}_0 \in D\interior$满足$\det(J\bvec{f}(\bvec{y}_0)) \neq 0$,那么存在$\delta, \eta > 0$以及函数$\bvec{g}: B_\delta (\bvec{x}_0) \to B_\eta(\bvec{y}_0)$,其中$\bvec{x}_0 = \bvec{f}(\bvec{y}_0)$满足以下性质:
|
||
\begin{enumerate}
|
||
\item $\bvec{g}(\bvec{x}_0) = \bvec{y}_0$,且对任意满足$\norm{\bvec{x} - \bvec{x}_0} < \delta$的$\bvec{x}$,有$\bvec{f}(\bvec{g}(\bvec{x})) = \bvec{x}$;
|
||
\item $\bvec{g} \in C^1(B_\delta(\bvec{x}_0), \ndreal)$;
|
||
\item $J\bvec{g}(\bvec{x}) = [J\bvec{f}(\bvec{y})]^{-1}$,其中$\bvec{y} = \bvec{g}(\bvec{x})$。
|
||
\end{enumerate}
|
||
\end{theorem}
|
||
|
||
\begin{theorem}[逆映射定理]
|
||
设$\bvec{f} \in C^1(D, \ndreal), D \subset\ndreal$为开集,且$\bvec{f}:D \to \ndreal$为单射,对任意的$\bvec{y} \in D$,$\det(J\bvec{f}(\bvec{y})) \neq 0$。则记$\Omega = \bvec{f}(D)$,存在$\bvec{f}$的反函数$\bvec{f}^{-1} \in C^1 (\Omega, \ndreal)$且对任意的$\bvec{x} \in \Omega$,
|
||
\[J\bvec{f}^{-1}(\bvec{x}) = (J\bvec{f}(\bvec{y}))^{-1}, \bvec{y} = \bvec{f}^{-1}(\bvec{x})\eqper\]
|
||
\end{theorem}
|
||
|
||
\section{高阶偏导数}
|
||
\begin{definition}
|
||
设在开集$D$上的每一点,函数$f$存在偏导数
|
||
\[D_i f(\bvec{x}) = \frac{\partial f}{\partial x_i}(\bvec{x}), i = 1, 2, \dots, n\]
|
||
称它们为$f$的一阶偏导数,如果对这些偏导函数又可取偏导数,得出的就是$f$的二阶偏导函数,仿照此可以定义三阶偏导函数乃至更高阶的偏导数。当一阶偏导数$\dfrac{\partial f}{\partial x_j}$再对$x_i$求偏导数时,我们把$\dfrac{\partial}{\partial x_i}\left(\dfrac{\partial f}{\partial x_j}\right)$记作$\dfrac{\partial^2 f}{\partial x_i \partial x_j}$,如果$i = j$,那么把$\dfrac{\partial^2 f}{\partial x_i \partial x_i}$记作$\dfrac{\partial^2 f}{\partial x_i^2}$。
|
||
\end{definition}
|
||
|
||
\begin{theorem}[Clairaut定理]
|
||
设$f:D \to \realnum$,$D \subset \realnum^2$是开集,$P = (x_0, y_0) \in D$。若$\dfrac{\partial^2 f}{\partial x \partial y}$和$\dfrac{\partial^2 f}{\partial y \partial x}$在$D$内存在且在$P$点连续,那么二者在该点相等。
|
||
\end{theorem}
|
||
|
||
\begin{corollary}
|
||
设$f: D \to \realnum$,$D \subset \ndreal$是开集。若$f$在$D$内所有$k$阶偏导数都存在且连续,则$k$阶偏导数的值与关于自变量的求导次序无关。
|
||
\end{corollary}
|
||
|
||
\section{拟微分平均值定理}
|
||
下面这个定理主要阐述的是多元数值函数的中值定理。
|
||
\begin{theorem}
|
||
设定义在凸区域$D \subset \ndreal$上的函数$f$可微,则对任何两点$\bvec{a}, \bvec{b} \in D$,在由$\bvec{a}$与$\bvec{b}$确定的直线段上有一点$\bvec{\xi}$使得
|
||
\[f(\bvec{b}) - f(\bvec{a}) = Jf(\bvec{\xi})(\bvec{b} - \bvec{a})\eqper\]
|
||
\end{theorem}
|
||
|
||
对于向量值函数,中值定理不一定成立,而有下面的拟微分平均值定理:
|
||
\begin{theorem}[拟微分平均值定理]
|
||
设凸区域$D \subset \ndreal$且$\boldf: D \to \realnum^m$,映射\boldf 在$D$上可微,则对于任何$\bvec{a}, \bvec{b} \in D$,在由$\bvec{a}, \bvec{b}$确定的线段上必有一点$\bvec{\xi}$使得
|
||
\[\norm{\boldf(\bvec{b}) - \boldf(\bvec{a})} \leq \norm{J\boldf(\bvec{\xi})} \norm{\bvec{b} - \bvec{a}}\eqper\]
|
||
\end{theorem}
|
||
|
||
\begin{corollary}
|
||
设区域$D \subset \ndreal$,$\bvec{f}:D \to \realnum^m$,如果$J\bvec{f} = \bvec{0}$在$D$上成立,则\boldf 在$D$上为一常向量。
|
||
\end{corollary}
|
||
|
||
\section{Taylor公式}
|
||
考虑用多项式近似一个多元函数,即对在$\bvec{a}$点有$m + 1$阶连续偏导数的$n$元函数$f(\bvec{x})$,是否有$m$次多项式$P_m(\bvec{x})$,使得
|
||
\[f(\bvec{a} + \Delta \bvec{x}) = P_m (\Delta \bvec{x}) + o(\norm{\Delta \bvec{x}}^m)\]
|
||
成立?
|
||
|
||
我们设$f \in C^{m + 1}(B_r(\bvec{a})), \bvec{a} \in \ndreal, r > 0$,那么对任意满足$\norm{\Delta \bvec{x}} < r$的$\Delta \bvec{x}$,定义
|
||
\[\varphi(t) = f(\bvec{a} + t\Delta \bvec{x}) \in C^{m + 1}[0, 1]\]
|
||
应用一元函数的Taylor公式:对任意的$t \in [0, 1]$,都存在$\theta \in (0, 1)$满足
|
||
\[\varphi(t) = \sum_{k = 0}^m \frac{\varphi^{(k)}(0)}{k!}t^k + \frac{\varphi^{(m + 1)}(\theta t)}{(m + 1)!} t^{m + 1}\]
|
||
特别取$t = 1$得到
|
||
\[\varphi(1) = \sum_{k = 0}^m \frac{\varphi^{(k)}(0)}{k!} + \frac{\varphi^{(m + 1)}(\theta)}{(m + 1)!}\]
|
||
将引入的一元函数的表达式代入(特别注意求导项的代入):
|
||
计算
|
||
\begin{align*}
|
||
& \deriv{\varphi} (t) = \sum_{i = 1}^n \frac{\partial f(\bvec{a} + t\Delta \bvec{x})}{\partial x_i} \Delta x_i, \deriv{\varphi}(0) = \sum_{i = 1}^n \frac{\partial f(\bvec{a})}{\partial x_i}\Delta x_i\\
|
||
& \varphi^{\prime \prime} (t) = \sum_{i, j = 1}^n \frac{\partial^2 f(\bvec{a} + t\Delta \bvec{x})}{\partial x_i \partial x_j} \Delta x_i \Delta x_j, \varphi^{\prime \prime} (0) = \sum_{i, j = 1}^n \frac{\partial ^2 f(\bvec{a})}{\partial x_i \partial x_j}\Delta x_i \Delta x_j\\
|
||
& \dots \dots
|
||
\end{align*}
|
||
|
||
我们引入记号$\alpha = (\alpha_1, \dots, \alpha_n)$,其中每个$\alpha_i$都是非负整数,记
|
||
\[\abs{\alpha} = \alpha_1 + \dots + \alpha_n, \alpha! = \alpha_1! \dots \alpha_n!\]
|
||
如果$\bvec{x} = (x_1, \dots, x_n)$,那么记$x^\alpha = x_1^{\alpha_1} \dots x_n^{\alpha_n}$。
|
||
|
||
对于多重指标$\alpha = (\alpha_1, \dots, \alpha_n)$,我们还引进记号
|
||
\[D^\alpha f(\bvec{a}) = \frac{\partial^{\alpha_1 + \dots + \alpha_n}f}{\partial x_1^{\alpha_1} \dots \partial x_n^{\alpha_n}} (\bvec{a})\eqper\]
|
||
|
||
于是我们可以叙述多元函数Taylor公式:
|
||
\begin{theorem}[多元函数Taylor公式]\label{多元函数Taylor公式}
|
||
设$D \subset \ndreal$是一个凸区域,$f \in C^{m + 1}(D)$。$\bvec{a} = (a_1, \dots, a_n)$和$\bvec{a} + \bvec{h} = (a_1 + h_1, \dots, a_n + h_n)$是$D$中两点,则必存在$\theta \in (0, 1)$使得
|
||
\[f(\bvec{a} + \bvec{h}) = \sum_{k = 0}^m \sum_{\abs{\alpha} = k} \frac{D^\alpha f(\bvec{a})}{\alpha!} h^\alpha + R_m\]
|
||
其中
|
||
\[R_m = \sum_{\abs{\alpha} = m + 1} \frac{D^\alpha f(\bvec{a} + \theta \bvec{h})}{\alpha!} h^\alpha\]
|
||
称为Lagrange余项。
|
||
\end{theorem}
|
||
|
||
这个定理中的和式的意思是,对每个多重指标$\alpha$,都求出$f$的偏导数$D^\alpha f$在$\bvec{a}$处的值,并且对$x_i$求了$\alpha_i$次偏导就要在后面乘上$\dfrac{h_i^{\alpha_i}}{\alpha_i!}$。
|
||
|
||
我们再引入一个高阶微分的记号。对$\bvec{h} = (h_1, h_2, \dots, h_n)$,我们定义
|
||
\begin{align*}
|
||
& \qquad\left(h_1 \frac{\partial}{\partial x_1} + \dots + h_n \frac{\partial}{\partial x_n}\right)^k f(\bvec{a})\\
|
||
& = \sum_{\abs{\alpha} = k} \frac{k!}{\alpha!} \frac{\partial^{\alpha_1}}{\partial x_1^{\alpha_1}}\dots \frac{\partial^{\alpha_n}}{\partial x_n^{\alpha_n}}f(\bvec{a}) \bvec{h}^\alpha\\
|
||
& = \sum_{\abs{\alpha} = k} \frac{k!}{\alpha!}D^\alpha f(\bvec{a}) h^\alpha
|
||
\end{align*}
|
||
特别考虑二元函数的情况。如果设$x, y$的改变量为$h, k$,那么
|
||
\begin{align*}
|
||
\left(h \frac{\partial}{\partial x} + k \frac{\partial}{\partial y}\right) f(x, y) & = \frac{\partial f(x, y)}{\partial x}h + \frac{\partial f(x, y)}{\partial y}k\\
|
||
\left(h \frac{\partial}{\partial x} + k \frac{\partial}{\partial y}\right)^2 f(x, y) & = \frac{\partial^2 f(x, y)}{\partial x^2}h^2 + 2 \frac{\partial^2 f(x, y)}{\partial x \partial y}hk + \frac{\partial^2 f(x, y)}{\partial y^2}k^2\\
|
||
\left(h \frac{\partial}{\partial x} + k \frac{\partial}{\partial y}\right)^m f(x, y) & = \sum_{i = 0}^m \binom{m}{i} \frac{\partial^m f(x, y)}{\partial x^i \partial y^{m - i}} h^i k^{m - i}, m = 1, \dots, n + 1
|
||
\end{align*}
|
||
|
||
在一般的应用中,特别重要的是Taylor公式的前三项。把它们具体写出来:
|
||
\[f(\bvec{a} + \bvec{h}) = f(\bvec{a}) + \sum_{i = 1}^n \frac{\partial f}{\partial x_i}(\bvec{a})h_i + \frac{1}{2} \sum_{i, j = 1}^n \frac{\partial^2 f}{\partial x_i \partial x_j} (\bvec{a}) h_i h_j + \cdots\]
|
||
如果记
|
||
\[Hf(\bvec{a}) = \begin{bmatrix}
|
||
\dfrac{\partial^2 f}{\partial x_1^2}(\bvec{a}) & \cdots & \dfrac{\partial^2 f}{\partial x_1 \partial x_n}(\bvec{a})\\[1em]
|
||
\vdots & \ddots & \vdots\\[1ex]
|
||
\dfrac{\partial^2 f}{\partial x_n \partial x_1}(\bvec{a}) & \cdots & \dfrac{\partial^2 f}{\partial x_n^2}(\bvec{a})
|
||
\end{bmatrix}\]
|
||
进一步简记为
|
||
\[Hf(\bvec{a}) = \begin{bmatrix}
|
||
D_{11} f(\bvec{a}) & \cdots & D_{1n} f(\bvec{a})\\
\vdots & \ddots & \vdots\\
D_{n1} f(\bvec{a}) & \cdots & D_{nn} f(\bvec{a})
|
||
\end{bmatrix}\eqper\]
|
||
这里$Hf$称为$f$的Hessian,它是一个$n$阶对称方阵。
|
||
|
||
那么前面的Taylor公式可以写成
|
||
\[f(\bvec{a} + \bvec{h}) = f(\bvec{a}) + Jf(\bvec{a})\bvec{h} + \frac{1}{2} \bvec{h}^{\mathrm{T}} Hf(\bvec{a}) \bvec{h} + \cdots\]
|
||
|
||
\begin{theorem}
|
||
在定理\ref{多元函数Taylor公式}的条件下,
|
||
\[R_m = O(\norm{\bvec{h}}^{m + 1})\eqper\]
|
||
于是我们可以把Taylor公式写成Peano余项的形式:
|
||
\[f(\bvec{a} + \bvec{h}) = f(\bvec{a}) + Jf(\bvec{a}) \bvec{h} + \frac{1}{2} \bvec{h}^{\mathrm{T}} Hf(\bvec{a}) \bvec{h} + o(\norm{\bvec{h}}^2)\]
|
||
\end{theorem}
|
||
|
||
\section{极值}
|
||
\begin{definition}
|
||
设$D \subset \ndreal$,函数$f:D \to \realnum$,点$\bvec{p}_0 \in D\interior$,如果存在一个球$B_r(\bvec{p}_0) \subset D\interior$,使得$f(\bvec{p}) \geq f(\bvec{p}_0)$($f(\bvec{p}) > f(\bvec{p}_0)$)对一切$\bvec{p} \in B_r(\hat{\bvec{p}}_0)$成立,那么$\bvec{p}_0$称为$f$的一个(严格)极小值点,而$f(\bvec{p}_0)$称为函数$f$的一个(严格)极小值。
|
||
|
||
同样地可以定义(严格)极大值点和(严格)极大值。极小值和极大值统称极值。
|
||
\end{definition}
|
||
|
||
类似于Fermat引理,我们可以得到极值点的必要条件
|
||
\begin{theorem}
|
||
设$n$元函数$f$在$\bvec{p}_0$取得极值,且$Jf(\bvec{p}_0)$存在,那么必须有$Jf(\bvec{p}_0) = \bvec{0}$。
|
||
|
||
设$\bvec{u}$是任意方向向量,则$D_{\bvec{u}} f(\bvec{p}_0) = 0$。
|
||
\end{theorem}
|
||
|
||
\begin{definition}
|
||
$D$中使得$Jf(\bvec{p}) = \bvec{0}$的一切内点称为函数$f$的驻点。极值点一定是驻点,而驻点未必是极值点。
|
||
\end{definition}
|
||
|
||
\begin{definition}
|
||
设
|
||
\(A = \begin{bmatrix}
|
||
a_{ij}
|
||
\end{bmatrix}\)
|
||
是一个$n$阶对称方阵。设
|
||
\[\bvec{x} = \begin{bmatrix}
|
||
x_1\\ x_2\\ \vdots\\ x_n
|
||
\end{bmatrix}\]
|
||
称
|
||
\[Q(\bvec{x}) = \bvec{x}^\mathrm{T} A \bvec{x} = \sum_{i, j = 1}^n a_{ij}x_i x_j\]
|
||
为$x_1, x_2, \dots, x_n$的一个二次型,方阵$A$称为二次型$Q$的系数方阵。
|
||
|
||
如果对任意$\bvec{x} \neq \bvec{0}$都有$Q(\bvec{x}) \geq 0$($\leq 0$),则称二次型$Q$是正(负)定的,其系数方阵$A$相应地称为正(负)定方阵。
|
||
|
||
如果对任意$\bvec{x} \neq \bvec{0}$都有$Q(\bvec{x}) > 0$($< 0$),则称二次型$Q$是严格正(负)定的,其系数方阵$A$相应地称为严格正(负)定方阵。
|
||
|
||
如果存在$\bvec{p}, \bvec{q} \in \ndreal$,使得$Q(\bvec{p}) < 0 < Q(\bvec{q})$,则称二次型$Q$是不定的,其系数方阵$A$相应地称为不定方阵。
|
||
\end{definition}
|
||
|
||
\begin{theorem}
|
||
设
|
||
\(A = \begin{bmatrix}
|
||
a_{ij}
|
||
\end{bmatrix}\)
|
||
是一个$n$阶对称方阵。方阵$A$为严格正定的一个必要充分条件是它的各阶顺序主子式均大于零。
|
||
\end{theorem}
|
||
|
||
欲证明一个方阵$A$负定,只需证明$-A$是正定的即可。
|
||
|
||
\begin{theorem}
|
||
设二阶对称方阵
|
||
\[A = \begin{bmatrix}
|
||
a_{11} & a_{12}\\
|
||
a_{21} & a_{22}
|
||
\end{bmatrix}\]
|
||
$A$为严格正(负)定的一个必要充分条件是
|
||
\[a_{11} > 0(a_{11} < 0),\]
|
||
\[\begin{vmatrix}
|
||
a_{11} & a_{12}\\
|
||
a_{21} & a_{22}
|
||
\end{vmatrix} > 0\]
|
||
|
||
$A$为不定矩阵的一个充分必要条件是
|
||
\[\begin{vmatrix}
|
||
a_{11} & a_{12}\\
|
||
a_{21} & a_{22}
|
||
\end{vmatrix} < 0\eqper\]
|
||
\end{theorem}
|
||
|
||
\begin{theorem}
|
||
设$\bvec{x}_0$是函数$f$的一个驻点,函数$f$在$\bvec{x}_0$的某一邻域内有连续的二阶偏导数。
|
||
\begin{enumerate}
|
||
\item 如果Hessian $Hf(\bvec{x}_0)$是严格正(负)定方阵,那么$\bvec{x}_0$是$f$的一个严格极小(大)值点。
|
||
\item 如果Hessian $Hf(\bvec{x}_0)$是不定方阵,那么$\bvec{x}_0$不是$f$的极值点。
|
||
\end{enumerate}
|
||
\end{theorem}
|
||
|
||
\section{条件极值}
|
||
首先我们引入一个问题。设$f: D \to \realnum$,$\Phi: D \to \realnum^m$,$D \subset \realnum^{n + m}$是开集,$(\bvec{x}, \bvec{y}) \in \realnum^{n + m}$。求满足$\Phi(\bvec{x}, \bvec{y}) = \bvec{0}$的条件下$f(\bvec{x}, \bvec{y})$的最大/最小值,记为
|
||
\[\begin{cases}
|
||
\max f(\bvec{x}, \bvec{y})\\
|
||
\Phi(\bvec{x}, \bvec{y}) = 0
|
||
\end{cases}
|
||
\text{或}
|
||
\begin{cases}
|
||
\min f(\bvec{x}, \bvec{y})\\
|
||
\Phi(\bvec{x}, \bvec{y}) = 0
|
||
\end{cases}\]
|
||
其中$f(\bvec{x}, \bvec{y})$称为目标函数,$\Phi(\bvec{x}, \bvec{y}) = \bvec{0}$称为约束条件。
|
||
|
||
设在约束$\varphi(\bvec{x}, y) = 0$下$f(\bvec{x}, y)$在$P = (\bvec{x}_0, y_0) \in D$点达到极值,且$\dfrac{\partial \varphi}{\partial y}(\bvec{x}_0, y_0) \neq 0$,应用隐函数定理:在$P$点附近,方程$\varphi(\bvec{x}, y) = 0$确定了隐函数$y = y(\bvec{x})$。函数$F(\bvec{x}) = f(\bvec{x}, y(\bvec{x}))$在$\bvec{x}_0$达到极值,因此$J F(\bvec{x}_0) = \bvec{0}$。
|
||
因此
|
||
\[\frac{\partial F}{\partial x_i} (\bvec{x}_0) = \frac{\partial f}{\partial x_i}(\bvec{x}_0, y_0) + \frac{\partial f}{\partial y}(\bvec{x}_0, y_0) \frac{\partial y}{\partial x_i}(\bvec{x}_0) = 0, i = 1, 2, \cdots, n\]
|
||
而根据隐函数定理,
|
||
\[\frac{\partial y}{\partial x_i}(\bvec{x}_0) = -\frac{\dfrac{\partial \varphi}{\partial x_i}(\bvec{x}_0, y_0)}{\dfrac{\partial \varphi}{\partial y}(\bvec{x}_0, y_0)}\]
|
||
将它代入上式,
|
||
\[\frac{\partial f}{\partial x_i}(\bvec{x}_0, y_0) - \frac{\dfrac{\partial f}{\partial y}{(\bvec{x}_0, y_0)} \dfrac{\partial \varphi}{\partial x_i}(\bvec{x}_0, y_0)}{\dfrac{\partial \varphi}{\partial y}(\bvec{x}_0, y_0)} = 0\]
|
||
引入参数
|
||
\[\lambda = -\frac{\dfrac{\partial f}{\partial y}(\bvec{x}_0, y_0)}{\dfrac{\partial \varphi}{\partial y}(\bvec{x}_0, y_0)}\]
|
||
那么
|
||
\[\frac{\partial f}{\partial x_i}(\bvec{x}_0, y_0) + \lambda \frac{\partial \varphi}{\partial x_i}(\bvec{x}_0, y_0) = 0, i = 1, 2, \cdots, n\]
|
||
因此我们得到
|
||
\[Jf(\bvec{x}_0, y_0) + \lambda J\varphi(\bvec{x}_0, y_0) = \bvec{0}\]
|
||
是条件极值的一个必要条件。
|
||
|
||
\begin{theorem}[条件极值的必要条件]
|
||
设$f \in C^1(D)$,$\Phi \in C^1 (D, \realnum^m)$,$D \subset \realnum^{n + m}$是开集。记$P = (\bvec{x}_0, \bvec{y}_0) \in D$,又设$\det(J_{\bvec{y}}\Phi(\bvec{x}_0, \bvec{y}_0)) \neq 0$。如果$f(\bvec{x}, \bvec{y})$在约束$\Phi(\bvec{x}, \bvec{y}) = \bvec{0}$下在$P$点达到极值,则存在$\bvec{\Lambda} = (\lambda_1, \dots, \lambda_m)$使得
|
||
\[Jf(\bvec{x}_0, \bvec{y}_0) + \bvec{\Lambda} J \Phi(\bvec{x}_0, \bvec{y}_0) = \bvec{0}\eqper\]
|
||
这也就是说$(\bvec{x}_0, \bvec{y}_0)$满足方程组
|
||
\begin{align*}
|
||
& \frac{\partial f}{\partial x_k}(\bvec{x}_0, \bvec{y}_0) + \sum_{i = 1}^m \lambda_i \frac{\partial \Phi_i}{\partial x_k}(\bvec{x}_0, \bvec{y}_0) = 0, k = 1, \cdots, n\\
|
||
& \frac{\partial f}{\partial y_j}(\bvec{x}_0, \bvec{y}_0) + \sum_{i = 1}^m \lambda_i \frac{\partial \Phi_i}{\partial y_j}(\bvec{x}_0, \bvec{y}_0) = 0, j = 1, \cdots, m
|
||
\end{align*}
|
||
\end{theorem}
|
||
|
||
根据这个定理,我们可以引入Lagrange乘数法。定义函数$L: D \times \realnum^m \to \realnum$,
|
||
\[L(\bvec{z}, \bvec{\Lambda}) = f(\bvec{z}) + \bvec{\Lambda} \Phi(\bvec{z}), (\bvec{z}, \bvec{\Lambda}) \in D \times \realnum^m\]
|
||
$L$称为条件极值问题的Lagrange函数,$\bvec{\Lambda}$称为Lagrange乘数/乘子。根据条件极值的必要条件,在条件极值点$\bvec{z}_0 \in D$,存在$\bvec{\Lambda} \in \realnum^m$满足
|
||
\[J_{\bvec{z}} L (\bvec{z}_0, \bvec{\Lambda}) = J_{\bvec{z}} f(\bvec{z}_0) + \bvec{\Lambda} J_{\bvec{z}}\Phi(\bvec{z}_0) = \bvec{0}\]
|
||
此外
|
||
\[J_{\bvec{\Lambda}} L(\bvec{z}_0, \bvec{\Lambda}) = \Phi(\bvec{z}_0) = \bvec{0}\]
|
||
因此
|
||
\[JL(\bvec{z}_0, \bvec{\Lambda}) = (J_{\bvec{z}} L(\bvec{z}_0, \bvec{\Lambda}), J_{\bvec{\Lambda}} L(\bvec{z}_0, \bvec{\Lambda})) = \bvec{0} \eqper\]