diff --git a/00-prologue/Makefile b/00-prologue/Makefile
new file mode 100644
index 0000000..fd4ff4d
--- /dev/null
+++ b/00-prologue/Makefile
@@ -0,0 +1,28 @@
+# You want latexmk to *always* run, because make does not have all the info.
+# Also, include non-file targets in .PHONY so they are run regardless of any
+# file of the given name existing.
+.PHONY: course-notes-0.pdf
+
+# The first rule in a Makefile is the one executed by default ("make"). It
+# should always be the "all" rule, so that "make" and "make all" are identical.
+all: course-notes-0.pdf
+
+# MAIN LATEXMK RULE
+
+CC = latexmk
+
+# -pdf tells latexmk to generate PDF directly (instead of DVI).
+# -pdflatex="" tells latexmk to call a specific backend with specific options.
+# -use-make tells latexmk to call make for generating missing files.
+
+# -interaction=nonstopmode keeps the pdflatex backend from stopping at a
+# missing file reference and interactively asking you for an alternative.
+CFLAGS = -pdf -pdflatex="pdflatex -interaction=nonstopmode" -use-make
+
+course-notes-0.pdf: course-notes-0.tex
+	$(CC) $(CFLAGS) course-notes-0.tex
+
+# latexmk
+# -CA clean up (remove) all nonessential files.
+clean:
+	$(CC) -CA
diff --git a/00-prologue/course-notes-0.tex b/00-prologue/course-notes-0.tex
new file mode 100644
index 0000000..b509fd2
--- /dev/null
+++ b/00-prologue/course-notes-0.tex
@@ -0,0 +1,269 @@
+\documentclass{article}
+
+% Packages
+\usepackage{graphicx} % Required for inserting images
+\usepackage{amsfonts}
+\usepackage{mathtools} % Required for the pmatrix* environment used in equation comments (extension on top of the amsmath package)
+\usepackage{amsmath}
+\usepackage{bm} % for rendering vectors correctly
+\usepackage{xcolor} % for displaying color
+\usepackage{dashrule} % for dashes
+\usepackage{physics} % for norm symbol
+\usepackage{todonotes}
+
+\title{CS231A Course Notes 0}
+\author{Ashish Jain}
+\date{March 2023}
+
+% Commands
+\newcommand{\mathvecr}[1]{$\bm{#1}$}
+\newcommand{\mathmatr}[1]{$\mathbf{#1}$} % undergraduate algebra version
+\newcommand{\vecr}[1]{\bm{#1}}
+\newcommand{\matr}[1]{\mathbf{#1}} % undergraduate algebra version
+\newcommand{\transpose}[1]{#1^\top}
+\newcommand{\cogoline}[1]{$\mathsf{#1}$}
+\newcommand{\cogoslope}[1]{$\mathsf{#1}$}
+\newcommand{\cogopoint}[1]{$\mathtt{#1}$}
+\newcommand{\at}[2][]{#1|_{#2}} % derivative at a particular point
+
+\newcommand{\eqncomment}[1]{
+\footnotesize
+\textcolor{gray}{
+\begin{pmatrix*}[l]
+\text{#1}
+\end{pmatrix*}
+}}
+\newcommand{\longeqncomment}[2]
+{\footnotesize
+\textcolor{gray}{
+\begin{pmatrix*}[l]
+\text{#1} \\
+\text{#2}
+\end{pmatrix*}
+}}
+
+% Math operators
+\DeclareMathOperator{\svd}{svd}
+
+\begin{document}
+
+\maketitle
+
+\section{Homogeneous Coordinates}
+\subsection{Projective Geometry}
+Geometry has its roots in ancient Egypt and Babylonia, where empirical knowledge was acquired through the experience of surveyors, architects, and builders. Later, Greek geometers explored the logical connections between geometric facts and derived a number of theorems by deduction from a small set of axioms; this body of knowledge is codified in Euclid's Elements \cite{wiki:Euclid's_Elements}. Euclidean geometry is interesting because it is closely related to the space we experience in everyday life.
+
+However, a few hundred years ago, interest developed in capturing three-dimensional scenes on a canvas more realistically, and this gave rise to the field of perspective geometry.
The word perspective comes from the Medieval Latin words ``per'' (meaning ``through'') and ``specere'' (meaning ``to look at''). Put simply, the theory of perspective considers the artist's canvas as if it were a clear screen. The artist paints while looking through this screen from a fixed position, as if she is viewing the scene she is painting from a particular vantage point. Projective geometry, then, is the study of the properties of figures which are left unchanged by projections.
+
+There are several ways in which we can study projective geometry. The first is the synthetic approach, which is in line with proofs similar to those found in Euclid's Elements: we argue directly about geometric entities such as points and lines, and the geometrical relationships between them. The second is the coordinate or analytical approach, introduced by Descartes, wherein points, lines and other geometrical entities are represented by sets of coordinates; it allows us to prove theorems more easily than the synthetic approach does. The third approach is based on axioms and networks of formal relations between those axioms. We will primarily concern ourselves with the analytical approach in this course.
+
+In 1872, Felix Klein introduced the Erlangen program, which from our standpoint frames euclidean geometry as a subset of affine geometry, and affine geometry as a subset of projective geometry \cite{wiki:Erlangen_program}. Projective geometry is not only more general but also more symmetrical than euclidean geometry, and when we use homogeneous coordinates, the algebra of projective geometry becomes linear \cite{semple1952}.
+
+\subsection{Why homogeneous coordinates and what are homogeneous coordinates?}
+Let us start by talking about homogeneous coordinates in the euclidean plane. In Euclid's system, we often distinguish between different cases, for example, tangency and parallelism. The application of algebra to geometry allowed geometers to treat such cases uniformly. For example, we could start treating the intersection of a line with a circle (or any other conic) using real or complex points. In other words, a secant cuts a circle in two distinct points, a tangent in two coincident points (two coincident points because we can interpret a tangent as the limiting case of a chord in which one of the points of intersection tends towards coincidence with the other), and a line that does not touch the circle intersects it in two conjugate complex points \cite{semple1952}. Therefore, we can now state, for example, that every line intersects a circle in two points.
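+
+To make this concrete, here is a minimal Python sketch (an illustration added to these notes, not taken from \cite{semple1952}) that intersects the unit circle $X^2+Y^2=1$ with the horizontal line $Y=k$. Substituting the line into the circle gives the quadratic $X^2+(k^2-1)=0$, whose two roots are real and distinct, real and coincident, or complex conjugates, exactly as described above.
+
+\begin{verbatim}
+import numpy as np
+
+def circle_line_intersections(k):
+    # Roots of x^2 + 0*x + (k^2 - 1) = 0, i.e. the x-coordinates of
+    # the intersections of the unit circle with the line y = k.
+    roots = np.roots([1.0, 0.0, k**2 - 1.0])
+    return [(x, k) for x in roots]
+
+print(circle_line_intersections(0.0))  # secant: two distinct real points
+print(circle_line_intersections(1.0))  # tangent: two coincident points
+print(circle_line_intersections(2.0))  # two conjugate complex points
+\end{verbatim}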
+
+Let us now talk about the intersection of two lines. In Euclid's system, there are two types of line-pairs in the plane: intersecting lines and parallel lines. If \cogoline{l} and \cogoline{m} are two intersecting lines, and we rotate \cogoline{l} about a point \cogopoint{A} on it, the point of intersection between \cogoline{l} and \cogoline{m} moves further and further away until it disappears altogether. If we rotate \cogoline{l} further, the point of intersection reappears on the other side and moves along \cogoline{m} back towards where it started. Therefore, as a figure of speech, we can say that parallel lines meet at a point at infinity. Now, let's say that each point at infinity has a direction, and if we gather together the various points at infinity and call the resulting set the line at infinity, we can say that any two distinct lines meet at a unique point. Furthermore, we can say that a unique line passes through any two distinct points.
+
+Let us now consider how we can represent these points at infinity algebraically. For two lines $a_1x+b_1y+c_1=0$ and $a_2x+b_2y+c_2=0$, we can determine whether the two lines meet or not by computing the determinant $a_1b_2 - a_2b_1$: the lines meet in a finite point exactly when it is non-zero. However, we would like to capture the intersection of parallel lines as well. Observe that, whether or not the two lines are parallel, the ratio $b_1c_2 - b_2c_1:c_1a_2 - c_2a_1:a_1b_2 - a_2b_1$ is always determinate, that is, at least one of the quantities is different from zero. Taking inspiration from this observation, if we now represent any point in euclidean coordinates, say $(X, Y)$, by a triad of homogeneous coordinates $(x, y, z)$, then we can go from homogeneous coordinates $(x, y, z)$ back to euclidean coordinates by using equations \ref{eqn:HomogeneousToEuclideanX} and \ref{eqn:HomogeneousToEuclideanY}.
+
+\begin{flalign}
+X &= x/z \label{eqn:HomogeneousToEuclideanX} \\
+Y &= y/z \label{eqn:HomogeneousToEuclideanY}
+\end{flalign}
+
+As you can observe from equations \ref{eqn:HomogeneousToEuclideanX} and \ref{eqn:HomogeneousToEuclideanY}, proportional triads in homogeneous coordinates will always represent the same point in euclidean coordinates, for example $(10,10,1)$, $(20,20,2)$, $(1,1,0.1)$, et cetera. However, we also need to talk about the special triad $(0,0,0)$. We will always exclude this special triad from homogeneous coordinates.
+
+Let us now talk about how points in euclidean space and points at infinity are represented in homogeneous coordinates. Clearly, any triad $(x, y, z)$ with $z\neq0$ maps to a point in euclidean space. Any triad $(x, y, z)$ with $z=0$ represents a point at infinity. Taking this further, if we consider all triads with $z=0$, we get the line at infinity, which contains every point at infinity.
+
+Observe that if a point \cogopoint{P} $(X,Y)$ in the euclidean plane $E_2$ is given, then the homogeneous coordinates of the point are not uniquely determined. In fact, if $(x_1, y_1, z_1)$ are homogeneous coordinates of \cogopoint{P}, then $(kx_1, ky_1, kz_1)$ for any $k \neq 0$ are also homogeneous coordinates of \cogopoint{P}. On the other hand, if a triad $(x_1, y_1, z_1)$ with $z_1 \neq 0$ is given in homogeneous coordinates, then its rectangular coordinates are uniquely determined.
+
+\subsection{Intuition} \label{sec:intuition}
+Thus far, homogeneous coordinates may feel like an abstract system which we apply mechanically. However, that's far from the case. Imagine the homogeneous coordinates corresponding to a euclidean plane $E_2$ as living in a 3D euclidean space where all the lines and planes of interest pass through the origin. In such a framework, the euclidean plane $E_2$ corresponding to our homogeneous coordinates is the plane $z=1$ in our 3-space, and the aforementioned line at infinity is actually the plane $z=0$ \cite{wildberger2011projective} \cite{stachniss_homogeneous_coordinates}.
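+
+The conversions in equations \ref{eqn:HomogeneousToEuclideanX} and \ref{eqn:HomogeneousToEuclideanY} are easy to play with numerically. Below is a minimal Python sketch (added for illustration; the function names are our own): proportional triads all map back to the same euclidean point.
+
+\begin{verbatim}
+import numpy as np
+
+def to_homogeneous(P):
+    # Map a euclidean point (X, Y) to one of its homogeneous triads.
+    X, Y = P
+    return np.array([X, Y, 1.0])
+
+def to_euclidean(p):
+    # Map a homogeneous triad (x, y, z), z != 0, to euclidean (X, Y).
+    x, y, z = p
+    assert z != 0, "z = 0 is a point at infinity; no euclidean image"
+    return np.array([x / z, y / z])
+
+print(to_euclidean([10, 10, 1]))   # [10. 10.]
+print(to_euclidean([20, 20, 2]))   # [10. 10.]
+print(to_euclidean([1, 1, 0.1]))   # [10. 10.]
+\end{verbatim}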
+
+\subsection{Entities in euclidean space in homogeneous coordinates}
+Consider a line $aX+bY+c=0$ in euclidean $E_2$ space. In homogeneous coordinates, the equation of the line becomes $ax+by+cz=0$. Geometrically, in the corresponding $E_3$ homogeneous coordinate space, you can think of the line $aX+bY+c=0$ as the intersection of the plane represented by $ax+by+cz=0$ with the plane $z=1$.
+
+Also, observe that the equation of the line in homogeneous coordinates is always homogeneous, i.e., all the terms in the homogeneous coordinate equation have the same total degree in the variables belonging to the set $\{x,y,z\}$.
+Similarly, the general equation of a conic in rectangular coordinates, as shown in equation \ref{eqn:euclidean_conic}, becomes homogeneous, as shown in equation \ref{eqn:homogeneous_conic}. It is this homogeneity property that lends homogeneous coordinates their name \cite{wylie2008}.
+
+\begin{flalign}
+a_{11}X^2+2a_{12}XY+a_{22}Y^2+2a_{13}X+2a_{23}Y+a_{33}&=0 \label{eqn:euclidean_conic} \\
+a_{11}x^2+2a_{12}xy+a_{22}y^2+2a_{13}xz+2a_{23}yz+a_{33}z^2&=0 \label{eqn:homogeneous_conic}
+\end{flalign}
+
+Let us now see how a point in euclidean $E_2$ space can be represented in homogeneous coordinates. Consider a point \cogopoint{P} $(X,Y)$ in euclidean space. The line from the origin $(0,0,0)$ through the point $(X,Y,1)$ on the plane $z=1$ represents the point \cogopoint{P}: any 3-space point on this line maps to \cogopoint{P}. Points in euclidean $E_2$ space can also be represented as the intersection of two lines in $E_2$ space. However, lines in euclidean space map to planes in homogeneous coordinates. Therefore, the intersection of the two corresponding planes in homogeneous coordinate 3-space is a line through the origin, and this line represents the desired point of euclidean $E_2$ space. Note that the planes in homogeneous coordinates pass through the origin; therefore, they always intersect in 3-space, and we can never have two distinct parallel planes in homogeneous coordinate 3-space. We can find the intersection of two planes by computing the cross product of their normals. Note that this merely gives us a direction. However, we know that all lines in homogeneous coordinates pass through the origin, as mentioned in section \ref{sec:intuition}. Therefore, if we draw a ray from the origin along the direction of the computed cross product, we get our desired point. Let us now talk about parallel lines in euclidean $E_2$ space. Consider two parallel lines, namely $Y=0$ and $Y=1$. The corresponding planes in homogeneous coordinate 3-space both pass through the origin, and their cross product lies in the plane $z=0$. If we draw the ray from the origin along the computed vector, we get our desired point in homogeneous coordinates, which is a point at infinity.
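+
+The cross-product construction above is a one-liner in practice. The following Python sketch (our own illustration) represents each line $ax+by+cz=0$ by its coefficient triple, i.e., the normal of the corresponding plane through the origin, and intersects two lines by taking the cross product.
+
+\begin{verbatim}
+import numpy as np
+
+def intersect(l1, l2):
+    # Lines are given as homogeneous triples (a, b, c) for ax+by+cz=0.
+    # The cross product of the two plane normals is the direction of
+    # the intersection line through the origin, i.e. the intersection
+    # point in homogeneous coordinates.
+    return np.cross(l1, l2)
+
+# X = 0 and Y = 0 meet at the euclidean origin: (0, 0, 1) ~ (0, 0).
+print(intersect([1, 0, 0], [0, 1, 0]))
+
+# The parallel lines Y = 0 and Y = 1 (i.e. y - z = 0) meet at
+# (-1, 0, 0), a point at infinity (z = 0) in the X direction.
+print(intersect([0, 1, 0], [0, 1, -1]))
+\end{verbatim}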
+
+\subsection{Duality}
+Rather than treating points as primary entities and lines as sets of points, let us now treat lines as primary entities. In this framework, we can define points in terms of lines: a point is determined by the complete set of lines that pass through it, so that every point becomes the envelope of a variable line. Consider a line $ux + vy + wz=0$ in the original homogeneous coordinate system. A triad $(u_1,v_1,w_1)$ can be considered the homogeneous line-coordinates of some line \cogoline{l_1}, just as $(x,y,z)$ are considered the homogeneous point-coordinates of a point \cogopoint{P}.
+
+In our regular homogeneous point-coordinate system, $(u,v,w)$ are fixed and we vary the points $(x,y,z)$. However, if we instead fix $(x,y,z)$ and vary $(u,v,w)$, then the equation describes the set of lines which pass through the point $(x,y,z)$. (If you have encountered the Hough transform, its Hough space is built on this same idea.) In short, there is complete duality between the representation of points and lines in terms of point-coordinates, on the one hand, and the representation of lines and points in terms of line-coordinates, on the other \cite{semple1952}.
+
+\subsection{Higher dimensions}
+We have thus far talked about euclidean $E_2$ space and the corresponding 3-space homogeneous coordinates. However, homogeneous coordinates are not limited to euclidean $E_2$ space; they extend to higher dimensional euclidean spaces in exactly the same manner.
+
+\section{Solving Linear Systems}
+A system of linear equations (or linear system) is a collection of one or more linear equations involving the same variables \cite{wiki:systemoflinearequations}. In this section, we will cover techniques from linear algebra that are essential for solving linear systems.
+
+\subsection{Heterogeneous Linear Systems}
+Let's start by reviewing how we can solve heterogeneous linear systems such as in equation \ref{eqn:heterogeneouslinearsystem}, where \mathmatr{A} is a matrix of size $(m \times n)$, \mathvecr{x} is a column vector of size $(n \times 1)$ and \mathvecr{b} is a column vector of size $(m \times 1)$. By heterogeneous, we mean linear systems whose right-hand side is a non-zero vector. A solution of such a system is a tuple $(s_1,s_2, ...,s_n)$ of numbers that makes each equation in the system true when the values $(s_1,s_2, ...,s_n)$ are substituted for $(x_1, x_2, ..., x_n)$ respectively.
+
+\begin{flalign}
+\matr{A}\vecr{x} &= \vecr{b} & \hfill \eqncomment{\mathvecr{b} is non-zero} \label{eqn:heterogeneouslinearsystem}
+\end{flalign}
+
+The number of solutions which satisfy the equation depends on the sizes of the quantities involved. We can divide the possibilities into three cases:
+
+\begin{enumerate}
+    \item If $m < n$, then we say the heterogeneous linear system is underdetermined. In this case, the number of constraints is less than the number of unknowns. As a result, the linear system will typically have an infinite number of solutions.
+    \item If $m = n$ and matrix \mathmatr{A} is invertible, then there are exactly as many constraints as unknowns, and the system has the unique solution $\vecr{x} = \matr{A}^{-1}\vecr{b}$ (see the short numerical sketch after this list). However, if \mathmatr{A} is not invertible, then the system has either no solution or an infinite number of solutions.
+    \item If $m > n$, then we say the heterogeneous linear system is overdetermined. In this case, the number of constraints is more than the number of unknowns, and in general the system has no exact solution. In such cases, we convert the heterogeneous linear system to an optimization problem of the form $\min_{\vecr{x}} \norm{\matr{A}\vecr{x} - \vecr{b}}^2$, and try to find a least squares solution.
+\end{enumerate}
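+
+For the square, invertible case, standard linear algebra libraries solve the system directly. A minimal Python sketch (our own toy example):
+
+\begin{verbatim}
+import numpy as np
+
+# m = n with an invertible A: the unique solution is x = A^{-1} b.
+A = np.array([[2.0, 1.0],
+              [1.0, 3.0]])
+b = np.array([3.0, 5.0])
+
+# np.linalg.solve is preferred over forming A^{-1} explicitly.
+x = np.linalg.solve(A, b)
+print(x)  # [0.8 1.4]
+\end{verbatim}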
+
+\subsubsection{Overdetermined Heterogeneous Linear System}
+Let's now see how we can solve the optimization problem. Let the loss function be $L = \norm{\matr{A}\vecr{x} - \vecr{b}}^2$. If we expand our loss function, we get:
+
+\begin{flalign}
+L &= \transpose{\vecr{x}}(\transpose{\matr{A}}\matr{A})\vecr{x} - 2\transpose{\vecr{x}}(\transpose{\matr{A}}\vecr{b}) + \norm{\vecr{b}}^2 \nonumber
+\end{flalign}
+
+Let's now take the derivative of the loss function with respect to $\vecr{x}$:
+
+\begin{flalign}
+\frac{\partial L}{\partial \vecr{x}} &= 2(\transpose{\matr{A}}\matr{A})\vecr{x} - 2(\transpose{\matr{A}}\vecr{b}) \nonumber
+\end{flalign}
+
+To find the optimal solution in the least squares sense, we will set the derivative to zero:
+
+\begin{flalign}
+& \frac{\partial L}{\partial \vecr{x}} = 0 \nonumber \\
+\implies & 2(\transpose{\matr{A}}\matr{A})\vecr{x} - 2(\transpose{\matr{A}}\vecr{b}) = 0 \nonumber \\
+\implies & (\transpose{\matr{A}}\matr{A})\vecr{x} = \transpose{\matr{A}}\vecr{b} \nonumber \\
+\implies & \vecr{x} = (\transpose{\matr{A}}\matr{A})^{-1} \transpose{\matr{A}}\vecr{b} \label{eqn:heterogeneous linear system solution}
+\end{flalign}
+
+The matrix $(\transpose{\matr{A}}\matr{A})^{-1} \transpose{\matr{A}}$ is called the pseudoinverse of the original $(m \times n)$ matrix \mathmatr{A}. Cases:
+
+\begin{itemize}
+\item If $m=n$ and \mathmatr{A} is invertible, then $(\transpose{\matr{A}}\matr{A})^{-1} \transpose{\matr{A}} = \matr{A}^{-1}$.
+\item If $m>n$, then $\transpose{\matr{A}}\matr{A}$, of shape $(n \times n)$, is invertible whenever \mathmatr{A} has full column rank, which is typically the case; therefore, we can determine the least squares solution to our overdetermined heterogeneous linear system (see the sketch after this list).
+\end{itemize}
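+
+As a quick numerical check of equation \ref{eqn:heterogeneous linear system solution}, here is a Python sketch (our own toy data) that computes the least squares solution both via the normal equations and via the library routine, which solves the same problem in a numerically more stable way.
+
+\begin{verbatim}
+import numpy as np
+
+# Overdetermined system: m = 3 equations, n = 2 unknowns.
+A = np.array([[1.0, 0.0],
+              [0.0, 1.0],
+              [1.0, 1.0]])
+b = np.array([1.0, 2.0, 4.0])
+
+# Normal-equations solution x = (A^T A)^{-1} A^T b.
+x_normal = np.linalg.inv(A.T @ A) @ (A.T @ b)
+
+# Library least squares solver (preferred in practice).
+x_lstsq, *_ = np.linalg.lstsq(A, b, rcond=None)
+
+print(x_normal)  # [1.3333... 2.3333...]
+print(x_lstsq)   # same solution
+\end{verbatim}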
+
+\subsection{Homogeneous Linear Systems}
+Let's now review how we can solve homogeneous linear systems such as in equation \ref{eqn:homogeneouslinearsystem}, where \mathmatr{A} is a matrix of size $(m \times n)$, \mathvecr{x} is a column vector of size $(n \times 1)$ and \mathvecr{0} is a column vector of size $(m \times 1)$. Observe that the n-tuple $(0,0, ...,0)$ is a valid trivial solution. For homogeneous linear systems, we are typically not interested in the trivial solution, as it is rarely useful in practice.
+
+\begin{flalign}
+\matr{A}\vecr{x} &= \vecr{0} & \hfill\label{eqn:homogeneouslinearsystem}
+\end{flalign}
+
+We will again try to solve the homogeneous linear system by converting it to an optimization problem. However, given we are not interested in the trivial solution, we will add the constraint $\norm{\vecr{x}}^2 = 1$ to search only among vectors whose length is equal to 1. The converted optimization problem is shown in equation \ref{eqn:homogeneouslinearsystem as optimization problem}.
+
+\begin{flalign}
+\underset{\vecr{x}:\norm{\vecr{x}}^2 = 1}{\min} \norm{\matr{A}\vecr{x}}^2 & \hfill\label{eqn:homogeneouslinearsystem as optimization problem}
+\end{flalign}
+
+Now, given this is a constrained optimization problem, we can no longer solve it by setting the derivative of the loss function to zero. Instead, we will solve it by computing the singular value decomposition of \mathmatr{A} as shown in equation \ref{eqn:SVD}.
+
+\begin{flalign}
+\matr{A} &=
+\underset{m \times n}{
+    \begin{bmatrix}
+    x_{11} & x_{12} & \dots & x_{1n} \\
+    x_{21} & x_{22} & \dots & x_{2n} \\
+    \vdots & \vdots & \ddots & \vdots \\
+    x_{m1} & x_{m2} & \dots & x_{mn}
+    \end{bmatrix}
+} \nonumber \\
+&= \matr{U} \matr{\Sigma} \transpose{\matr{V}} \label{eqn:SVD} \\
+\text{where} \nonumber \\
+\matr{U} &= \underset{m \times m}{
+    \begin{bmatrix}
+    | & | & \ & | \\
+    \vecr{u}_{1} & \vecr{u}_{2} & \dots & \vecr{u}_{m} \\
+    | & | & \ & |
+    \end{bmatrix}
+} & \longeqncomment{\mathmatr{U} is orthonormal}{$\vecr{u}_{i}$ are the left singular vectors of \mathmatr{A}} \nonumber \\
+\matr{\Sigma} &=
+\underset{m \times n}{
+\begin{bmatrix}
+\sigma_{1} & 0 & \dots & 0 \\
+0 & \sigma_{2} & \dots & 0 \\
+\vdots & \vdots & \ddots & \vdots \\
+0 & 0 & \dots & \sigma_{n} \\
+\vdots & \vdots & & \vdots \\
+0 & 0 & \dots & 0 \\
+\end{bmatrix}} & \eqncomment{$\sigma_{i}$ are called the singular values of \mathmatr{A}}\nonumber \\
+\transpose{\matr{V}} &=
+\underset{n \times n}{
+    \begin{bmatrix}
+    \rule[.5ex]{3.5em}{0.4pt} \transpose{\vecr{v}_{1}} \rule[.5ex]{3.5em}{0.4pt} \\
+    \rule[.5ex]{3.5em}{0.4pt} \transpose{\vecr{v}_{2}} \rule[.5ex]{3.5em}{0.4pt} \\
+    \vdots \\
+    \rule[.5ex]{3.5em}{0.4pt} \transpose{\vecr{v}_{n}} \rule[.5ex]{3.5em}{0.4pt} \\
+    \end{bmatrix}
+} & \longeqncomment{\mathmatr{V} is orthonormal}{$\vecr{v}_{i}$ are the right singular vectors of \mathmatr{A}} \nonumber
+\end{flalign}
+
+The solution to our constrained least squares minimization problem is given by the column of \mathmatr{V} corresponding to the smallest singular value in \mathmatr{\Sigma}. Intuitively, since \mathmatr{U} is orthonormal, $\norm{\matr{A}\vecr{x}}^2 = \norm{\matr{\Sigma}\transpose{\matr{V}}\vecr{x}}^2$, and the unit vector that minimizes this is the right singular vector associated with the smallest singular value. Note that our constraint $\norm{\vecr{x}}^2 = 1$ is satisfied because \mathmatr{V} is orthonormal.
+
+\subsection{Imposing Rank Constraints}
+Suppose we are given \mathmatr{F} of shape $(m \times n)$ and an integer $r < \min(m, n)$, and our objective is to find the matrix \mathmatr{F'} of rank $r$ that is closest to \mathmatr{F} from a least squares perspective. We can write the optimization problem as shown in equation \ref{eqn:enforcing rank constraints optimization problem}.
+
+\begin{flalign}
+\underset{\matr{F'}: \rank{(\matr{F'})} = r}{\min} \norm{\matr{F} - \matr{F'}}^2 \label{eqn:enforcing rank constraints optimization problem}
+\end{flalign}
+
+We can find \mathmatr{F'} using equation \ref{eqn:solution for F'} as follows:
+
+\begin{flalign}
+\matr{F} &= \matr{U} \matr{\Sigma} \transpose{\matr{V}} & \eqncomment{Compute $\svd{(\matr{F})}$}\nonumber \\
+\matr{\Sigma'} &= \text{top}(\matr{\Sigma}, r) & \longeqncomment{Keep only the top $r$ singular values in $\matr{\Sigma}$}{Set remaining singular values to zero} \nonumber \\
+\matr{F'} &= \matr{U} \matr{\Sigma'} \transpose{\matr{V}} \label{eqn:solution for F'}
+\end{flalign}
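+
+Both of these SVD-based recipes are short in code. The following Python sketch (our own illustration) solves $\matr{A}\vecr{x} = \vecr{0}$ in the least squares sense by taking the right singular vector associated with the smallest singular value, and computes the closest rank-$r$ matrix by truncating the singular values. Note that \texttt{np.linalg.svd} returns the singular values in descending order, so the desired direction is the last row of $\transpose{\matr{V}}$.
+
+\begin{verbatim}
+import numpy as np
+
+def solve_homogeneous(A):
+    # min ||Ax||^2 subject to ||x|| = 1.
+    _, _, Vt = np.linalg.svd(A)
+    return Vt[-1]  # last row of V^T = last column of V
+
+def rank_r_approx(F, r):
+    # Closest rank-r matrix to F in the least squares sense.
+    U, S, Vt = np.linalg.svd(F, full_matrices=False)
+    S[r:] = 0.0          # keep only the top r singular values
+    return (U * S) @ Vt  # reassemble U Sigma' V^T
+
+A = np.array([[1.0, 2.0, 3.0],
+              [2.0, 4.0, 6.0]])  # rank 1
+x = solve_homogeneous(A)
+print(np.linalg.norm(A @ x))     # ~ 0: a nontrivial unit solution
+
+F = np.array([[3.0, 1.0],
+              [1.0, 3.0]])
+print(rank_r_approx(F, 1))       # [[2. 2.] [2. 2.]]
+\end{verbatim}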
+
+\section{Non-linear Least Squares}
+Given a non-linear function $\vecr{f(x)}$ where $f \colon \mathbb{R}^{n} \to \mathbb{R}^{m}$ and a column vector \mathvecr{b} of size $(m \times 1)$, our objective is to find \mathvecr{x} that minimizes $\norm{\vecr{f(x)} - \vecr{b}}^2$. As $\vecr{f(x)}$ is a non-linear function, we do not have a single-shot analytical solution. Instead, we will apply an iterative algorithm (the derivation below yields the Gauss--Newton method) to arrive at our solution.
+
+\begin{flalign}
+\underset{\vecr{x}}{\min} \norm{\vecr{f(x)} - \vecr{b}}^2 &= \underset{\vecr{x}}{\min}\ \transpose{(\vecr{f(x)} - \vecr{b})}(\vecr{f(x)} - \vecr{b}) \nonumber \\
+&= \underset{\vecr{x}}{\min}\ \transpose{\vecr{f(x)}}\vecr{f(x)} -2\transpose{\vecr{b}}\vecr{f(x)} + \transpose{\vecr{b}}\vecr{b} \nonumber \\
+&= \underset{\vecr{x}}{\min}\ \transpose{\vecr{f(x)}}\vecr{f(x)} -2\transpose{\vecr{b}}\vecr{f(x)} & \eqncomment{$\transpose{\vecr{b}}\vecr{b}$ is independent of \mathvecr{x}} \nonumber
+\end{flalign}
+
+Suppose we start the iterative process with a guess for \mathvecr{x}, say $\vecr{x}_0$. Our objective will be to iteratively update the estimate such that $L(\vecr{x}_{i+1}) < L(\vecr{x}_i)$, where $L$ is the loss function below. We continue iterating until a stopping criterion is reached. Let our loss function be $L = \transpose{\vecr{f(x)}}\vecr{f(x)} -2\transpose{\vecr{b}}\vecr{f(x)}$. Let us now compute the partial derivative of $L$ with respect to \mathvecr{x}.
+\begin{flalign}
+L &= \transpose{\vecr{f(x)}}\vecr{f(x)} -2\transpose{\vecr{b}}\vecr{f(x)} & \nonumber \\
+\frac{\partial L}{\partial \vecr{x}} &= 2\transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\vecr{f(x)} - 2\transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\vecr{b} \\[1em]
+\text{where} \nonumber \\
+\frac{\partial \vecr{f(x)}}{\partial \vecr{x}} &=
+\begin{bmatrix}
+\frac{\partial f_{1}}{\partial x_{1}} & \dots & \frac{\partial f_{1}}{\partial x_{n}} \\
+\vdots & \ddots & \vdots \\
+\frac{\partial f_{m}}{\partial x_{1}} & \dots & \frac{\partial f_{m}}{\partial x_{n}}
+\end{bmatrix} & \eqncomment{Jacobian matrix in numerator layout \cite{wiki:Matrix_calculus}} \nonumber
+\end{flalign}
+
+At a minimum or maximum, the partial derivative of $L$ with respect to \mathvecr{x} will be zero.
+\begin{flalign}
+\frac{\partial L}{\partial \vecr{x}}\at[\bigg]{\vecr{x}^*} &= 2\transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\vecr{f(x)} - 2\transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\vecr{b} = 0 \label{eqn:derivative of non-linear least squares loss function}
+\end{flalign}
+
+If we approximate \mathvecr{f(x)} at \mathvecr{x_i} as a linear function \cite{wiki:Taylor's_theorem}, we get:
+\begin{flalign}
+\vecr{f(x_i+ \Delta \vecr{x})} \approx \vecr{f(x_i)} + \frac{\partial \vecr{f(x)}}{\partial \vecr{x}}\at[\bigg]{\vecr{x}=\vecr{x_i}} \Delta \vecr{x} \label{eqn:linear approximation}
+\end{flalign}
+
+Plugging equation \ref{eqn:linear approximation} into equation \ref{eqn:derivative of non-linear least squares loss function} \cite{cis580slides}, we get:
+\begin{flalign}
+& 2\transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}(\vecr{f(x)} + \frac{\partial \vecr{f(x)}}{\partial \vecr{x}}\Delta \vecr{x}) - 2\transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\vecr{b} = \vecr{0} \nonumber \\[1em]
+\implies & \transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}} \vecr{f(x)} + \transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\frac{\partial \vecr{f(x)}}{\partial \vecr{x}} \Delta \vecr{x} - \transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}} \vecr{b} = \vecr{0} \nonumber \\[1em]
+\implies & \transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\frac{\partial \vecr{f(x)}}{\partial \vecr{x}} \Delta \vecr{x} = \transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}} \vecr{b} - \transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\vecr{f(x)} \nonumber \\[1em]
+\implies & \transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\frac{\partial \vecr{f(x)}}{\partial \vecr{x}} \Delta \vecr{x} = \transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}(\vecr{b} - \vecr{f(x)}) \nonumber \\[1em]
+\implies & \Delta \vecr{x} = (\transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}\frac{\partial \vecr{f(x)}}{\partial \vecr{x}})^{-1}\transpose{\frac{\partial \vecr{f(x)}}{\partial \vecr{x}}}(\vecr{b} - \vecr{f(x)}) \label{eqn:non-linear least squares step size}
+\end{flalign}
+
+Thus, using equation \ref{eqn:non-linear least squares step size}, given $\vecr{x}_i$ at iteration $i$, we can compute $\vecr{x}_{i+1}=\vecr{x}_i + \Delta \vecr{x}$; this update is exactly the Gauss--Newton step. Notice the similarity of equation \ref{eqn:non-linear least squares step size} to the solution of the heterogeneous linear system in equation \ref{eqn:heterogeneous linear system solution}.
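+
+The update in equation \ref{eqn:non-linear least squares step size} is straightforward to implement. Here is a minimal Python sketch (our own toy problem; in practice the Jacobian would come from your model or from automatic differentiation):
+
+\begin{verbatim}
+import numpy as np
+
+def gauss_newton(f, jac, b, x0, iters=50, tol=1e-10):
+    # Iterate x <- x + (J^T J)^{-1} J^T (b - f(x)), the step from the
+    # equation above, until the step becomes tiny.
+    x = np.asarray(x0, dtype=float)
+    for _ in range(iters):
+        J = jac(x)
+        dx = np.linalg.solve(J.T @ J, J.T @ (b - f(x)))
+        x = x + dx
+        if np.linalg.norm(dx) < tol:  # stopping criterion
+            break
+    return x
+
+# Toy problem: f(x) = (x1^2, x2^2, x1*x2), b chosen so that x = (2, 3).
+f = lambda x: np.array([x[0]**2, x[1]**2, x[0] * x[1]])
+jac = lambda x: np.array([[2 * x[0], 0.0],
+                          [0.0, 2 * x[1]],
+                          [x[1], x[0]]])
+b = np.array([4.0, 9.0, 6.0])
+print(gauss_newton(f, jac, b, np.array([1.0, 1.0])))  # ~ [2. 3.]
+\end{verbatim}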
+
+% References
+\newpage
+\bibliographystyle{unsrt}
+\bibliography{references}
+\end{document}
diff --git a/00-prologue/latexmkrc b/00-prologue/latexmkrc
new file mode 100644
index 0000000..4230195
--- /dev/null
+++ b/00-prologue/latexmkrc
@@ -0,0 +1,179 @@
+# Settings
+$xdvipdfmx = "xdvipdfmx -z 6 -o %D %O %S";
+
+###############################
+# Post processing of pdf file #
+###############################
+
+# assume the jobname is 'output' for sharelatex
+my $ORIG_PDF_AGE = -M "output.pdf"; # get age of existing pdf if present
+
+END {
+  my $NEW_PDF_AGE = -M "output.pdf";
+  return if !defined($NEW_PDF_AGE); # bail out if no pdf file
+  return if defined($ORIG_PDF_AGE) && $NEW_PDF_AGE == $ORIG_PDF_AGE; # bail out if pdf was not updated
+  $qpdf //= "/usr/local/bin/qpdf";
+  $qpdf = $ENV{QPDF} if defined($ENV{QPDF}) && -x $ENV{QPDF};
+  return if !
-x $qpdf; # check that qpdf exists + $qpdf_opts //= "--linearize --newline-before-endstream"; + $qpdf_opts = $ENV{QPDF_OPTS} if defined($ENV{QPDF_OPTS}); + my $status = system($qpdf, split(' ', $qpdf_opts), "output.pdf", "output.pdf.opt"); + my $exitcode = ($status >> 8); + print "qpdf exit code=$exitcode\n"; + # qpdf returns 0 for success, 3 for warnings (output pdf still created) + return if !($exitcode == 0 || $exitcode == 3); + print "Renaming optimised file to output.pdf\n"; + rename("output.pdf.opt", "output.pdf"); +} + +############## +# Glossaries # +############## +add_cus_dep( 'glo', 'gls', 0, 'glo2gls' ); +add_cus_dep( 'acn', 'acr', 0, 'glo2gls'); # from Overleaf v1 +sub glo2gls { + system("makeglossaries $_[0]"); +} + +############# +# makeindex # +############# +@ist = glob("*.ist"); +if (scalar(@ist) > 0) { + $makeindex = "makeindex -s $ist[0] %O -o %D %S"; +} + +################ +# nomenclature # +################ +add_cus_dep("nlo", "nls", 0, "nlo2nls"); +sub nlo2nls { + system("makeindex $_[0].nlo -s nomencl.ist -o $_[0].nls -t $_[0].nlg"); +} + +######### +# Knitr # +######### +my $root_file = $ARGV[-1]; + +add_cus_dep( 'Rtex', 'tex', 0, 'rtex_to_tex'); +sub rtex_to_tex { + do_knitr("$_[0].Rtex"); +} + +sub do_knitr { + my $dirname = dirname $_[0]; + my $basename = basename $_[0]; + system("Rscript -e \"library('knitr'); setwd('$dirname'); knit('$basename')\""); +} + +my $rtex_file = $root_file =~ s/\.tex$/.Rtex/r; +unless (-e $root_file) { + if (-e $rtex_file) { + do_knitr($rtex_file); + } +} + +########## +# feynmf # +########## +push(@file_not_found, '^feynmf: Files .* and (.*) not found:$'); +add_cus_dep("mf", "tfm", 0, "mf_to_tfm"); +sub mf_to_tfm { system("mf '\\mode:=laserjet; input $_[0]'"); } + +push(@file_not_found, '^feynmf: Label file (.*) not found:$'); +add_cus_dep("mf", "t1", 0, "mf_to_label1"); +sub mf_to_label1 { system("mf '\\mode:=laserjet; input $_[0]' && touch $_[0].t1"); } +add_cus_dep("mf", "t2", 0, "mf_to_label2"); +sub mf_to_label2 { system("mf '\\mode:=laserjet; input $_[0]' && touch $_[0].t2"); } +add_cus_dep("mf", "t3", 0, "mf_to_label3"); +sub mf_to_label3 { system("mf '\\mode:=laserjet; input $_[0]' && touch $_[0].t3"); } +add_cus_dep("mf", "t4", 0, "mf_to_label4"); +sub mf_to_label4 { system("mf '\\mode:=laserjet; input $_[0]' && touch $_[0].t4"); } +add_cus_dep("mf", "t5", 0, "mf_to_label5"); +sub mf_to_label5 { system("mf '\\mode:=laserjet; input $_[0]' && touch $_[0].t5"); } +add_cus_dep("mf", "t6", 0, "mf_to_label6"); +sub mf_to_label6 { system("mf '\\mode:=laserjet; input $_[0]' && touch $_[0].t6"); } +add_cus_dep("mf", "t7", 0, "mf_to_label7"); +sub mf_to_label7 { system("mf '\\mode:=laserjet; input $_[0]' && touch $_[0].t7"); } +add_cus_dep("mf", "t8", 0, "mf_to_label8"); +sub mf_to_label8 { system("mf '\\mode:=laserjet; input $_[0]' && touch $_[0].t8"); } +add_cus_dep("mf", "t9", 0, "mf_to_label9"); +sub mf_to_label9 { system("mf '\\mode:=laserjet; input $_[0]' && touch $_[0].t9"); } + +########## +# feynmp # +########## +push(@file_not_found, '^dvipdf: Could not find figure file (.*); continuing.$'); +add_cus_dep("mp", "1", 0, "mp_to_eps"); +sub mp_to_eps { + system("mpost $_[0]"); + return 0; +} + +############# +# asymptote # +############# +sub asy {return system("asy --offscreen '$_[0]'");} +add_cus_dep("asy","eps",0,"asy"); +add_cus_dep("asy","pdf",0,"asy"); +add_cus_dep("asy","tex",0,"asy"); + +############# +# metapost # # from Overleaf v1 +############# +add_cus_dep('mp', '1', 0, 'mpost'); +sub mpost { + my $file = $_[0]; + 
my ($name, $path) = fileparse($file); + pushd($path); + my $return = system "mpost $name"; + popd(); + return $return; +} + +########## +# chktex # +########## +unlink 'output.chktex' if -f 'output.chktex'; +if (defined $ENV{'CHKTEX_OPTIONS'}) { + use File::Basename; + use Cwd; + + # identify the main file + my $target = $ARGV[-1]; + my $file = basename($target); + + if ($file =~ /\.tex$/) { + # change directory for a limited scope + my $orig_dir = cwd(); + my $subdir = dirname($target); + chdir($subdir); + # run chktex on main file + $status = system("/usr/bin/run-chktex.sh", $orig_dir, $file); + # go back to original directory + chdir($orig_dir); + + # in VALIDATE mode we always exit after running chktex + # otherwise we exit if EXIT_ON_ERROR is set + + if ($ENV{'CHKTEX_EXIT_ON_ERROR'} || $ENV{'CHKTEX_VALIDATE'}) { + # chktex doesn't let us access the error info via exit status + # so look through the output + open(my $fh, "<", "output.chktex"); + my $errors = 0; + { + local $/ = "\n"; + while(<$fh>) { + if (/^\S+:\d+:\d+: Error:/) { + $errors++; + print; + } + } + } + close($fh); + exit(1) if $errors > 0; + exit(0) if $ENV{'CHKTEX_VALIDATE'}; + } + } +} diff --git a/00-prologue/references.bib b/00-prologue/references.bib new file mode 100644 index 0000000..4fdc50e --- /dev/null +++ b/00-prologue/references.bib @@ -0,0 +1,87 @@ +@misc{wiki:systemoflinearequations, + author = "Wikipedia", + title = "System of Linear Equations --- {Wikipedia}{,} The Free Encyclopedia", + year = "2023", + url = "https://en.wikipedia.org/wiki/System_of_linear_equations", + note = "[Online; accessed 25-March-2023]" +} + +@book{semple1952, + title={Algebraic Projective Geometry}, + author={Semple, JG and Kneebone, GT}, + year={1952}, + publisher={Clarendon Press} +} + +@misc{wiki:Euclid's_Elements, + author = "Wikipedia", + title = "{Euclid's Elements} --- {W}ikipedia{,} The Free Encyclopedia", + year = "2023", + url = "http://en.wikipedia.org/w/index.php?title=Euclid's\%20Elements", + note = "[Online; accessed 26-March-2023]" + } + +@misc{wiki:Erlangen_program, + author = "Wikipedia", + title = "{Erlangen program} --- {W}ikipedia{,} The Free Encyclopedia", + year = "2023", + url = "http://en.wikipedia.org/w/index.php?title=Erlangen\%20program", + note = "[Online; accessed 26-March-2023]" + } + +@book{wylie2008, + author = {Wylie Jr., C. 
R.}, + title = {Introduction to Projective Geometry}, + publisher = {Dover Publications}, + year = {2008}, + isbn = {048646895X}, + url = {https://www.amazon.com/Introduction-Projective-Geometry-Dover-Mathematics/dp/048646895X} +} + +@misc{wildberger2011projective, + author = {Wildberger, N J}, + title = {Projective geometry and homogeneous coordinates | WildTrig: Intro to Rational Trigonometry}, + year = {2009}, + howpublished = {YouTube video}, + url = {https://www.youtube.com/watch?v=q3turHmOWq4} +} + +@misc{stachniss_homogeneous_coordinates, + author = {Stachniss, Cyrill}, + title = {Homogeneous Coordinates}, + year = {2020}, + howpublished = {YouTube video}, + url = {https://www.youtube.com/watch?v=MQdm0Z_gNcw} +} + +@misc{wiki:Newton's_method_in_optimization, + author = "Wikipedia", + title = "{Newton's method in optimization} --- {W}ikipedia{,} The Free Encyclopedia", + year = "2023", + url = "http://en.wikipedia.org/w/index.php?title=Newton's\%20method\%20in\%20optimization", + note = "[Online; accessed 27-March-2023]" + } + + @misc{wiki:Matrix_calculus, + author = "Wikipedia", + title = "{Matrix calculus} --- {W}ikipedia{,} The Free Encyclopedia", + year = "2023", + url = "http://en.wikipedia.org/w/index.php?title=Matrix\%20calculus", + note = "[Online; accessed 27-March-2023]" + } + + @misc{wiki:Taylor's_theorem, + author = "Wikipedia", + title = "{Taylor's theorem} --- {W}ikipedia{,} The Free Encyclopedia", + year = "2023", + url = "https://en.wikipedia.org/wiki/Taylor\%27s_theorem", + note = "[Online; accessed 27-March-2023]" + } + + @misc{cis580slides, + author = "HyunSoo Park", + title = "Nonlinear Least Squares and Bundle Adjustment", + url = "https://www.cis.upenn.edu/~cis580/Spring2015/Lectures/cis580-16-BundleAdjustment.pdf", + year = "2015", + note = "Slides from CIS 580: Machine Perception, Spring 2015", +}