(* Content-type: application/mathematica *) (*** Wolfram Notebook File ***) (* http://www.wolfram.com/nb *) (* CreatedBy='Mathematica 6.0' *) (*CacheID: 234*) (* Internal cache information: NotebookFileLineBreakTest NotebookFileLineBreakTest NotebookDataPosition[ 145, 7] NotebookDataLength[ 38691, 1146] NotebookOptionsPosition[ 35188, 1045] NotebookOutlinePosition[ 35687, 1064] CellTagsIndexPosition[ 35644, 1061] WindowFrame->Normal ContainsDynamic->False*) (* Beginning of Notebook Content *) Notebook[{ Cell[CellGroupData[{ Cell["Matrix MLE for Linear Regression", "Title", CellChangeTimes->{{3.400361179830015*^9, 3.4003611953614635`*^9}}], Cell[CellGroupData[{ Cell["Joseph E. Gonzalez", "Author", CellChangeTimes->{{3.4003612046272078`*^9, 3.4003612072991166`*^9}}], Cell["\<\ Some people have had some trouble with the linear algebra form of the MLE for \ multiple regression. I tried to find a nice online derivation but I could \ not find anything helpful. So I have decided to derive the matrix form for \ the MLE weights for linear regression under the assumption of Gaussian noise. \ \ \>", "Text", CellChangeTimes->{{3.4003612157992253`*^9, 3.400361344253995*^9}, { 3.400365671779554*^9, 3.400365673080758*^9}, {3.4003657080603027`*^9, 3.400365726936326*^9}}] }, Open ]], Cell[CellGroupData[{ Cell["The Model", "Section", CellChangeTimes->{{3.4003613510197062`*^9, 3.400361359019809*^9}}], Cell[TextData[{ "Let's say we are given some set of data ", Cell[BoxData[ FormBox["X", TraditionalForm]]], " and ", Cell[BoxData[ FormBox["y", TraditionalForm]]], ". The matrix ", Cell[BoxData[ FormBox["X", TraditionalForm]]], " has ", Cell[BoxData[ FormBox["n", TraditionalForm]]], " rows corresponding to each of the examples and ", Cell[BoxData[ FormBox["d", TraditionalForm]]], " columns corresponding to each of the ", Cell[BoxData[ FormBox["d", TraditionalForm]]], " features. 
The column vector ", Cell[BoxData[ FormBox["y", TraditionalForm]]], " has ", Cell[BoxData[ FormBox["n", TraditionalForm]]], " rows corresponding to each of the examples and 1 column. We want to \ \"learn\" the relationship between an individual feature vector ", Cell[BoxData[ FormBox["x", TraditionalForm]]], " and an outcome ", Cell[BoxData[ FormBox["y", TraditionalForm]]], ". In some sense we want to learn the function ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"f", ":", SuperscriptBox["\[DoubleStruckCapitalR]", "d"]}], "\[Rule]", "\[DoubleStruckCapitalR]"}], TraditionalForm]]], " which satisfies:" }], "Text", CellChangeTimes->{{3.4003613607542057`*^9, 3.400361611366789*^9}, { 3.400361658086137*^9, 3.4003616589923983`*^9}, {3.400361858948083*^9, 3.4003619601993785`*^9}, {3.400362000543645*^9, 3.4003620078406134`*^9}, { 3.40036576239331*^9, 3.400365812720405*^9}}], Cell[BoxData[ FormBox[ RowBox[{ StyleBox[" ", FontSlant->"Italic"], RowBox[{"y", "=", RowBox[{"f", "(", "x", ")"}], " "}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.4003619628087873`*^9, 3.400362012262545*^9}, 3.4003620595131497`*^9, {3.4003658174390993`*^9, 3.400365823025799*^9}}], Cell[CellGroupData[{ Cell["Linear Models", "Subsubsubsection", CellChangeTimes->{{3.4003670162348223`*^9, 3.400367020408784*^9}}], Cell[TextData[{ "There are many functions ", Cell[BoxData[ FormBox["f", TraditionalForm]]], " that we could choose from (I am sure you have some favorites). To simplify \ our computation and to impose some assumptions (which often aids in \ generalization) we will restrict ", Cell[BoxData[ FormBox["f", TraditionalForm]]], " to the class of linear functions. 
That is, for a choice of weights ", Cell[BoxData[ FormBox["w", TraditionalForm]]], " we can express ", Cell[BoxData[ FormBox["f", TraditionalForm]]], " as:" }], "Text", CellChangeTimes->{{3.4003659449940243`*^9, 3.4003660893568983`*^9}, 3.400370458731762*^9}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ SubscriptBox["f", "w"], "(", "x", ")"}], "=", RowBox[{ UnderoverscriptBox["\[Sum]", RowBox[{"j", "=", "1"}], "d"], RowBox[{ SubscriptBox["w", "j"], " ", SubscriptBox["x", "j"]}]}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.4003660929585238`*^9, 3.40036610971426*^9}, { 3.4003662234321337`*^9, 3.400366223859467*^9}}] }, Open ]], Cell[CellGroupData[{ Cell["Nonlinear Features", "Subsubsubsection", CellChangeTimes->{{3.4003670251885433`*^9, 3.400367028350418*^9}}], Cell[TextData[{ "Often people find this assumption too restrictive. We can permit a more \ complex class of functions by creating new (nonlinear) features from the \ original features ", Cell[BoxData[ FormBox[ SubscriptBox["x", "j"], TraditionalForm]]], ". 
For example:" }], "Text", CellChangeTimes->{{3.400366133343103*^9, 3.400366212737278*^9}, { 3.400366329705069*^9, 3.400366350310103*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ SubscriptBox["f", "w"], "(", "x", ")"}], "=", RowBox[{ RowBox[{ UnderoverscriptBox["\[Sum]", RowBox[{"j", "=", "1"}], "d"], RowBox[{ SubscriptBox["w", "j"], " ", SubscriptBox["x", "j"]}]}], " ", "+", " ", RowBox[{ UnderoverscriptBox["\[Sum]", RowBox[{"j", "=", RowBox[{"d", "+", "1"}]}], RowBox[{"2", "d"}]], RowBox[{ SubscriptBox["w", "j"], " ", RowBox[{"Sin", "[", SubsuperscriptBox["x", "j", "2"], "]"}]}]}]}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.4003660929585238`*^9, 3.40036610971426*^9}, { 3.4003662267537603`*^9, 3.400366283919821*^9}}], Cell["To formalize this notion we can rewrite equation 3 as:", "Text", CellChangeTimes->{{3.4003663528696938`*^9, 3.400366367278035*^9}, { 3.400366452349503*^9, 3.400366471997416*^9}, {3.4003670960412893`*^9, 3.400367096206251*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ SubscriptBox["f", "w"], "(", "x", ")"}], "=", RowBox[{ UnderoverscriptBox["\[Sum]", RowBox[{"j", "=", "1"}], "m"], RowBox[{ SubscriptBox["w", "j"], " ", RowBox[{ SubscriptBox["\[Phi]", "j"], "[", "x", "]"}], " "}]}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.4003660929585238`*^9, 3.40036610971426*^9}, { 3.4003662267537603`*^9, 3.400366283919821*^9}, {3.400366373936289*^9, 3.400366417555853*^9}}], Cell["\<\ Returning to the example in equation 3 we can use the notation of equation 4 \ by defining:\ \>", "Text", CellChangeTimes->{{3.400366481124503*^9, 3.4003664996784153`*^9}, { 3.4003671014302177`*^9, 3.400367106478098*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ SubscriptBox["\[Phi]", RowBox[{"j", " "}]], "[", "x", "]"}], "=", RowBox[{"\[Piecewise]", GridBox[{ { SubscriptBox["x", "j"], RowBox[{ RowBox[{"if", " ", "1"}], "\[LessEqual]", "j", "\[LessEqual]", "d"}]}, { RowBox[{"Sin", "[", SubsuperscriptBox["x", "j", "2"], "]"}], RowBox[{ 
RowBox[{ RowBox[{"if", " ", "d"}], "+", "1"}], "\[LessEqual]", "j", "\[LessEqual]", RowBox[{"2", "d"}]}]}, {"0", "otherwise"} }]}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003665057907248`*^9, 3.400366575639027*^9}}], Cell[TextData[{ "This technique allows us to lift our simple linear function ", Cell[BoxData[ FormBox[ SubscriptBox["f", "w"], TraditionalForm]]], " into a more complex space permitting a richer class of functions in our \ original space ", Cell[BoxData[ FormBox[ SuperscriptBox["\[DoubleStruckCapitalR]", "d"], TraditionalForm]]], ". With this transformation we can define a matrix \[CapitalPhi] which is \ like ", Cell[BoxData[ FormBox["X", TraditionalForm]]], " but consists of the transformed features. If we do not want to transform \ our features then we simply define:" }], "Text", CellChangeTimes->{{3.4003665809451303`*^9, 3.400366735853085*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ SubscriptBox["\[Phi]", RowBox[{"j", " "}]], "[", "x", "]"}], "=", RowBox[{"\[Piecewise]", GridBox[{ { SubscriptBox["x", "j"], RowBox[{ RowBox[{"if", " ", "1"}], "\[LessEqual]", "j", "\[LessEqual]", "d"}]}, {"0", "otherwise"} }]}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.4003665057907248`*^9, 3.400366575639027*^9}, { 3.400366747683853*^9, 3.400366754980257*^9}}], Cell["\<\ The matrix \[CapitalPhi] is constructed by:\ \>", "Text", CellChangeTimes->{{3.400366763836389*^9, 3.400366774791952*^9}, 3.400370464427367*^9}], Cell[BoxData[ FormBox[ RowBox[{"\[CapitalPhi]", "=", RowBox[{"(", GridBox[{ { RowBox[{ SubscriptBox["\[Phi]", "1"], "[", RowBox[{ SubscriptBox["X", "11"], ",", "\[Ellipsis]", ",", " ", SubscriptBox["X", RowBox[{"1", "d"}]]}], "]"}], "\[Ellipsis]", RowBox[{ SubscriptBox["\[Phi]", "m"], "[", RowBox[{ SubscriptBox["X", "11"], ",", "\[Ellipsis]", ",", " ", SubscriptBox["X", RowBox[{"1", "d"}]]}], "]"}]}, {"...", "...", "..."}, { RowBox[{ SubscriptBox["\[Phi]", "1"], "[", RowBox[{ SubscriptBox["X", "n1"], ",", "\[Ellipsis]", 
",", " ", SubscriptBox["X", "nd"]}], "]"}], "\[Ellipsis]", RowBox[{ SubscriptBox["\[Phi]", "m"], "[", RowBox[{ SubscriptBox["X", "n1"], ",", "\[Ellipsis]", ",", " ", SubscriptBox["X", "nd"]}], "]"}]} }], ")"}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.400366776493473*^9, 3.400366906347658*^9}}], Cell["\<\ If we use the trivial transform in equation 5 equation 6 becomes:\ \>", "Text", CellChangeTimes->{{3.400366919841609*^9, 3.400366947318047*^9}, { 3.40036711576357*^9, 3.400367119024541*^9}}], Cell[BoxData[ FormBox[ RowBox[{"\[CapitalPhi]", "=", RowBox[{ RowBox[{"(", GridBox[{ { SubscriptBox["X", "11"], "\[Ellipsis]", SubscriptBox["X", RowBox[{"1", "d"}]]}, {"...", "...", "..."}, { SubscriptBox["X", "n1"], "\[Ellipsis]", SubscriptBox["X", "nd"]} }], ")"}], "=", "X"}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.400366776493473*^9, 3.400366906347658*^9}, { 3.400366953920806*^9, 3.400366981458768*^9}}], Cell[TextData[{ "For the rest of these notes I will use the trivial feature space ", Cell[BoxData[ FormBox["X", TraditionalForm]]], ". However feel free to substitute \[CapitalPhi] where ever ", Cell[BoxData[ FormBox["X", TraditionalForm]]], " is used if a nonlinear feature space is desired." }], "Text", CellChangeTimes->{{3.400367869857821*^9, 3.4003679214807262`*^9}}] }, Open ]], Cell[CellGroupData[{ Cell["Noise", "Subsubsubsection", CellChangeTimes->{{3.400367050198448*^9, 3.4003670505633993`*^9}}], Cell[TextData[{ "Sadly we live in the real world where there is random noise ", Cell[BoxData[ FormBox["\[Epsilon]", TraditionalForm]]], " that gets mixed into our observations. 
So a more natural model would be \ of the form:" }], "Text", CellChangeTimes->{{3.4003620259814706`*^9, 3.400362077888385*^9}, { 3.400365839154955*^9, 3.400365929456986*^9}, 3.400370466687189*^9}], Cell[BoxData[ FormBox[ RowBox[{ StyleBox[" ", FontSlant->"Italic"], RowBox[{"y", "=", RowBox[{ RowBox[{ SubscriptBox["f", "w"], "(", "x", ")"}], " ", "+", "\[Epsilon]"}]}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.4003619628087873`*^9, 3.400362012262545*^9}, 3.4003620595131497`*^9, {3.4003658174390993`*^9, 3.400365823025799*^9}, { 3.400365934098527*^9, 3.4003659351108723`*^9}, {3.400367152300509*^9, 3.400367154301012*^9}}], Cell["\<\ We have to pick what type of noise we expect to observe. A common choice is \ 0 mean independent gaussian noise of the form:\ \>", "Text", CellChangeTimes->{{3.40036713929016*^9, 3.400367250696171*^9}, { 3.400367303809421*^9, 3.400367306666058*^9}}], Cell[BoxData[ FormBox[ RowBox[{"\[Epsilon]", "~", RowBox[{"N", "(", RowBox[{"0", ",", "\[Sigma]"}], ")"}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.400367222510096*^9, 3.40036723117283*^9}}] }, Open ]], Cell[CellGroupData[{ Cell[TextData[{ "Which ", Cell[BoxData[ FormBox[ SubscriptBox["f", "w"], TraditionalForm]]] }], "Subsubsubsection", CellChangeTimes->{{3.400367412391802*^9, 3.400367419974308*^9}}], Cell[TextData[{ "Having selected the feature transformation \[Phi] and having decided to use \ a linear model we have reduced our hypothesis space (the space of functions \ we are willing to consider for ", Cell[BoxData[ FormBox["f", TraditionalForm]]], ") from all the functions (and then some) to linear functions in the feature \ space determined by \[Phi]. The functions in this space are indexed by ", Cell[BoxData[ FormBox["w", TraditionalForm]]], " (the weight vector). How do we pick ", Cell[BoxData[ FormBox["f", TraditionalForm]]], " from this reduced hypothesis space? We simply choose the \"best\" ", Cell[BoxData[ FormBox["w", TraditionalForm]]], ". 
For the remainder of these notes we will be describing how to choose the \ ", Cell[BoxData[ FormBox["w", TraditionalForm]]], " that maximizes the likelihood of our data ", Cell[BoxData[ FormBox["X", TraditionalForm]]], " and y." }], "Text", CellChangeTimes->{{3.4003674222629757`*^9, 3.4003677174691067`*^9}, 3.4003704765447407`*^9}] }, Open ]] }, Open ]], Cell[CellGroupData[{ Cell["Matrix Notation", "Section", CellChangeTimes->{{3.400367743493659*^9, 3.400367750307901*^9}}], Cell["\<\ Let's begin with some linear algebra. We can apply our model to the data in \ the following ways:\ \>", "Text", CellChangeTimes->{{3.400367755083963*^9, 3.400367765461163*^9}, { 3.4003677956015463`*^9, 3.4003678227521753`*^9}, {3.400367946752404*^9, 3.400368010794841*^9}, {3.400368205929975*^9, 3.400368214102006*^9}}], Cell[BoxData[ FormBox[ RowBox[{"y", "=", RowBox[{ RowBox[{"(", GridBox[{ { SubscriptBox["y", "1"]}, {"\[Ellipsis]"}, { SubscriptBox["y", "n"]} }], ")"}], "=", RowBox[{ RowBox[{"(", GridBox[{ { RowBox[{ RowBox[{ SubscriptBox["f", "w"], "(", RowBox[{ RowBox[{"<", SubscriptBox["X", "11"]}], ",", "\[Ellipsis]", ",", RowBox[{ SubscriptBox["X", RowBox[{"1", "d"}]], ">"}]}], ")"}], " ", "+", SubscriptBox["\[Epsilon]", "1"]}]}, {"\[Ellipsis]"}, { RowBox[{ RowBox[{ SubscriptBox["f", "w"], "(", RowBox[{ RowBox[{"<", SubscriptBox["X", "n1"]}], ",", "\[Ellipsis]", ",", RowBox[{ SubscriptBox["X", "nd"], ">"}]}], ")"}], "+", SubscriptBox["\[Epsilon]", "n"]}]} }], ")"}], "=", RowBox[{ RowBox[{"(", GridBox[{ { RowBox[{ RowBox[{ UnderoverscriptBox["\[Sum]", RowBox[{"j", "=", "1"}], "d"], RowBox[{ SubscriptBox["w", "j"], SubscriptBox["X", RowBox[{"1", "j"}]]}]}], " ", "+", SubscriptBox["\[Epsilon]", "1"]}]}, {"\[Ellipsis]"}, { RowBox[{ RowBox[{ UnderoverscriptBox["\[Sum]", RowBox[{"j", "=", "1"}], "d"], RowBox[{ SubscriptBox["w", "j"], SubscriptBox["X", "nj"]}]}], "+", SubscriptBox["\[Epsilon]", "n"]}]} }], ")"}], "=", RowBox[{ RowBox[{"X", " ", "w"}], " ", "+", " ", 
"\[Epsilon]"}]}]}]}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.400367824948215*^9, 3.400367849461729*^9}, { 3.4003679737212963`*^9, 3.4003680663078127`*^9}, {3.400368136176868*^9, 3.400368189195472*^9}}], Cell[TextData[{ "where ", Cell[BoxData[ FormBox["w", TraditionalForm]]], " is a ", Cell[BoxData[ FormBox[ RowBox[{"d", "\[Cross]", "1"}], TraditionalForm]]], " column vector of weights and \[Epsilon] is a ", Cell[BoxData[ FormBox[ RowBox[{"n", "\[Cross]", "1"}], TraditionalForm]]], " column vector of iid ", Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["\[Epsilon]", "i"], "~", RowBox[{"N", "(", RowBox[{"0", ",", "\[Sigma]"}], ")"}]}], TraditionalForm]]], " gaussian noise. Notice how we can compactly compute all the ", Cell[BoxData[ FormBox["y", TraditionalForm]]], " at once by simply multiplying ", Cell[BoxData[ FormBox[ RowBox[{"X", " ", "w"}], TraditionalForm]]], ". If we solve for the noise in equation 9 we obtain:" }], "Text", CellChangeTimes->{{3.400368073936141*^9, 3.400368119022229*^9}, { 3.4003682173324203`*^9, 3.400368341262298*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], " ", "=", " ", RowBox[{"\[Epsilon]", " ", "~", " ", RowBox[{"N", "(", RowBox[{"0", ",", " ", RowBox[{"\[Sigma]", " ", "I"}]}], ")"}]}]}], ";"}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003596606855702`*^9, 3.400359705498644*^9}, { 3.4003605835255075`*^9, 3.400360585197404*^9}, 3.400368351269988*^9}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], " ", "~", " ", RowBox[{"N", "(", RowBox[{"0", ",", " ", RowBox[{"\[Sigma]", " ", "I"}]}], ")"}]}], ";"}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.4003596606855702`*^9, 3.400359705498644*^9}, { 3.4003605835255075`*^9, 3.400360585197404*^9}, {3.400368351269988*^9, 3.4003683675295763`*^9}}], Cell[TextData[{ "We see that the residual of our regression model follows a multivariate \ gaussian 
with covariance ", Cell[BoxData[ FormBox[ RowBox[{"\[Sigma]", " ", "I"}], TraditionalForm]]], " where ", Cell[BoxData[ FormBox["I", TraditionalForm]]], " is the identity matrix. The density of the multivariate Gaussian takes \ the form:" }], "Text", CellChangeTimes->{{3.400368374011622*^9, 3.400368547776692*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"p", "(", "V", ")"}], "=", RowBox[{ FractionBox["1", RowBox[{ SuperscriptBox[ RowBox[{"(", RowBox[{"2", "\[Pi]"}], ")"}], RowBox[{"N", "/", "2"}]], "|", "\[CapitalSigma]", SuperscriptBox["|", RowBox[{"1", "/", "2"}]]}]], RowBox[{"Exp", "[", RowBox[{ RowBox[{"-", FractionBox["1", "2"]}], RowBox[{ RowBox[{"(", RowBox[{"V", "-", "\[Mu]"}], ")"}], "\[Transpose]"}], " ", RowBox[{ SuperscriptBox["\[CapitalSigma]", RowBox[{"-", "1"}]], "(", RowBox[{"V", "-", "\[Mu]"}], ")"}]}], "]"}]}]}], TraditionalForm]], "EquationNumbered", CellChangeTimes->{{3.400368552639093*^9, 3.4003685702767353`*^9}, { 3.400368604881785*^9, 3.400368660453011*^9}}], Cell[TextData[{ "where ", Cell[BoxData[ FormBox[ RowBox[{"V", "~", RowBox[{"N", "(", RowBox[{"\[Mu]", ",", "\[CapitalSigma]"}], ")"}], " "}], TraditionalForm]]], "and ", Cell[BoxData[ FormBox[ RowBox[{"V", "\[Element]", SuperscriptBox["\[DoubleStruckCapitalR]", RowBox[{"N", "\[Cross]", "1"}]]}], TraditionalForm]]], " is a column vector of size ", Cell[BoxData[ FormBox["N", TraditionalForm]]], "." 
}], "Text", CellChangeTimes->{{3.400368665179181*^9, 3.4003687271588078`*^9}}] }, Open ]], Cell[CellGroupData[{ Cell["Likelihood", "Section", CellChangeTimes->{{3.4003687288494062`*^9, 3.4003687304684067`*^9}}], Cell[TextData[{ "Using equation 10 and 11 we can express the likelihood of our data given \ our weights ", Cell[BoxData[ FormBox["w", TraditionalForm]]], " as:" }], "Text", CellChangeTimes->{{3.400368732164372*^9, 3.4003687587587147`*^9}, { 3.4003688584095383`*^9, 3.400368865358334*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"P", "(", RowBox[{"X", ",", RowBox[{"y", "|", "w"}]}], ")"}], " ", "\[Proportional]", RowBox[{"L", "(", "w", ")"}], "\[Proportional]", RowBox[{"Exp", "[", RowBox[{ RowBox[{"-", FractionBox["1", "2"]}], RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], "\[Transpose]"}], " ", FractionBox["1", "\[Sigma]"], "I", " ", RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}]}], "]"}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.4003597706713533`*^9}, { 3.400360588463071*^9, 3.400360593666262*^9}, {3.400360765152832*^9, 3.400360766684102*^9}, {3.400368775027933*^9, 3.400368780666164*^9}, { 3.400368852763527*^9, 3.4003688568828278`*^9}, {3.400368942909618*^9, 3.400368944110572*^9}, {3.40227625634375*^9, 3.4022762575625*^9}}], Cell["\<\ We now want to maximize the likelihood of our data given the weights. 
First \ we take the log to make things easier\ \>", "Text", CellChangeTimes->{{3.40036887853356*^9, 3.400368936085945*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"l", "(", "w", ")"}], " ", "\[Proportional]", RowBox[{ RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], "\[Transpose]"}], " ", "I", " ", RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}]}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.4003597706713533`*^9}, { 3.400360588463071*^9, 3.400360593666262*^9}, {3.400360765152832*^9, 3.400360766684102*^9}, {3.400368775027933*^9, 3.400368780666164*^9}, { 3.400368852763527*^9, 3.4003688568828278`*^9}, {3.400368942909618*^9, 3.400369008357513*^9}, {3.400369077818438*^9, 3.40036908013701*^9}, 3.402276259953125*^9}], Cell["\<\ Notice that we can remove any additional multiplicative constants. We now \ have\ \>", "Text", CellChangeTimes->{{3.400369009919553*^9, 3.400369065741836*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"l", "(", "w", ")"}], "\[Proportional]", RowBox[{ UnderscriptBox[ RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], "\[Transpose]"}], UnderscriptBox["\[UnderBrace]", RowBox[{"row", " ", "vector"}]]], " ", UnderscriptBox["I", UnderscriptBox["\[UnderBrace]", RowBox[{"identity", " ", "Matrix"}]]], " ", UnderscriptBox[ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], UnderscriptBox["\[UnderBrace]", RowBox[{"col", " ", "vector"}]]]}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.4003597706713533`*^9}, { 3.400360588463071*^9, 3.400360593666262*^9}, {3.400360765152832*^9, 3.400360766684102*^9}, {3.400368775027933*^9, 3.400368780666164*^9}, { 3.400368852763527*^9, 3.4003688568828278`*^9}, {3.400368942909618*^9, 3.400369008357513*^9}, {3.400369077818438*^9, 3.4003691400841208`*^9}, 3.40227626125*^9}], Cell["\<\ You should be able to convince yourself that this is equivalent to:\ \>", "Text", 
CellChangeTimes->{{3.400369120490258*^9, 3.400369132460494*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"l", "(", "w", ")"}], "\[Proportional]", RowBox[{ RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], "\[Transpose]"}], " ", RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}]}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359798484209*^9}, { 3.4003606611827517`*^9, 3.400360665464056*^9}, 3.400360739590005*^9, { 3.400360782996811*^9, 3.400360785293715*^9}, {3.4003691458482027`*^9, 3.400369148200642*^9}, 3.4022762628125*^9}], Cell[TextData[{ "Now lets take the gradient (row vector) derivative with respect to ", Cell[BoxData[ FormBox["w", TraditionalForm]]], ":" }], "Text", CellChangeTimes->{{3.400369155119417*^9, 3.400369171165978*^9}, { 3.40227230290625*^9, 3.402272307484375*^9}, {3.402276092640625*^9, 3.4022761020625*^9}, {3.402276137359375*^9, 3.402276143203125*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ FractionBox["\[PartialD]", RowBox[{"\[PartialD]", "w"}]], " ", RowBox[{"l", "(", "w", ")"}]}], "\[Proportional]", RowBox[{ FractionBox["\[PartialD]", RowBox[{"\[PartialD]", "w"}]], "[", RowBox[{ RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], "\[Transpose]"}], " ", RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}]}], "]"}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359798484209*^9}, { 3.4003606611827517`*^9, 3.400360665464056*^9}, 3.400360739590005*^9, { 3.400360782996811*^9, 3.400360785293715*^9}, {3.4003691458482027`*^9, 3.400369148200642*^9}, {3.400369181952623*^9, 3.400369192137257*^9}, { 3.4003712405018587`*^9, 3.4003712478868*^9}, {3.40227611778125*^9, 3.402276126875*^9}, 3.40227626371875*^9}], Cell["\<\ To compute this we will use the gradient of a quadratic matrix equation.\ \[LineSeparator]For more details see \ http://en.wikipedia.org/wiki/Matrix_calculus \ 
\[LineSeparator]http://www.ee.ic.ac.uk/hp/staff/dmb/matrix/calculus.html#\ deriv_quad\ \>", "Text", CellChangeTimes->{{3.40227515334375*^9, 3.4022752015*^9}, { 3.402275378890625*^9, 3.402275405375*^9}}, TextJustification->0.], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ FractionBox["\[PartialD]", RowBox[{"\[PartialD]", "w"}]], " ", RowBox[{"l", "(", "w", ")"}]}], "\[Proportional]", RowBox[{ RowBox[{ RowBox[{"-", RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], "\[Transpose]"}]}], " ", "X"}], "-", " ", RowBox[{ RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], "\[Transpose]"}], " ", "X"}]}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359798484209*^9}, { 3.4003606611827517`*^9, 3.400360665464056*^9}, 3.400360739590005*^9, { 3.400360782996811*^9, 3.400360785293715*^9}, {3.4003691458482027`*^9, 3.400369148200642*^9}, {3.400369181952623*^9, 3.400369192137257*^9}, 3.4003692776161213`*^9, {3.400371251759026*^9, 3.400371266755034*^9}, { 3.402274029796875*^9, 3.40227403509375*^9}, {3.402275218125*^9, 3.402275230921875*^9}, 3.402276130265625*^9, 3.402276265296875*^9}], Cell["Simplifying a little", "Text", CellChangeTimes->{{3.400369831995401*^9, 3.400369835604148*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ FractionBox["\[PartialD]", RowBox[{"\[PartialD]", "w"}]], " ", RowBox[{"l", "(", "w", ")"}]}], "\[Proportional]", RowBox[{ RowBox[{"-", "2"}], RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], "\[Transpose]"}], " ", "X"}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359833469032*^9}, { 3.400360292412406*^9, 3.4003602936155467`*^9}, 3.400360740699394*^9, 3.4003607993720202`*^9, {3.400360861560316*^9, 3.4003608961232586`*^9}, { 3.4003609505927057`*^9, 3.4003610055465345`*^9}, {3.400369851484765*^9, 3.400369851712788*^9}, 3.400371277536051*^9, 3.402276152265625*^9, { 3.402276197390625*^9, 3.4022762001875*^9}, 
3.402276267046875*^9}], Cell["Removing extraneous constants", "Text", CellChangeTimes->{{3.400369861053609*^9, 3.400369867893085*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ FractionBox["\[PartialD]", RowBox[{"\[PartialD]", "w"}]], " ", RowBox[{"l", "(", "w", ")"}]}], "\[Proportional]", RowBox[{ RowBox[{"-", RowBox[{ RowBox[{"(", RowBox[{"y", "-", RowBox[{"X", " ", "w"}]}], ")"}], "\[Transpose]"}]}], " ", "X"}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359833469032*^9}, { 3.400360292412406*^9, 3.4003602936155467`*^9}, 3.400360740699394*^9, 3.4003607993720202`*^9, {3.400360861560316*^9, 3.4003608961232586`*^9}, { 3.4003609505927057`*^9, 3.4003610055465345`*^9}, {3.400369851484765*^9, 3.400369851712788*^9}, 3.400371277536051*^9, 3.402276152265625*^9, { 3.402276197390625*^9, 3.402276226171875*^9}, 3.402276270375*^9}], Cell["Apply the transpose", "Text", CellChangeTimes->{{3.402276291421875*^9, 3.402276296046875*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ FractionBox["\[PartialD]", RowBox[{"\[PartialD]", "w"}]], " ", RowBox[{"l", "(", "w", ")"}]}], "\[Proportional]", RowBox[{ RowBox[{"-", RowBox[{"(", RowBox[{ RowBox[{"y", "\[Transpose]"}], "-", RowBox[{ RowBox[{"w", "\[Transpose]"}], " ", RowBox[{"X", "\[Transpose]"}]}]}], ")"}]}], " ", "X"}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359833469032*^9}, { 3.400360292412406*^9, 3.4003602936155467`*^9}, 3.400360740699394*^9, 3.4003607993720202`*^9, {3.400360861560316*^9, 3.4003608961232586`*^9}, { 3.4003609505927057`*^9, 3.4003610055465345`*^9}, {3.400369851484765*^9, 3.400369851712788*^9}, 3.400371277536051*^9, 3.402276152265625*^9, { 3.402276197390625*^9, 3.402276226171875*^9}, 3.402276270375*^9, { 3.402276303*^9, 3.402276331140625*^9}}], Cell[TextData[{ "Multiplying through by ", Cell[BoxData[ FormBox["X", TraditionalForm]]], ":" }], "Text", CellChangeTimes->{{3.400369890184457*^9, 3.400369901127213*^9}, { 3.402276361125*^9, 
3.402276362921875*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ FractionBox["\[PartialD]", RowBox[{"\[PartialD]", "w"}]], " ", RowBox[{"l", "(", "w", ")"}]}], "\[Proportional]", RowBox[{ RowBox[{ RowBox[{"-", RowBox[{"y", "\[Transpose]"}]}], "X"}], "+", RowBox[{ RowBox[{"w", "\[Transpose]"}], " ", RowBox[{"X", "\[Transpose]"}], "X"}]}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359833469032*^9}, { 3.400360292412406*^9, 3.4003602936155467`*^9}, 3.400360740699394*^9, 3.4003607993720202`*^9, {3.400360861560316*^9, 3.4003608961232586`*^9}, { 3.4003609505927057`*^9, 3.4003610055465345`*^9}, {3.400369851484765*^9, 3.400369851712788*^9}, 3.400371277536051*^9, 3.402276152265625*^9, { 3.402276197390625*^9, 3.402276226171875*^9}, 3.402276270375*^9, { 3.402276303*^9, 3.402276331140625*^9}, {3.402276368484375*^9, 3.40227637246875*^9}, 3.402276411390625*^9}], Cell[TextData[{ "Finally we set the derivative equal to zero and solve for ", Cell[BoxData[ FormBox["w", TraditionalForm]]], " to obtain:" }], "Text", CellChangeTimes->{{3.40036991346552*^9, 3.400369946525887*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ RowBox[{ FractionBox["\[PartialD]", RowBox[{"\[PartialD]", "w"}]], " ", RowBox[{"l", "(", "w", ")"}]}], "\[Proportional]", RowBox[{ RowBox[{ RowBox[{"-", RowBox[{"y", "\[Transpose]"}]}], "X"}], "+", RowBox[{ RowBox[{"w", "\[Transpose]"}], " ", RowBox[{"X", "\[Transpose]"}], "X"}]}]}], "=", " ", "0"}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359833469032*^9}, { 3.400360292412406*^9, 3.4003602936155467`*^9}, 3.400360740699394*^9, 3.4003607993720202`*^9, {3.400360861560316*^9, 3.4003608961232586`*^9}, { 3.4003609505927057`*^9, 3.4003610055465345`*^9}, {3.400369851484765*^9, 3.400369851712788*^9}, 3.400371277536051*^9, 3.402276152265625*^9, { 3.402276197390625*^9, 3.402276226171875*^9}, 3.402276270375*^9, { 3.402276303*^9, 3.402276331140625*^9}, {3.402276368484375*^9, 3.40227637246875*^9}, 
3.402276411390625*^9, {3.402276454015625*^9, 3.402276454375*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ RowBox[{"w", "\[Transpose]"}], " ", RowBox[{"X", "\[Transpose]"}], "X"}], "=", RowBox[{ RowBox[{"y", "\[Transpose]"}], "X"}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359833469032*^9}, { 3.400360292412406*^9, 3.4003602936155467`*^9}, 3.400360740699394*^9, 3.4003607993720202`*^9, {3.400360861560316*^9, 3.4003608961232586`*^9}, { 3.4003609505927057`*^9, 3.4003610055465345`*^9}, {3.400369851484765*^9, 3.400369851712788*^9}, 3.400371277536051*^9, 3.402276152265625*^9, { 3.402276197390625*^9, 3.402276226171875*^9}, 3.402276270375*^9, { 3.402276303*^9, 3.402276331140625*^9}, {3.402276368484375*^9, 3.40227637246875*^9}, 3.402276411390625*^9, {3.402276454015625*^9, 3.402276484015625*^9}}], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"w", "\[Transpose]"}], " ", "=", RowBox[{ RowBox[{"y", "\[Transpose]"}], "X", " ", SuperscriptBox[ RowBox[{"(", RowBox[{ RowBox[{"X", "\[Transpose]"}], "X"}], ")"}], RowBox[{"-", "1"}]]}]}], TraditionalForm]], "Equation", CellChangeTimes->{{3.4003597234519987`*^9, 3.400359833469032*^9}, { 3.400360292412406*^9, 3.4003602936155467`*^9}, 3.400360740699394*^9, 3.4003607993720202`*^9, {3.400360861560316*^9, 3.4003608961232586`*^9}, { 3.4003609505927057`*^9, 3.4003610055465345`*^9}, {3.400369851484765*^9, 3.400369851712788*^9}, 3.400371277536051*^9, 3.402276152265625*^9, { 3.402276197390625*^9, 3.402276226171875*^9}, 3.402276270375*^9, { 3.402276303*^9, 3.402276331140625*^9}, {3.402276368484375*^9, 3.40227637246875*^9}, 3.402276411390625*^9, {3.402276454015625*^9, 3.40227650240625*^9}}], Cell["Finally removing the transpose we have:", "Text", CellChangeTimes->{{3.402276519109375*^9, 3.40227654965625*^9}, { 3.402276593984375*^9, 3.40227659478125*^9}}], Cell[BoxData[ FormBox[ RowBox[{"w", "=", " ", RowBox[{ SuperscriptBox[ RowBox[{"(", RowBox[{ RowBox[{"X", "\[Transpose]"}], "X"}], ")"}], RowBox[{"-", "1"}]], 
RowBox[{"X", "\[Transpose]"}], " ", "y"}]}], TraditionalForm]], "Equation",\ CellChangeTimes->{{3.4003597234519987`*^9, 3.400359833469032*^9}, { 3.400360292412406*^9, 3.4003602936155467`*^9}, 3.400360740699394*^9, 3.4003607993720202`*^9, {3.400360861560316*^9, 3.4003608961232586`*^9}, { 3.4003609505927057`*^9, 3.4003610055465345`*^9}, {3.400369851484765*^9, 3.400369851712788*^9}, 3.400371277536051*^9, 3.402276152265625*^9, { 3.402276197390625*^9, 3.402276226171875*^9}, 3.402276270375*^9, { 3.402276303*^9, 3.402276331140625*^9}, {3.402276368484375*^9, 3.40227637246875*^9}, 3.402276411390625*^9, {3.402276454015625*^9, 3.402276541484375*^9}}], Cell["Thus you have the matrix form of the MLE.", "Text", CellChangeTimes->{{3.400369952105082*^9, 3.400369960422697*^9}}] }, Open ]] }, Open ]] }, WindowSize->{1272, 907}, WindowMargins->{{0, Automatic}, {Automatic, 0}}, PrintingCopies->1, PrintingPageRange->{Automatic, Automatic}, Magnification->2., FrontEndVersion->"6.0 for Microsoft Windows (32-bit) (June 19, 2007)", StyleDefinitions->FrontEnd`FileName[{"Article"}, "JournalArticle.nb", CharacterEncoding -> "WindowsANSI"] ] (* End of Notebook Content *) (* Internal cache information *) (*CellTagsOutline CellTagsIndex->{} *) (*CellTagsIndex CellTagsIndex->{} *) (*NotebookFileOutline Notebook[{ Cell[CellGroupData[{ Cell[590, 23, 117, 1, 155, "Title"], Cell[CellGroupData[{ Cell[732, 28, 106, 1, 85, "Author"], Cell[841, 31, 507, 9, 109, "Text"] }, Open ]], Cell[CellGroupData[{ Cell[1385, 45, 96, 1, 109, "Section"], Cell[1484, 48, 1442, 44, 144, "Text"], Cell[2929, 94, 329, 9, 63, "EquationNumbered"], Cell[CellGroupData[{ Cell[3283, 107, 109, 1, 52, "Subsubsubsection"], Cell[3395, 110, 641, 18, 109, "Text"], Cell[4039, 130, 420, 12, 110, "EquationNumbered"] }, Open ]], Cell[CellGroupData[{ Cell[4496, 147, 114, 1, 51, "Subsubsubsection"], Cell[4613, 150, 407, 10, 81, "Text"], Cell[5023, 162, 724, 23, 113, "EquationNumbered"], Cell[5750, 187, 238, 3, 42, "Text"], Cell[5991, 192, 
514, 15, 105, "EquationNumbered"], Cell[6508, 209, 234, 5, 43, "Text"], Cell[6745, 216, 685, 22, 119, "Equation"], Cell[7433, 240, 677, 17, 111, "Text"], Cell[8113, 259, 498, 15, 83, "EquationNumbered"], Cell[8614, 276, 159, 4, 43, "Text"], Cell[8776, 282, 1069, 30, 107, "EquationNumbered"], Cell[9848, 314, 203, 4, 43, "Text"], Cell[10054, 320, 517, 15, 107, "EquationNumbered"], Cell[10574, 337, 382, 9, 76, "Text"] }, Open ]], Cell[CellGroupData[{ Cell[10993, 351, 101, 1, 52, "Subsubsubsection"], Cell[11097, 354, 383, 8, 76, "Text"], Cell[11483, 364, 492, 13, 63, "EquationNumbered"], Cell[11978, 379, 263, 5, 76, "Text"], Cell[12244, 386, 216, 5, 55, "Equation"] }, Open ]], Cell[CellGroupData[{ Cell[12497, 396, 188, 6, 52, "Subsubsubsection"], Cell[12688, 404, 1030, 26, 175, "Text"] }, Open ]] }, Open ]], Cell[CellGroupData[{ Cell[13767, 436, 100, 1, 109, "Section"], Cell[13870, 439, 335, 6, 43, "Text"], Cell[14208, 447, 1995, 64, 127, "EquationNumbered"], Cell[16206, 513, 910, 29, 76, "Text"], Cell[17119, 544, 448, 12, 55, "Equation"], Cell[17570, 558, 464, 13, 63, "EquationNumbered"], Cell[18037, 573, 428, 12, 76, "Text"], Cell[18468, 587, 829, 26, 90, "EquationNumbered"], Cell[19300, 615, 524, 19, 43, "Text"] }, Open ]], Cell[CellGroupData[{ Cell[19861, 639, 99, 1, 109, "Section"], Cell[19963, 642, 296, 8, 43, "Text"], Cell[20262, 652, 943, 24, 78, "Equation"], Cell[21208, 678, 206, 4, 43, "Text"], Cell[21417, 684, 742, 17, 55, "Equation"], Cell[22162, 703, 171, 4, 43, "Text"], Cell[22336, 709, 1045, 26, 84, "Equation"], Cell[23384, 737, 157, 3, 43, "Text"], Cell[23544, 742, 602, 15, 55, "Equation"], Cell[24149, 759, 361, 8, 43, "Text"], Cell[24513, 769, 938, 24, 79, "Equation"], Cell[25454, 795, 399, 9, 109, "Text"], Cell[25856, 806, 1062, 27, 79, "Equation"], Cell[26921, 835, 102, 1, 43, "Text"], Cell[27026, 838, 813, 19, 79, "Equation"], Cell[27842, 859, 111, 1, 43, "Text"], Cell[27956, 862, 811, 19, 79, "Equation"], Cell[28770, 883, 100, 1, 43, "Text"], 
Cell[28873, 886, 924, 22, 79, "Equation"], Cell[29800, 910, 222, 7, 43, "Text"], Cell[30025, 919, 955, 21, 79, "Equation"], Cell[30983, 942, 220, 6, 43, "Text"], Cell[31206, 950, 1045, 24, 79, "Equation"], Cell[32254, 976, 822, 16, 55, "Equation"], Cell[33079, 994, 904, 19, 57, "Equation"], Cell[33986, 1015, 168, 2, 43, "Text"], Cell[34157, 1019, 877, 19, 57, "Equation"], Cell[35037, 1040, 123, 1, 43, "Text"] }, Open ]] }, Open ]] } ] *) (* End of internal cache information *)