\relax 
\bibstyle{plainnat}
\citation{BreimanFrOlSt84}
\citation{Quinlan93}
\citation{FreundSc97}
\citation{SchapireSi99}
\citation{DietterichBa95}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{265}}
\citation{Vapnik98}
\citation{ScholkopfBuSm98}
\citation{CristianiniSh00}
\citation{WestonWa99}
\citation{Vapnik98}
\citation{AllweinScSi00}
\citation{Vapnik98}
\citation{WestonWa99}
\citation{BredensteinerBe99}
\citation{GuermeurElPa00}
\citation{BoserGuVa92}
\citation{Joachims98}
\citation{Bregman67}
\citation{CensorZe97}
\citation{Platt98}
\citation{Burges98}
\citation{Platt98}
\citation{Joachims98}
\citation{AllweinScSi00}
\citation{DietterichBa95}
\citation{CrammerSi00}
\citation{CrammerSi00}
\citation{CrammerSi01}
\@writefile{toc}{\contentsline {paragraph}{Related work}{266}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Preliminaries}{267}}
\citation{HoffgenSi92}
\citation{CrammerSi00}
\citation{Vapnik98}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Illustration of the margin bound employed by the optimization problem.}}{268}}
\newlabel{bound:fig}{{1}{268}}
\newlabel{dloss:eqn}{{1}{268}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Constructing multiclass kernel-based predictors}{268}}
\newlabel{main:sec}{{3}{268}}
\citation{PlattCrSh00}
\citation{CrammerSi00}
\citation{Vapnik98}
\citation{Vapnik98}
\citation{CristianiniSh00}
\newlabel{bound}{{2}{269}}
\newlabel{all_sample_correct_conditions}{{3}{269}}
\newlabel{margin_equations}{{4}{269}}
\newlabel{optimization_problem_p_basic}{{5}{269}}
\newlabel{constraints_on_xi}{{6}{269}}
\newlabel{primal_program_l2}{{7}{269}}
\newlabel{Lagrangian_l2}{{8}{270}}
\newlabel{sum_eta_is_1}{{9}{270}}
\newlabel{value_of_mr}{{10}{270}}
\citation{CortesVa95}
\citation{Vapnik98}
\newlabel{dual_program_l2}{{11}{271}}
\newlabel{value_of_mr_tau}{{12}{271}}
\newlabel{dual_program_l2_tau}{{13}{271}}
\newlabel{dual_problem_algorithm}{{14}{271}}
\citation{WestonWa99}
\newlabel{dual_program_l2_tau_kip}{{15}{272}}
\newlabel{dual_problem_algorithm_kip}{{16}{272}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Decomposing the optimization problem}{272}}
\newlabel{decomp:sec}{{4}{272}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Skeleton of the algorithm for learning multiclass support vector machine.}}{273}}
\newlabel{algorithm_1}{{2}{273}}
\newlabel{dual_problem_A_p}{{18}{273}}
\newlabel{dual_problem_B_p}{{19}{273}}
\citation{CristianiniSh00}
\newlabel{dual_program_l2_tau_one_example}{{20}{274}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Example selection for optimization}{274}}
\newlabel{choose_and_stop}{{5}{274}}
\newlabel{lagrangian_for_reduced}{{21}{274}}
\newlabel{derative_of_l_tau_ir}{{22}{274}}
\newlabel{matrix_f_ir}{{23}{275}}
\newlabel{ftob:eqn}{{24}{275}}
\newlabel{derivative_constraint}{{25}{275}}
\newlabel{multiplication_constraint}{{26}{275}}
\newlabel{u_constraint}{{27}{275}}
\newlabel{first_case}{{28}{275}}
\newlabel{second_case}{{29}{275}}
\newlabel{psi_i}{{32}{275}}
\citation{Lin01}
\citation{KeerthiGi00}
\citation{CrammerSi00}
\@writefile{toc}{\contentsline {section}{\numberline {6}Solving the reduced optimization problem}{276}}
\newlabel{small_problem}{{6}{276}}
\newlabel{value_of_vec_nu_and_vec_D}{{33}{276}}
\newlabel{dual_program_nu}{{34}{276}}
\newlabel{nu_r:eqn}{{36}{277}}
\newlabel{equality_constraint_nu}{{37}{277}}
\newlabel{fixed_point_of_theta}{{38}{278}}
\newlabel{function_f}{{39}{278}}
\newlabel{fixed_point_of_theta_F}{{40}{278}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces The fixed-point algorithm for solving the reduced quadratic program.}}{279}}
\newlabel{algorithm_for_theta}{{3}{279}}
\newlabel{theta_tag}{{41}{279}}
\newlabel{theta_star}{{42}{279}}
\newlabel{theta_star_upper_bounds_D_r}{{43}{279}}
\citation{Platt98}
\citation{Joachims98}
\citation{CollobertBe01}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Basic algorithm for learning a multiclass, kernel-based, support vector machine using KKT conditions for example selection.}}{281}}
\newlabel{algorithm_2}{{4}{281}}
\@writefile{toc}{\contentsline {section}{\numberline {7}Implementation details}{281}}
\newlabel{impdet:sec}{{7}{281}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces The run time (left) and test error (right) as a function of required accuracy $\epsilon $.}}{282}}
\newlabel{epsilon_effect:fig}{{5}{282}}
\@writefile{toc}{\contentsline {paragraph}{Using KKT for example selection}{282}}
\@writefile{toc}{\contentsline {paragraph}{Maintaining an active set}{282}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces The value of the objective function ${\@mathcal {Q}}$ as a function of the number of iteration for a fixed and variable scheduling of the accuracy parameter $\epsilon $.}}{283}}
\newlabel{epsilon_cooling:fig}{{6}{283}}
\@writefile{toc}{\contentsline {paragraph}{Cooling of the accuracy parameter}{283}}
\citation{Platt98}
\citation{Joachims98}
\citation{CollobertBe01}
\citation{Joachims98}
\citation{Platt98}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Comparison of the run-time on the MNIST dataset of the different versions as a function of the training-set size. Version 1 is the baseline implementation. Version 2 uses KKT conditions for selecting an example to update. Version 3 adds the usage of an active set and cooling of $\epsilon $. Version 4 adds caching of inner-products. Finally, version 5 uses data structures for representing and using sparse inputs.}}{284}}
\newlabel{run_time:fig}{{7}{284}}
\@writefile{toc}{\contentsline {paragraph}{Caching}{284}}
\citation{Platt98}
\newlabel{properties}{{7}{285}}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Description of the small databases used in the experiments.}}{285}}
\newlabel{small_set_exper:table}{{1}{285}}
\@writefile{toc}{\contentsline {paragraph}{Data-structures for sparse input instances}{285}}
\@writefile{toc}{\contentsline {section}{\numberline {8}Experiments}{285}}
\newlabel{exper:sec}{{8}{285}}
\citation{Scholkopf97}
\citation{ScholkopfSuBuGiNiPoVa97}
\citation{Dietterich00}
\citation{DeCosteSc01}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Comparison of a multiclass SVM build using the one-against-rest approach with the multiclass support vector machines studied in this paper.}}{286}}
\newlabel{decrease_in_error_rate:fig}{{8}{286}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Examples of images which are support patterns with a large norm of ${\mathaccent "7016\relax {\tau }}$ (MNIST dataset).}}{287}}
\newlabel{demo:fig}{{9}{287}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Summary of experiments with large datasets.}}{288}}
\newlabel{large_set_exper_result:table}{{2}{288}}
\@writefile{toc}{\contentsline {section}{\numberline {9}Summary}{288}}
\newlabel{conc:sec}{{9}{288}}
\@writefile{toc}{\contentsline {section}{\numberline {A}Derivation of the dual optimization problem}{289}}
\newlabel{calculations}{{A}{289}}
\newlabel{q_stage_1}{{44}{289}}
\newlabel{star_1}{{45}{289}}
\newlabel{star_2}{{46}{289}}
\bibdata{bib}
\bibcite{AllweinScSi00}{{1}{2000}{{Allwein et~al.}}{{Allwein, Schapire, and Singer}}}
\bibcite{BoserGuVa92}{{2}{1992}{{Boser et~al.}}{{Boser, Guyon, and Vapnik}}}
\bibcite{BredensteinerBe99}{{3}{1999}{{Bredensteiner and Bennet}}{{}}}
\bibcite{Bregman67}{{4}{1967}{{Bregman}}{{}}}
\bibcite{BreimanFrOlSt84}{{5}{1984}{{Breiman et~al.}}{{Breiman, Friedman, Olshen, and Stone}}}
\bibcite{Burges98}{{6}{1998}{{Burges}}{{}}}
\bibcite{CensorZe97}{{7}{1997}{{Censor and Zenios}}{{}}}
\bibcite{CollobertBe01}{{8}{2001}{{Collobert and Bengio}}{{}}}
\bibcite{CortesVa95}{{9}{1995}{{Cortes and Vapnik}}{{}}}
\newlabel{star_3}{{47}{290}}
\newlabel{star_12}{{48}{290}}
\bibcite{CrammerSi00}{{10}{2000}{{Crammer and Singer}}{{}}}
\bibcite{CrammerSi01}{{11}{2001}{{Crammer and Singer}}{{}}}
\bibcite{CristianiniSh00}{{12}{2000}{{Cristianini and Shawe-Taylor}}{{}}}
\bibcite{DeCosteSc01}{{13}{2001}{{DeCoste and Sch\"olkopf}}{{}}}
\bibcite{Dietterich00}{{14}{2000}{{Dietterich}}{{}}}
\bibcite{DietterichBa95}{{15}{1995}{{Dietterich and Bakiri}}{{}}}
\bibcite{FreundSc97}{{16}{1997}{{Freund and Schapire}}{{}}}
\bibcite{GuermeurElPa00}{{17}{2000}{{Guermeur et~al.}}{{Guermeur, Elisseeff, and Paugam-Moisy}}}
\bibcite{HoffgenSi92}{{18}{1992}{{H\"offgen and Simon}}{{}}}
\bibcite{Joachims98}{{19}{1998}{{Joachims}}{{}}}
\bibcite{KeerthiGi00}{{20}{2000}{{Keerthi and Gilbert}}{{}}}
\bibcite{Lin01}{{21}{2001}{{Lin}}{{}}}
\bibcite{Platt98}{{22}{1998}{{Platt}}{{}}}
\bibcite{PlattCrSh00}{{23}{2000}{{Platt et~al.}}{{Platt, Cristianini, and Shawe-Taylor}}}
\bibcite{Quinlan93}{{24}{1993}{{Quinlan}}{{}}}
\bibcite{SchapireSi99}{{25}{1999}{{Schapire and Singer}}{{}}}
\bibcite{Scholkopf97}{{26}{1997}{{Sch\"olkopf}}{{}}}
\bibcite{ScholkopfBuSm98}{{27}{1998}{{Sch\"olkopf et~al.}}{{Sch\"olkopf, Burges, and Smola}}}
\bibcite{ScholkopfSuBuGiNiPoVa97}{{28}{1996}{{{Sch\"olkopf} et~al.}}{{{Sch\"olkopf}, Sung, Burges, Girosi, Niyogi, Poggio, and Vapnik}}}
\bibcite{Vapnik98}{{29}{1998}{{Vapnik}}{{}}}
\bibcite{WestonWa99}{{30}{1999}{{Weston and Watkins}}{{}}}
