\relax 
\bibstyle{plainnat}
\citation{Lewis:SIGIR94}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{45}}
\newlabel{Introduction}{{1}{45}}
\citation{Vapnik:1998}
\citation{Rocchio}
\citation{Dumais+al:CIKM98}
\citation{Sebastiani:2001}
\citation{Joachims:ECML97}
\citation{Dumais+al:CIKM98}
\citation{Vapnik:1982}
\citation{Burges:1998}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces (a) A simple linear support vector machine. (b) A SVM (dotted line) and a transductive SVM (solid line). Solid circles represent unlabeled instances.}}{47}}
\newlabel{fig:max_margin}{{1}{47}}
\newlabel{fig:tsvm}{{1}{47}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Support Vector Machines}{47}}
\newlabel{SVMs}{{2}{47}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}SVMs for Induction}{47}}
\newlabel{eq:kernel_classifier}{{1}{47}}
\newlabel{eq:kernel_hyperplane}{{2}{47}}
\citation{Joachims:ICML99}
\citation{Mitchell:AI82}
\citation{ShaweTaylor:COLT99}
\citation{Cortes+Vapnik:ML95}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}SVMs for Transduction}{48}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Version Space}{48}}
\newlabel{Version_Space}{{3}{48}}
\newlabel{def:version_space}{{1}{48}}
\citation{Vapnik:1998}
\citation{Herbrich+al}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces (a) Version space duality. The surface of the hypersphere represents unit weight vectors. Each of the two hyperplanes corresponds to a labeled training instance. Each hyperplane restricts the area on the hypersphere in which consistent hypotheses can lie. Here, the version space is the surface segment of the hypersphere closest to the camera. (b) An SVM classifier in a version space. The dark embedded sphere is the largest radius sphere whose center lies in the version space and whose surface does not intersect with the hyperplanes. The center of the embedded sphere corresponds to the SVM, its radius is proportional to the margin of the SVM in $\@mathcal {F}$, and the training points corresponding to the hyperplanes that it touches are the support vectors.}}{49}}
\newlabel{fig:duality}{{2}{49}}
\citation{Seung+al:COLT92}
\citation{Horvitz+Rutledge:91}
\citation{Latombe:1991}
\citation{Heckerman+al:94}
\@writefile{toc}{\contentsline {section}{\numberline {4}Active Learning}{51}}
\newlabel{Active}{{4}{51}}
\citation{Freund+al:ML97}
\newlabel{lemma:ideal_learning}{{4}{52}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces (a) $\@mathsf {Simple}$ Margin will query $\@mathbf {b}$. (b) $\@mathsf {Simple}$ Margin will query $\@mathbf {a}$.}}{53}}
\newlabel{fig:simple}{{3}{53}}
\newlabel{fig:simple2}{{3}{53}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces (a) $\@mathsf {MaxMin}$ Margin will query $\@mathbf {b}$. The two SVMs with margins $m^{-}$ and $m^{+}$ for $\@mathbf {b}$ are shown. (b) $\@mathsf {Ratio}$ Margin will query $\@mathbf {e}$. The two SVMs with margins $m^{-}$ and $m^{+}$ for $\@mathbf {e}$ are shown. }}{53}}
\newlabel{fig:maxMargin}{{4}{53}}
\newlabel{fig:maxRatio}{{4}{53}}
\citation{Herbrich+al}
\citation{Vapnik:1998}
\citation{Campbell+al:2000}
\citation{Libbow}
\citation{JoachimsSVMlight:1999}
\@writefile{toc}{\contentsline {section}{\numberline {5}Experiments}{55}}
\newlabel{Experiments}{{5}{55}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Reuters Data Collection Experiments}{55}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces  (a) Average test set accuracy over the ten most frequently occurring topics when using a pool size of 1000. (b) Average test set precision/recall breakeven point over the ten most frequently occurring topics when using a pool size of 1000. }}{56}}
\newlabel{fig:reuter1000acc}{{5}{56}}
\newlabel{fig:reuter1000be}{{5}{56}}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Average test set accuracy over the top ten most frequently occurring topics (most frequent topic first) when trained with ten labeled documents. Boldface indicates statistical significance.}}{56}}
\newlabel{fig:reuter10equiv.acc}{{1}{56}}
\citation{Joachims:ECML97}
\citation{McCallum+Nigam:ICML98}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Average test set precision/recall breakeven point over the top ten most frequently occurring topics (most frequent topic first) when trained with ten labeled documents. Boldface indicates statistical significance.}}{57}}
\newlabel{fig:reuter10equiv.be}{{2}{57}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces  (a) Average test set accuracy over the ten most frequently occurring topics when using a pool size of 1000. (b) Average test set precision/recall breakeven point over the ten most frequently occurring topics when using a pool size of 1000. }}{58}}
\newlabel{fig:reuter_balanced_acc}{{6}{58}}
\newlabel{fig:reuter_balanced_be}{{6}{58}}
\citation{Joachims:ECML97}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces  (a) Average test set accuracy over the ten most frequently occurring topics when using a pool sizes of 500 and 1000. (b) Average breakeven point over the ten most frequently occurring topics when using a pool sizes of 500 and 1000. }}{59}}
\newlabel{fig:reuter500_1000acc}{{7}{59}}
\newlabel{fig:reuter500_1000be}{{7}{59}}
\citation{Lang95}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces  Average pool set precision/recall breakeven point over the ten most frequently occurring topics when using a pool size of 1000. }}{60}}
\newlabel{fig:reutertrans}{{8}{60}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces  (a) Average test set accuracy over the five $\@mathtt {comp.*}$ topics when using a pool size of 500. (b) Average test set accuracy for $\@mathtt {comp.sys.ibm.pc.hardware}$ with a 500 pool size.}}{60}}
\newlabel{fig:ng.all.acc}{{9}{60}}
\newlabel{fig:ng.ibm.acc}{{9}{60}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Newsgroups Experiments}{60}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces  (a) A simple example of querying unlabeled clusters. (b) Macro-average test set accuracy for $\@mathtt {comp.os.ms\unhbox \voidb@x \hbox {-}windows.misc}$ and $\@mathtt {comp.sys.ibm.pc.hardware}$ where $\@mathsf {Hybrid}$ uses the $\@mathsf {Ratio}$ method for the first ten queries and $\@mathsf {Simple}$ for the rest. }}{61}}
\newlabel{fig:hybrid}{{10}{61}}
\newlabel{fig:cluster}{{10}{61}}
\citation{Seung+al:COLT92}
\citation{Freund+al:ML97}
\citation{Dagan+Engelson:ICML95}
\citation{McCallum+Nigam:ICML98}
\citation{Joachims:ECML97}
\citation{Dumais+al:CIKM98}
\citation{McCallum+Nigam:ICML98}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Typical run times in seconds for the Active methods on the $\@mathsf {Newsgroups}$ dataset}}{62}}
\newlabel{table:runtimes}{{3}{62}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Related Work}{62}}
\newlabel{Related}{{6}{62}}
\citation{Lewis:SIGIR94}
\citation{Lewis:ICML94}
\citation{Campbell+al:2000}
\citation{Schohn+Cohn:2000}
\citation{Cortes+Vapnik:ML95}
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces  (a) Average breakeven point performance over the Corn, Trade and Acq $\@mathsf {Reuters}$-21578 categories. (b) Average test set accuracy over the top ten $\@mathsf {Reuters}$-21578 categories. }}{63}}
\newlabel{fig:vs.mccallum}{{11}{63}}
\newlabel{fig:vs.liere}{{11}{63}}
\citation{Lewis:SIGIR94}
\citation{McCallum+Nigam:ICML98}
\citation{Herbrich+al}
\citation{McAllester:COLT99}
\citation{Gert:NIPS00}
\@writefile{toc}{\contentsline {section}{\numberline {7}Conclusions and Future Work}{64}}
\newlabel{Conclusions}{{7}{64}}
\bibdata{simon}
\bibcite{Burges:1998}{{1}{1998}{{Burges}}{{}}}
\bibcite{Campbell+al:2000}{{2}{2000}{{Campbell et~al.}}{{Campbell, Cristianini, and Smola}}}
\bibcite{Gert:NIPS00}{{3}{2001}{{Cauwenberghs and Poggio}}{{}}}
\bibcite{Cortes+Vapnik:ML95}{{4}{1995}{{Cortes and Vapnik}}{{}}}
\bibcite{Dagan+Engelson:ICML95}{{5}{1995}{{Dagan and Engelson}}{{}}}
\bibcite{Dumais+al:CIKM98}{{6}{1998}{{Dumais et~al.}}{{Dumais, Platt, Heckerman, and Sahami}}}
\bibcite{Freund+al:ML97}{{7}{1997}{{Freund et~al.}}{{Freund, Seung, Shamir, and Tishby}}}
\bibcite{Heckerman+al:94}{{8}{1994}{{Heckerman et~al.}}{{Heckerman, Breese, and Rommelse}}}
\bibcite{Herbrich+al}{{9}{2001}{{Herbrich et~al.}}{{Herbrich, Graepel, and Campbell}}}
\bibcite{Horvitz+Rutledge:91}{{10}{1991}{{Horvitz and Rutledge}}{{}}}
\bibcite{Joachims:ECML97}{{11}{1998}{{Joachims}}{{}}}
\bibcite{JoachimsSVMlight:1999}{{12}{1999{a}}{{Joachims}}{{}}}
\bibcite{Joachims:ICML99}{{13}{1999{b}}{{Joachims}}{{}}}
\bibcite{Lang95}{{14}{1995}{{Lang}}{{}}}
\bibcite{Latombe:1991}{{15}{1991}{{Latombe}}{{}}}
\bibcite{Lewis:ICML94}{{16}{1994}{{Lewis and Catlett}}{{}}}
\bibcite{Lewis:SIGIR94}{{17}{1994}{{Lewis and Gale}}{{}}}
\bibcite{McAllester:COLT99}{{18}{1999}{{McAllester}}{{}}}
\bibcite{Libbow}{{19}{1996}{{McCallum}}{{}}}
\bibcite{McCallum+Nigam:ICML98}{{20}{1998}{{McCallum and Nigam}}{{}}}
\bibcite{Mitchell:AI82}{{21}{1982}{{Mitchell}}{{}}}
\bibcite{Rocchio}{{22}{1971}{{Rocchio}}{{}}}
\bibcite{Schohn+Cohn:2000}{{23}{2000}{{Schohn and Cohn}}{{}}}
\bibcite{Sebastiani:2001}{{24}{2001}{{Sebastiani}}{{}}}
\bibcite{Seung+al:COLT92}{{25}{1992}{{Seung et~al.}}{{Seung, Opper, and Sompolinsky}}}
\bibcite{ShaweTaylor:COLT99}{{26}{1999}{{Shawe-Taylor and Cristianini}}{{}}}
\bibcite{Vapnik:1982}{{27}{1982}{{Vapnik}}{{}}}
\bibcite{Vapnik:1998}{{28}{1998}{{Vapnik}}{{}}}