\begin{thebibliography}{}

\bibitem[Akaike, 1974]{Akaike74}
Akaike, H. (1974).
\newblock A new look at the statistical model identification.
\newblock {\em IEEE Transactions on Automatic Control}, 19(6):716--723.

\bibitem[Andersson et~al., 1997]{AMP97}
Andersson, S.~A., Madigan, D., and Perlman, M.~D. (1997).
\newblock A characterization of {M}arkov equivalence classes for acyclic
  digraphs.
\newblock {\em The Annals of Statistics}, 25:505--541.

\bibitem[Blair and Peyton, 1993]{BlairPeyton93}
Blair, J. R.~S. and Peyton, B.~W. (1993).
\newblock An introduction to chordal graphs and clique trees.
\newblock In {\em Graph Theory and Sparse Matrix Computations}, pages 1--29.

\bibitem[Bouckaert, 1993]{Bouckaert93}
Bouckaert, R.~R. (1993).
\newblock Probabilistic network construction using the minimum description
  length principle.
\newblock In {\em Lecture Notes in Computer Science}, 747:41--48.
  Springer.

\bibitem[Buntine, 1991]{Buntine91}
Buntine, W.~L. (1991).
\newblock Theory refinement on {Bayesian} networks.
\newblock In D'Ambrosio, B.~D., Smets, P., and Bonissone, P.~P., editors, {\em
  Proceedings of the Seventh Conference on Uncertainty in Artificial
  Intelligence}, pages 52--60. Morgan Kaufmann.

\bibitem[Buntine, 1996]{Buntine96}
Buntine, W.~L. (1996).
\newblock A guide to the literature on learning probabilistic networks from
  data.
\newblock {\em IEEE Transactions on Knowledge and Data Engineering},
  8:195--210.

\bibitem[Chickering, 1995]{Chickering95a}
Chickering, D.~M. (1995).
\newblock A transformational characterization of {Bayesian} network structures.
\newblock In Hanks, S. and Besnard, P., editors, {\em Proceedings of the
  Eleventh Conference on Uncertainty in Artificial Intelligence}, pages 87--98. Morgan Kaufmann.

\bibitem[Chickering, 1996a]{Chickering96lns}
Chickering, D.~M. (1996a).
\newblock Learning {Bayesian} networks is {NP}-{Complete}.
\newblock In Fisher, D. and Lenz, H., editors, {\em Learning from Data:
  Artificial Intelligence and Statistics V}, pages 121--130. Springer-Verlag.

\bibitem[Chickering, 1996b]{Chickering96uai}
Chickering, D.~M. (1996b).
\newblock Learning equivalence classes of {Bayesian}-network structures.
\newblock In Horvitz, E. and Jensen, F., editors, {\em Proceedings of the
  Twelfth Conference on Uncertainty in Artificial Intelligence}, pages 150--157. Morgan Kaufmann.

\bibitem[Chickering et~al., 1995]{CGH95aistats}
Chickering, D.~M., Geiger, D., and Heckerman, D. (1995).
\newblock Learning {Bayesian} networks: Search methods and experimental
  results.
\newblock In {\em Proceedings of the Fifth International Workshop on Artificial
  Intelligence and Statistics}, pages 112--128.

\bibitem[Chow and Liu, 1968]{Chow68}
Chow, C. and Liu, C. (1968).
\newblock Approximating discrete probability distributions with dependence
  trees.
\newblock {\em IEEE Transactions on Information Theory}, 14:462--467.

\bibitem[Cooper and Herskovits, 1992]{CH92}
Cooper, G. and Herskovits, E. (1992).
\newblock A {Bayesian} method for the induction of probabilistic networks from
  data.
\newblock {\em Machine Learning}, 9:309--347.

\bibitem[Dash and Druzdzel, 1999]{DD99}
Dash, D. and Druzdzel, M.~J. (1999).
\newblock A hybrid anytime algorithm for the construction of causal models from
  sparse data.
\newblock In Laskey, K. and Prade, H., editors, {\em Proceedings of the
  Fifteenth Conference on Uncertainty in Artificial Intelligence}, pages 142--149. Morgan Kaufmann.

\bibitem[Dor and Tarsi, 1992]{DorTarsi92}
Dor, D. and Tarsi, M. (1992).
\newblock A simple algorithm to construct a consistent extension of a partially
  oriented graph.
\newblock Technical Report R-185, Cognitive Systems Laboratory, UCLA Computer
  Science Department.

\bibitem[Druzdzel and Simon, 1993]{DS93}
Druzdzel, M. and Simon, H. (1993).
\newblock Causality in {B}ayesian belief networks.
\newblock In Heckerman, D. and Mamdani, A., editors, {\em Proceedings of the
  Ninth Conference on Uncertainty in Artificial Intelligence}, pages 3--11. Morgan Kaufmann.

\bibitem[Gillispie and Perlman, 2001]{GP01}
Gillispie, S.~B. and Perlman, M.~D. (2001).
\newblock Enumerating {M}arkov equivalence classes of acyclic digraph models.
\newblock In Breese, J. and Koller, D., editors, {\em
  Proceedings of the Seventeenth Conference on Uncertainty in Artificial
  Intelligence}, pages 171--177. Morgan Kaufmann.

\bibitem[Heckerman, 1996]{Heckerman95tut}
Heckerman, D. (1996).
\newblock A tutorial on learning {B}ayesian networks.
\newblock Technical Report MSR-TR-95-06, Microsoft Research.

\bibitem[Heckerman et~al., 1995]{HGC95}
Heckerman, D., Geiger, D., and Chickering, D. (1995).
\newblock Learning {Bayesian} networks: The combination of knowledge and
  statistical data.
\newblock {\em Machine Learning}, 20:197--243.

\bibitem[Heckerman and Shachter, 1995]{HS95}
Heckerman, D. and Shachter, R. (1995).
\newblock Decision-theoretic foundations for causal reasoning.
\newblock {\em Journal of Artificial Intelligence Research}, 3:405--430.

\bibitem[Jordan, 1998]{Jordan98}
Jordan, M., editor (1998).
\newblock {\em Learning in Graphical Models}, volume~89 of {\em NATO ASI Series
  D: Behavioural and Social Sciences}.
\newblock Kluwer, Boston, MA.

\bibitem[Ko\v{c}ka et~al., 2001]{KBS01}
Ko\v{c}ka, T., Bouckaert, R.~R., and Studen\'{y}, M. (2001).
\newblock On characterizing inclusion of {B}ayesian networks.
\newblock In Breese, J. and Koller, D., editors, {\em Proceedings of the
  Seventeenth Conference on Uncertainty in Artificial Intelligence}, pages 261--268. Morgan Kaufmann.

\bibitem[Madigan et~al., 1996]{MAPV96}
Madigan, D., Andersson, S.~A., Perlman, M.~D., and Volinsky, C.~T. (1996).
\newblock Bayesian model averaging and model selection for {M}arkov equivalence
  classes of acyclic digraphs.
\newblock {\em Communications in Statistics -- Theory and Methods},
  25:2493--2520.

\bibitem[Meek, 1995]{Meek95uai}
Meek, C. (1995).
\newblock Causal inference and causal explanation with background knowledge.
\newblock In Hanks, S. and Besnard, P., editors, {\em Proceedings of the
  Eleventh Conference on Uncertainty in Artificial Intelligence}, pages 403--410. Morgan Kaufmann.

\bibitem[Meek, 1997]{Meek97}
Meek, C. (1997).
\newblock {\em Graphical Models: {S}electing causal and statistical models}.
\newblock PhD thesis, Carnegie Mellon University.

\bibitem[Munteanu and Bendou, 2001]{MB01}
Munteanu, P. and Bendou, M. (2001).
\newblock The {EQ} framework for learning equivalence classes of {B}ayesian
  networks.
\newblock In Cercone, N., Lin, T., and Wu, X., editors, {\em IEEE International
  Conference on Data Mining}, pages 417--424.

\bibitem[Munteanu and Cau, 2000]{MC00}
Munteanu, P. and Cau, D. (2000).
\newblock Efficient score-based learning of equivalence classes of {B}ayesian
  networks.
\newblock In {\em Lecture Notes in Computer Science}, 1910:96--105. Springer.

\bibitem[Pearl, 1988]{Pearl88}
Pearl, J. (1988).
\newblock {\em Probabilistic Reasoning in Intelligent Systems: Networks of
  Plausible Inference}.
\newblock Morgan Kaufmann, San Mateo, CA.

\bibitem[Pearl and Verma, 1991]{PV91}
Pearl, J. and Verma, T. (1991).
\newblock A theory of inferred causation.
\newblock In Allen, J., Fikes, R., and Sandewall, E., editors, {\em Knowledge
  Representation and Reasoning: Proceedings of the Second International
  Conference}, pages 441--452. Morgan Kaufmann, New York.

\bibitem[Rissanen, 1986]{Rissanen86}
Rissanen, J. (1986).
\newblock Stochastic complexity and modeling.
\newblock {\em The Annals of Statistics}, 14(3):1080--1100.

\bibitem[Schwarz, 1978]{Schwarz78}
Schwarz, G. (1978).
\newblock Estimating the dimension of a model.
\newblock {\em The Annals of Statistics}, 6:461--464.

\bibitem[Spirtes et~al., 1993]{Spirtes93}
Spirtes, P., Glymour, C., and Scheines, R. (1993).
\newblock {\em Causation, Prediction, and Search}.
\newblock Springer-Verlag, New York.

\bibitem[Spirtes and Meek, 1995]{SM95}
Spirtes, P. and Meek, C. (1995).
\newblock Learning {B}ayesian networks with discrete variables from data.
\newblock In Fayyad, U. and Uthurusamy, R., editors, {\em Proceedings of the
  First International Conference on Knowledge Discovery and Data Mining}, pages
  294--299. AAAI Press.

\bibitem[Suzuki, 1993]{Suzuki93uai}
Suzuki, J. (1993).
\newblock A construction of {B}ayesian networks from databases based on an
  {MDL} principle.
\newblock In Heckerman, D. and Mamdani, A., editors, {\em Proceedings of the
  Ninth Conference on Uncertainty in Artificial Intelligence}, pages 266--273. Morgan Kaufmann.

\bibitem[Tarjan and Yannakakis, 1984]{TY84}
Tarjan, R. and Yannakakis, M. (1984).
\newblock Simple linear-time algorithms to test chordality of graphs, test
  acyclicity of hypergraphs, and selectively reduce acyclic hypergraphs.
\newblock {\em {SIAM} Journal on Computing}, 13:566--579.

\bibitem[Verma and Pearl, 1990]{VP90}
Verma, T. and Pearl, J. (1990).
\newblock Equivalence and synthesis of causal models.
\newblock In Henrion, M., Shachter, R., Kanal, L., and Lemmer, J., editors,
  {\em Proceedings of the Sixth Conference on Uncertainty in Artificial
  Intelligence}, pages 220--227.

\end{thebibliography}