NoisePage
In the last two decades, both researchers and vendors have built advisory tools to assist database administrators in various aspects of system tuning and physical design. Most of this previous work, however, is incomplete because it still requires humans to make the final decisions about any changes to the database and consists of reactive measures that fix problems only after they occur.
What is needed for a truly “self-driving” database management system (DBMS) is a new architecture that is designed for autonomous operation. This is different from earlier attempts because all aspects of the system are controlled by an integrated planning component that not only optimizes the system for the current workload, but also predicts future workload trends so that the system can prepare itself accordingly. With this, the DBMS can support all of the previous tuning techniques without requiring a human to determine the right way and proper time to deploy them. It also enables new optimizations that are important for modern high-performance DBMSs, but which are not possible today because the complexity of managing these systems has surpassed the abilities of human experts.
NoisePage is a relational database management system designed for autonomous operation.
People
Acknowledgements
This project is supported (in part) by Google, Amazon, Alfred P. Sloan Research Fellowship, and the U.S. National Science Foundation (CCF-1438955, IIS-1718582, SPX-1822933, IIS-1846158).
Publications
- W. S. Lim, L. Ma, W. Zhang, M. Butrovich, S. I. Arch, and A. Pavlo, "Hit the Gym: Accelerating Query Execution to Efficiently Bootstrap Behavior Models for Self-Driving Database Management Systems," Proc. VLDB Endow., vol. 17, iss. 11, pp. 3680-3693, 2024. PDF
Bibtex
% Journal article: Proc. VLDB Endow. volume 17, number 11 (2024).
@article{lim24boot,
  author  = {Lim, Wan Shen and Ma, Lin and Zhang, William and Butrovich, Matthew and Arch, Samuel I and Pavlo, Andrew},
  title   = {Hit the Gym: Accelerating Query Execution to Efficiently Bootstrap Behavior Models for Self-Driving Database Management Systems},
  journal = {Proc. {VLDB} Endow.},
  year    = {2024},
  volume  = {17},
  number  = {11},
  pages   = {3680--3693},
  url     = {https://www.vldb.org/pvldb/vol17/p3680-lim.pdf},
}
- W. Zhang, W. S. Lim, M. Butrovich, and A. Pavlo, "The Holon Approach for Simultaneously Tuning Multiple Components in a Self-Driving Database Management System with Machine Learning via Synthesized Proto-Actions," Proc. VLDB Endow., vol. 17, iss. 11, pp. 3373-3387, 2024. PDF
Bibtex
% Journal article: Proc. VLDB Endow. volume 17, number 11 (2024).
@article{zhang24holon,
  author  = {Zhang, William and Lim, Wan Shen and Butrovich, Matthew and Pavlo, Andrew},
  title   = {The Holon Approach for Simultaneously Tuning Multiple Components in a Self-Driving Database Management System with Machine Learning via Synthesized Proto-Actions},
  journal = {Proc. {VLDB} Endow.},
  year    = {2024},
  volume  = {17},
  number  = {11},
  pages   = {3373--3387},
  url     = {https://www.vldb.org/pvldb/vol17/p3373-zhang.pdf},
}
- W. S. Lim, M. Butrovich, W. Zhang, A. Crotty, L. Ma, P. Xu, J. Gehrke, and A. Pavlo, "Database Gyms," in CIDR 2023, Conference on Innovative Data Systems Research, 2023. PDF
Bibtex
% Conference paper: CIDR 2023.
@inproceedings{lim23,
  author    = {Lim, Wan Shen and Butrovich, Matthew and Zhang, William and Crotty, Andrew and Ma, Lin and Xu, Peijing and Gehrke, Johannes and Pavlo, Andrew},
  title     = {Database Gyms},
  booktitle = {{CIDR} 2023, Conference on Innovative Data Systems Research},
  year      = {2023},
  url       = {https://db.cs.cmu.edu/papers/2023/p27-lim.pdf},
}
- M. Butrovich, W. S. Lim, L. Ma, J. Rollinson, W. Zhang, Y. Xia, and A. Pavlo, "Tastes Great! Less Filling! High Performance and Accurate Training Data Collection for Self-Driving Database Management Systems," in Proceedings of the 2022 International Conference on Management of Data, 2022, pp. 617-630. PDF
Bibtex
% Conference paper: SIGMOD '22.
@inproceedings{butrovich22,
  author    = {Butrovich, Matthew and Lim, Wan Shen and Ma, Lin and Rollinson, John and Zhang, William and Xia, Yu and Pavlo, Andrew},
  title     = {Tastes Great! Less Filling! High Performance and Accurate Training Data Collection for Self-Driving Database Management Systems},
  booktitle = {Proceedings of the 2022 International Conference on Management of Data},
  series    = {SIGMOD '22},
  year      = {2022},
  pages     = {617--630},
  numpages  = {14},
  doi       = {10.1145/3514221.3517845},
  url       = {https://db.cs.cmu.edu/papers/2022/moddm074-butrovich.pdf},
}
- L. Ma, W. Zhang, J. Jiao, W. Wang, M. Butrovich, W. S. Lim, P. Menon, and A. Pavlo, "MB2: Decomposed Behavior Modeling for Self-Driving Database Management Systems," in Proceedings of the 2021 International Conference on Management of Data, 2021, pp. 1248-1261. PDF
Bibtex
% Conference paper: SIGMOD '21.
% The system name MB2 is braced so sentence-casing styles keep its capitals.
@inproceedings{ma21,
  author    = {Ma, Lin and Zhang, William and Jiao, Jie and Wang, Wuwen and Butrovich, Matthew and Lim, Wan Shen and Menon, Prashanth and Pavlo, Andrew},
  title     = {{MB2}: Decomposed Behavior Modeling for Self-Driving Database Management Systems},
  booktitle = {Proceedings of the 2021 International Conference on Management of Data},
  series    = {SIGMOD '21},
  year      = {2021},
  pages     = {1248--1261},
  numpages  = {14},
  doi       = {10.1145/3448016.3457276},
  url       = {https://db.cs.cmu.edu/papers/2021/ma-sigmod2021.pdf},
}
- A. Ngom, P. Menon, M. Butrovich, L. Ma, W. S. Lim, T. C. Mowry, and A. Pavlo, "Filter Representation in Vectorized Query Execution," in Proceedings of the 17th International Workshop on Data Management on New Hardware (DaMoN 2021), 2021. PDF
Bibtex
% Workshop paper: DaMoN 2021 (article number 6).
@inproceedings{ngom21,
  author    = {Ngom, Amadou and Menon, Prashanth and Butrovich, Matthew and Ma, Lin and Lim, Wan Shen and Mowry, Todd C. and Pavlo, Andrew},
  title     = {Filter Representation in Vectorized Query Execution},
  booktitle = {Proceedings of the 17th International Workshop on Data Management on New Hardware (DaMoN 2021)},
  series    = {DAMON'21},
  year      = {2021},
  articleno = {6},
  numpages  = {7},
  doi       = {10.1145/3465998.3466009},
  url       = {https://db.cs.cmu.edu/papers/2021/ngom-damon2021.pdf},
}
- A. Pavlo, M. Butrovich, L. Ma, W. S. Lim, P. Menon, D. Van Aken, and W. Zhang, "Make Your Database System Dream of Electric Sheep: Towards Self-Driving Operation," Proc. VLDB Endow., vol. 14, iss. 12, pp. 3211-3221, 2021. PDF
Bibtex
% Journal article: Proc. VLDB Endow. volume 14, number 12 (2021).
@article{pavlo21,
  author  = {Pavlo, Andrew and Butrovich, Matthew and Ma, Lin and Lim, Wan Shen and Menon, Prashanth and Van Aken, Dana and Zhang, William},
  title   = {Make Your Database System Dream of Electric Sheep: Towards Self-Driving Operation},
  journal = {Proc. {VLDB} Endow.},
  year    = {2021},
  volume  = {14},
  number  = {12},
  pages   = {3211--3221},
  url     = {https://db.cs.cmu.edu/papers/2021/p3211-pavlo.pdf},
}
- L. Zhang, M. Butrovich, T. Li, A. Pavlo, Y. Nannapaneni, J. Rollinson, H. Zhang, A. Balakumar, D. Biales, Z. Dong, E. J. Eppinger, J. E. Gonzalez, W. S. Lim, J. Liu, L. Ma, P. Menon, S. Mukherjee, T. Nayak, A. Ngom, D. Niu, D. Patra, P. Raj, S. Wang, W. Wang, Y. Yu, and W. Zhang, "Everything is a Transaction: Unifying Logical Concurrency Control and Physical Data Structure Maintenance in Database Management Systems," in CIDR 2021, Conference on Innovative Data Systems Research, 2021. PDF
Bibtex
% Conference paper: CIDR 2021.
@inproceedings{zhang21,
  author    = {Ling Zhang and Matthew Butrovich and Tianyu Li and Andrew Pavlo and Yash Nannapaneni and John Rollinson and Huanchen Zhang and Ambarish Balakumar and Daniel Biales and Ziqi Dong and Emmanuel J Eppinger and Jordi E Gonzalez and Wan Shen Lim and Jianqiao Liu and Lin Ma and Prashanth Menon and Soumil Mukherjee and Tanuj Nayak and Amadou Ngom and Dong Niu and Deepayan Patra and Poojita Raj and Stephanie Wang and Wuwen Wang and Yao Yu and William Zhang},
  title     = {Everything is a Transaction: Unifying Logical Concurrency Control and Physical Data Structure Maintenance in Database Management Systems},
  booktitle = {{CIDR} 2021, Conference on Innovative Data Systems Research},
  year      = {2021},
  url       = {https://db.cs.cmu.edu/papers/2021/cidr2021_paper06.pdf},
}
- T. Li, M. Butrovich, A. Ngom, W. S. Lim, W. McKinney, and A. Pavlo, "Mainlining Databases: Supporting Fast Transactional Workloads on Universal Columnar Data File Formats," Proc. VLDB Endow., vol. 14, iss. 4, pp. 534-546, 2020. PDF
Bibtex
% Journal article: Proc. VLDB Endow. volume 14, number 4 (2020).
@article{li2020,
  author  = {Tianyu Li and Matthew Butrovich and Amadou Ngom and Wan Shen Lim and Wes McKinney and Andrew Pavlo},
  title   = {Mainlining Databases: Supporting Fast Transactional Workloads on Universal Columnar Data File Formats},
  journal = {Proc. {VLDB} Endow.},
  year    = {2020},
  volume  = {14},
  number  = {4},
  pages   = {534--546},
  url     = {https://db.cs.cmu.edu/papers/2020/p534-li.pdf},
}
- P. Menon, A. Ngom, L. Ma, T. C. Mowry, and A. Pavlo, "Permutable Compiled Queries: Dynamically Adapting Compiled Queries without Recompiling," Proc. VLDB Endow., vol. 14, iss. 2, pp. 101-113, 2020. PDF
Bibtex
% Journal article: Proc. VLDB Endow. volume 14, number 2 (2020).
% Every author is joined with " and "; BibTeX does not treat a comma as a
% separator between names, so comma-joined names parse as one malformed name.
@article{menon2020,
  author  = {Menon, Prashanth and Ngom, Amadou and Ma, Lin and Mowry, Todd C. and Pavlo, Andrew},
  title   = {Permutable Compiled Queries: Dynamically Adapting Compiled Queries without Recompiling},
  journal = {Proc. {VLDB} Endow.},
  year    = {2020},
  volume  = {14},
  number  = {2},
  pages   = {101--113},
  url     = {https://db.cs.cmu.edu/papers/2020/p101-menon.pdf},
}
- A. Pavlo, M. Butrovich, A. Joshi, L. Ma, P. Menon, D. Van Aken, L. Lee, and R. Salakhutdinov, "External vs. Internal: An Essay on Machine Learning Agents for Autonomous Database Management Systems," IEEE Data Engineering Bulletin, pp. 32-46, 2019. PDF
Bibtex
% Journal article: IEEE Data Engineering Bulletin, June 2019.
% Names use surname-first form so the two-word surname "Van Aken" is not
% split into a middle name "Van" and surname "Aken".
% The month uses the predefined macro so styles can format or localize it.
@article{pavlo19,
  author  = {Pavlo, Andrew and Butrovich, Matthew and Joshi, Ananya and Ma, Lin and Menon, Prashanth and Van Aken, Dana and Lee, Lisa and Salakhutdinov, Ruslan},
  title   = {External vs. Internal: An Essay on Machine Learning Agents for Autonomous Database Management Systems},
  journal = {IEEE Data Engineering Bulletin},
  month   = jun,
  year    = {2019},
  pages   = {32--46},
  url     = {https://db.cs.cmu.edu/papers/2019/pavlo-icde-bulletin2019.pdf},
}
- L. Ma, D. Van Aken, A. Hefny, G. Mezerhane, A. Pavlo, and G. J. Gordon, "Query-based Workload Forecasting for Self-Driving Database Management Systems," in Proceedings of the 2018 International Conference on Management of Data, 2018, pp. 631-645. PDF
Bibtex
% Conference paper: SIGMOD '18.
@inproceedings{ma18,
  author    = {Ma, Lin and Van Aken, Dana and Hefny, Ahmed and Mezerhane, Gustavo and Pavlo, Andrew and Gordon, Geoffrey J.},
  title     = {Query-based Workload Forecasting for Self-Driving Database Management Systems},
  booktitle = {Proceedings of the 2018 International Conference on Management of Data},
  series    = {SIGMOD '18},
  year      = {2018},
  pages     = {631--645},
  numpages  = {15},
  doi       = {10.1145/3183713.3196908},
  url       = {https://db.cs.cmu.edu/papers/2018/mod435-maA.pdf},
}
- Z. Wang, A. Pavlo, H. Lim, V. Leis, H. Zhang, M. Kaminsky, and D. G. Andersen, "Building a Bw-Tree Takes More Than Just Buzz Words," in Proceedings of the 2018 ACM International Conference on Management of Data, 2018, pp. 473-488. PDF
Bibtex
% Conference paper: SIGMOD '18.
@inproceedings{wang18,
  author    = {Ziqi Wang and Andrew Pavlo and Hyeontaek Lim and Viktor Leis and Huanchen Zhang and Michael Kaminsky and David G. Andersen},
  title     = {Building a Bw-Tree Takes More Than Just Buzz Words},
  booktitle = {Proceedings of the 2018 ACM International Conference on Management of Data},
  series    = {SIGMOD '18},
  year      = {2018},
  pages     = {473--488},
  numpages  = {16},
  url       = {https://db.cs.cmu.edu/papers/2018/mod342-wangA.pdf},
}
- P. Menon, T. C. Mowry, and A. Pavlo, "Relaxed Operator Fusion for In-Memory Databases: Making Compilation, Vectorization, and Prefetching Work Together At Last," Proc. VLDB Endow., vol. 11, iss. 1, pp. 1-13, 2017. PDF
Bibtex
% Journal article: Proc. VLDB Endow. volume 11, number 1 (September 2017).
% The month uses the predefined macro so styles can format or localize it.
% VLDB is braced to keep its capitals under any recasing style.
@article{menon17,
  author    = {Prashanth Menon and Todd C. Mowry and Andrew Pavlo},
  title     = {Relaxed Operator Fusion for In-Memory Databases: Making Compilation, Vectorization, and Prefetching Work Together At Last},
  journal   = {Proc. {VLDB} Endow.},
  volume    = {11},
  number    = {1},
  month     = sep,
  year      = {2017},
  pages     = {1--13},
  publisher = {VLDB Endowment},
  url       = {https://db.cs.cmu.edu/papers/2017/p1-menon.pdf},
}
- A. Pavlo, G. Angulo, J. Arulraj, H. Lin, J. Lin, L. Ma, P. Menon, T. Mowry, M. Perron, I. Quah, S. Santurkar, A. Tomasic, S. Toor, D. Van Aken, Z. Wang, Y. Wu, R. Xian, and T. Zhang, "Self-Driving Database Management Systems," in CIDR 2017, Conference on Innovative Data Systems Research, 2017. PDF
Bibtex
% Conference paper: CIDR 2017.
% Names use surname-first form so the two-word surname "Van Aken" is not
% split into a middle name "Van" and surname "Aken".
@inproceedings{pavlo17,
  author    = {Pavlo, Andrew and Angulo, Gustavo and Arulraj, Joy and Lin, Haibin and Lin, Jiexi and Ma, Lin and Menon, Prashanth and Mowry, Todd and Perron, Matthew and Quah, Ian and Santurkar, Siddharth and Tomasic, Anthony and Toor, Skye and Van Aken, Dana and Wang, Ziqi and Wu, Yingjun and Xian, Ran and Zhang, Tieying},
  title     = {Self-Driving Database Management Systems},
  booktitle = {{CIDR} 2017, Conference on Innovative Data Systems Research},
  year      = {2017},
  url       = {https://db.cs.cmu.edu/papers/2017/p42-pavlo-cidr17.pdf},
}
- T. Nayak, "An Evaluation of Compilation-Based PL/PGSQL Execution," Master's Dissertation, 2021. PDF
Bibtex
% Master's thesis: Carnegie Mellon University, February 2021.
% The acronym PL/PGSQL is braced so sentence-casing styles keep its capitals.
% The month uses the predefined macro so styles can format or localize it.
@mastersthesis{nayak21,
  author = {Tanuj Nayak},
  title  = {An Evaluation of Compilation-Based {PL/PGSQL} Execution},
  school = {Carnegie Mellon University},
  month  = feb,
  year   = {2021},
  url    = {http://reports-archive.adm.cs.cmu.edu/anon/2021/CMU-CS-21-101.pdf},
}