@inproceedings{degroote_reinforcement_2016,
  title     = {Reinforcement Learning for Automatic Online Algorithm Selection -- an Empirical Study},
  volume    = {1649},
  abstract  = {In this paper a reinforcement learning methodology for automatic online algorithm selection is introduced and empirically tested. It is applicable to automatic algorithm selection methods that predict the performance of each available algorithm and then pick the best one. The experiments confirm the usefulness of the methodology: using online data results in better performance. As in many online learning settings an exploration vs. exploitation trade-off, synonymously learning vs. earning trade-off, is incurred. Empirically investigating the quality of classic solution strategies for handling this trade-off in the automatic online algorithm selection setting is the secondary goal of this paper. The automatic online algorithm selection problem can be modelled as a contextual multi-armed bandit problem. Two classic strategies for solving this problem are tested in the context of automatic online algorithm selection: epsilon-greedy and lower confidence bound. The experiments show that a simple purely exploitative greedy strategy outperforms strategies explicitly performing exploration.},
  booktitle = {{ITAT}},
  publisher = {CEUR Workshop Proceedings},
  author    = {Degroote, Hans and Bischl, Bernd and Kotthoff, Lars and De Causmaecker, Patrick},
  month     = sep,
  year      = {2016},
  pages     = {93--101},
}