Automatically assigned DDC number: 00631

Manually assigned DDC number: 00631

Number of references: 4

Title: Learning and Exploitation do not Conflict under Minimax Optimality

Subject: Learning and Exploitation do not Conflict under Minimax Optimality

Description: We show that adaptive real time dynamic programming extended with the action selection strategy which chooses the best action according to the latest estimate of the cost function yields asymptotically optimal policies within finite time under the minimax optimality criterion. From this it follows that learning and exploitation do not conflict under this special optimality criterion. We relate this result to learning optimal strategies in repeated two-player zero-sum deterministic games. Keywords: reinforcement learning, self-optimizing systems, dynamic games. 1 Introduction. Reinforcement learning (RL) concerns practical problems related to learning of optimal behaviour in sequential decision tasks. The most popular theoretical framework adopted by RL researchers is that of Markovian Decision Problems (MDPs). One of the main questions in RL is what extent of exploration is needed for a learner so that the price of exploration does not become too demanding. Usually some exploration (e...

Contributor: The Pennsylvania State University CiteSeer Archives

Publisher: unknown

Date: 1998-10-12

Format: ps

Identifier: http://citeseer.ist.psu.edu/147329.html

Source: http://sneaker.mindmaker.kfkipark.hu/~szepes/papers/ecml97.ps.gz

Language: en

Relation:

Relation:

Relation:

Relation:

Rights: unrestricted

Graph

<?xml   version="1.0"   encoding="UTF-8"?>

<references_metadata>

      <rec   ID="/291270.html"   Type="mastersthesis"   CiteSeer_Book=""   CiteSeer_Volume=""   Title="Modular   Neural   Networks   for   Learning   Context-Dependent   Game   Strategies,">

            <identifier   Org="ISBN:0262100657"   Paper_ID="/291270.html"   Extracted="0262100657"   />

            <identifier   Org="ISBN:0262201046"   Paper_ID="/291270.html"   Extracted="0262201046"   />

            <identifier   Org="ISBN:0262621118"   Paper_ID="/291270.html"   Extracted="0262621118"   DDC="570/.1/13"   Normalized_DDC="570113"   Normalized_Weight="0.14285714285714285"   />

            <identifier   Org="ISBN:0780319028"   Paper_ID="/291270.html"   Extracted="0780319028"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.14285714285714285"   />

            <identifier   Org="ISBN:0792397169"   Paper_ID="/291270.html"   Extracted="0792397169"   DDC="006.3/1"   Normalized_DDC="00631"   Normalized_Weight="0.14285714285714285"   />

            <identifier   Org="ISBN:0805815902"   Paper_ID="/291270.html"   Extracted="0805815902"   />

            <identifier   Org="ISBN:1558603352"   Paper_ID="/291270.html"   Extracted="1558603352"   DDC="006.3/1"   Normalized_DDC="00631"   Normalized_Weight="0.14285714285714285"   />

            <identifier   Org="ISBN:1590330218"   Paper_ID="/291270.html"   Extracted="1590330218"   DDC="006.3/1"   Normalized_DDC="00631"   Normalized_Weight="0.14285714285714285"   />

            <identifier   Org="ISBN:3540497196"   Paper_ID="/291270.html"   Extracted="3540497196"   DDC="629.8932"   Normalized_DDC="6298932"   Normalized_Weight="0.14285714285714285"   />

            <identifier   Org="ISBN:3540628584"   Paper_ID="/291270.html"   Extracted="3540628584"   DDC="006.3/1"   Normalized_DDC="00631"   Normalized_Weight="0.14285714285714285"   />

      </rec>

      <rec   ID="/68056.html"   Type="inproceedings"   CiteSeer_Book="Proceedings   of   the   13th   International   Conference   on   Machine   Learning   ICML96"   CiteSeer_Volume=""   Title="A   Generalized   Reinforcement-Learning   Model:   {C}onvergence   and   Applications,"   />

      <rec   ID="/40386.html"   Type="techreport"   CiteSeer_Book=""   CiteSeer_Volume=""   Title="Generalized   Markov   Decision   Processes:   Dynamic-programming   and   Reinforcement-learning   Algorithms,">

            <identifier   Org="ISBN:1558604197"   Paper_ID="/40386.html"   Extracted="1558604197"   />

            <identifier   Org="ISBN:1586038915"   Paper_ID="/40386.html"   Extracted="1586038915"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.25"   />

            <identifier   Org="ISBN:3540628584"   Paper_ID="/40386.html"   Extracted="3540628584"   DDC="006.3/1"   Normalized_DDC="00631"   Normalized_Weight="0.25"   />

            <identifier   Org="ISBN:3540688463"   Paper_ID="/40386.html"   Extracted="3540688463"   />

            <identifier   Org="ISBN:3540770003"   Paper_ID="/40386.html"   Extracted="3540770003"   />

            <identifier   Org="ISBN:3540878041"   Paper_ID="/40386.html"   Extracted="3540878041"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.25"   />

            <identifier   Org="ISBN:427490525X"   Paper_ID="/40386.html"   Extracted="427490525X"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.25"   />

      </rec>

      <rec   ID="SELF"   Type="SELF"   CiteSeer_Book="SELF"   CiteSeer_Volume="SELF"   Title="Learning   and   Exploitation   do   not   Conflict   under   Minimax   Optimality">

            <identifier   Org="ISBN:3540628584"   Paper_ID="SELF"   Extracted="3540628584"   DDC="006.3/1"   Normalized_DDC="00631"   Normalized_Weight="1.0"   />

      </rec>

      <rec   ID="/75955.html"   Type="incollection"   CiteSeer_Book="Advances   in   Neural   Information   Processing   Systems   7"   CiteSeer_Volume=""   Title="Learning   To   Play   the   Game   of   Chess,">

            <identifier   Org="ISBN:0262201046"   Paper_ID="/75955.html"   Extracted="0262201046"   />

            <identifier   Org="ISBN:0387741607"   Paper_ID="/75955.html"   Extracted="0387741607"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:0415111900"   Paper_ID="/75955.html"   Extracted="0415111900"   DDC="192"   Normalized_DDC="192"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:0780366573"   Paper_ID="/75955.html"   Extracted="0780366573"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:0791801764"   Paper_ID="/75955.html"   Extracted="0791801764"   />

            <identifier   Org="ISBN:0806528001"   Paper_ID="/75955.html"   Extracted="0806528001"   DDC="355.3434"   Normalized_DDC="3553434"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:1552126501"   Paper_ID="/75955.html"   Extracted="1552126501"   DDC="794./1"   Normalized_DDC="7941"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:1590330218"   Paper_ID="/75955.html"   Extracted="1590330218"   DDC="006.3/1"   Normalized_DDC="00631"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:1841693367"   Paper_ID="/75955.html"   Extracted="1841693367"   DDC="794"   Normalized_DDC="794"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:3540201211"   Paper_ID="/75955.html"   Extracted="3540201211"   DDC="006.3/1"   Normalized_DDC="00631"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:3540430806"   Paper_ID="/75955.html"   Extracted="3540430806"   DDC="794.81"   Normalized_DDC="79481"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:3540590528"   Paper_ID="/75955.html"   Extracted="3540590528"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:3540679251"   Paper_ID="/75955.html"   Extracted="3540679251"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:3540719830"   Paper_ID="/75955.html"   Extracted="3540719830"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:3540745645"   Paper_ID="/75955.html"   Extracted="3540745645"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.07142857142857142"   />

            <identifier   Org="ISBN:3790813486"   Paper_ID="/75955.html"   Extracted="3790813486"   DDC="794.8/151"   Normalized_DDC="7948151"   Normalized_Weight="0.07142857142857142"   />

      </rec>

</references_metadata>

www.000webhost.com