{"id":"https://openalex.org/W2787471386","doi":"https://doi.org/10.1109/tnnls.2018.2812709","title":"Guided Policy Exploration for Markov Decision Processes Using an Uncertainty-Based Value-of-Information Criterion","display_name":"Guided Policy Exploration for Markov Decision Processes Using an Uncertainty-Based Value-of-Information Criterion","publication_year":2018,"publication_date":"2018-03-27","ids":{"openalex":"https://openalex.org/W2787471386","doi":"https://doi.org/10.1109/tnnls.2018.2812709","mag":"2787471386","pmid":"https://pubmed.ncbi.nlm.nih.gov/29771664"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2018.2812709","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2018.2812709","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1802.01518","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049310005","display_name":"Isaac J. Sledge","orcid":"https://orcid.org/0000-0002-7755-5886"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Isaac J. Sledge","raw_affiliation_strings":["Computational NeuroEngineering Laboratory, University of Florida, Gainesville, FL, USA"],"affiliations":[{"raw_affiliation_string":"Computational NeuroEngineering Laboratory, University of Florida, Gainesville, FL, USA","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025034683","display_name":"Matthew S. Emigh","orcid":"https://orcid.org/0000-0002-1438-6069"},"institutions":[{"id":"https://openalex.org/I2802287952","display_name":"Naval Surface Warfare Center","ror":"https://ror.org/03d4ecn10","country_code":"US","type":"facility","lineage":["https://openalex.org/I1328969757","https://openalex.org/I1330347796","https://openalex.org/I2802287952","https://openalex.org/I3130687028"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew S. Emigh","raw_affiliation_strings":["U.S. Naval Surface Warfare Center, Panama City, FL, USA"],"affiliations":[{"raw_affiliation_string":"U.S. Naval Surface Warfare Center, Panama City, FL, USA","institution_ids":["https://openalex.org/I2802287952"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019504861","display_name":"Jos\u00e9 C. Pr\u0131\u0301ncipe","orcid":"https://orcid.org/0000-0002-3449-3531"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jose C. Principe","raw_affiliation_strings":["Department of Biomedical Engineering, University of Florida, Gainesville, FL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Engineering, University of Florida, Gainesville, FL, USA","institution_ids":["https://openalex.org/I33213144"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5049310005"],"corresponding_institution_ids":["https://openalex.org/I33213144"],"apc_list":null,"apc_paid":null,"fwci":2.7076,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.9213894,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"29","issue":"6","first_page":"2080","last_page":"2098"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9779000282287598,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6888965368270874},{"id":"https://openalex.org/keywords/value-of-information","display_name":"Value of information","score":0.5915820002555847},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5837554335594177},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4056099057197571},{"id":"https://openalex.org/keywords/management-science","display_name":"Management science","score":0.3899339735507965},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.35712799429893494},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.34530186653137207},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.30818212032318115},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.278167188167572},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.27349576354026794},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2711833119392395},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.22671493887901306}],"concepts":[{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6888965368270874},{"id":"https://openalex.org/C92424840","wikidata":"https://www.wikidata.org/wiki/Q7912772","display_name":"Value of information","level":2,"score":0.5915820002555847},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5837554335594177},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4056099057197571},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.3899339735507965},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.35712799429893494},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.34530186653137207},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.30818212032318115},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.278167188167572},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27349576354026794},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2711833119392395},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.22671493887901306}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tnnls.2018.2812709","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2018.2812709","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:29771664","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/29771664","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null},{"id":"pmh:oai:arXiv.org:1802.01518","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1802.01518","pdf_url":"https://arxiv.org/pdf/1802.01518","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1802.01518","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1802.01518","pdf_url":"https://arxiv.org/pdf/1802.01518","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.75,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G3320180373","display_name":null,"funder_award_id":"N00014-14-1-0542","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G5707073744","display_name":null,"funder_award_id":"N00014-15-1-2103","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320310009","display_name":"University of Florida","ror":"https://ror.org/02y3ad647"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1491843047","https://openalex.org/W1496855202","https://openalex.org/W1505937442","https://openalex.org/W1507852408","https://openalex.org/W1508634047","https://openalex.org/W1554015367","https://openalex.org/W1586504939","https://openalex.org/W1624804034","https://openalex.org/W1968136409","https://openalex.org/W1976578332","https://openalex.org/W2009533501","https://openalex.org/W2010343671","https://openalex.org/W2027028757","https://openalex.org/W2048972052","https://openalex.org/W2074225934","https://openalex.org/W2083143894","https://openalex.org/W2106639887","https://openalex.org/W2107726111","https://openalex.org/W2116459397","https://openalex.org/W2118219918","https://openalex.org/W2121863487","https://openalex.org/W2123447947","https://openalex.org/W2124352385","https://openalex.org/W2129670787","https://openalex.org/W2132849848","https://openalex.org/W2151268438","https://openalex.org/W2156974606","https://openalex.org/W2164569010","https://openalex.org/W2165131254","https://openalex.org/W2168839459","https://openalex.org/W2343090116","https://openalex.org/W2405379562","https://openalex.org/W2462102501","https://openalex.org/W2489939061","https://openalex.org/W2592509915","https://openalex.org/W2594564328","https://openalex.org/W2606378794","https://openalex.org/W2610841196","https://openalex.org/W2708325519","https://openalex.org/W2789788032","https://openalex.org/W2964039359","https://openalex.org/W2964117027","https://openalex.org/W3004707083","https://openalex.org/W3105659172","https://openalex.org/W4214717370","https://openalex.org/W4230367971","https://openalex.org/W4233061323","https://openalex.org/W4245768123","https://openalex.org/W4285719527","https://openalex.org/W6629614444","https://openalex.org/W6630205563","https://openalex.org/W6630247960","https://openalex.org/W6630491230","https://openalex.org/W6633172667","https://openalex.org/W6675901991","https://openalex.org/W6678008115","https://openalex.org/W6678481081","https://openalex.org/W6679599195","https://openalex.org/W6682489347","https://openalex.org/W6684985892","https://openalex.org/W6745774501","https://openalex.org/W6781870204"],"related_works":["https://openalex.org/W2004597074","https://openalex.org/W4213214852","https://openalex.org/W2512014291","https://openalex.org/W1589140671","https://openalex.org/W2315999538","https://openalex.org/W187740018","https://openalex.org/W2162286586","https://openalex.org/W4255368532","https://openalex.org/W2514437865","https://openalex.org/W846528073"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,109],"in":[2,31,116,167],"environments":[3],"with":[4,126],"many":[5],"action-state":[6],"pairs":[7],"is":[8,12,73],"challenging.":[9],"The":[10,97],"issue":[11],"the":[13,21,41,46,68,76,84,91,94,113,133,141,146,151,177,189],"number":[14],"of":[15,40,78,93,99,140],"episodes":[16,169],"needed":[17],"to":[18,119,193],"thoroughly":[19],"search":[20,29,95,134],"policy":[22,42,69,114,142,190],"space.":[23,70,143],"Most":[24],"conventional":[25],"heuristics":[26],"address":[27],"this":[28,51,124],"problem":[30],"a":[32,80,102,117,127],"stochastic":[33,62,103],"manner.":[34,121],"This":[35],"can":[36,111,183],"leave":[37],"large":[38],"portions":[39],"space":[43,115],"unvisited":[44],"during":[45,108],"early":[47],"training":[48,178],"stages.":[49],"In":[50],"paper,":[52],"we":[53],"propose":[54],"an":[55],"uncertainty-based,":[56],"information-theoretic":[57],"approach":[58,72,162,182],"for":[59,105,180],"performing":[60,165],"guided":[61],"searches":[63],"that":[64,82,110,160,176],"more":[65],"effectively":[66],"cover":[67],"Our":[71,157],"based":[74],"on":[75,150],"value":[77,98],"information,":[79],"criterion":[81,125],"provides":[83],"optimal":[85],"tradeoff":[86],"between":[87],"expected":[88],"costs":[89],"and":[90,154],"granularity":[92],"process.":[96],"information":[100],"yields":[101,163],"routine":[104],"choosing":[106],"actions":[107],"explore":[112],"coarse":[118],"fine":[120],"We":[122,144,174],"augment":[123],"state-transition":[128],"uncertainty":[129],"factor,":[130],"which":[131],"guides":[132],"process":[135],"into":[136],"previously":[137],"unexplored":[138],"regions":[139],"evaluate":[145],"uncertainty-based":[147],"value-of-information":[148],"policies":[149,166],"games":[152],"Centipede":[153],"Crossy":[155],"Road.":[156],"results":[158],"indicate":[159],"our":[161,181,195],"better":[164],"fewer":[168],"than":[170],"stochastic-based":[171],"exploration":[172],"strategies.":[173],"show":[175],"rate":[179],"be":[184],"further":[185],"improved":[186],"by":[187],"using":[188],"cross":[191],"entropy":[192],"guide":[194],"criterion's":[196],"hyperparameter":[197],"selection.":[198]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":3}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
