{"id":"https://openalex.org/W4416775342","doi":"https://doi.org/10.48550/arxiv.2507.00310","title":"AutoDiscovery: Open-ended Scientific Discovery via Bayesian Surprise","display_name":"AutoDiscovery: Open-ended Scientific Discovery via Bayesian Surprise","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416775342","doi":"https://doi.org/10.48550/arxiv.2507.00310"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2507.00310","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.00310","pdf_url":"https://arxiv.org/pdf/2507.00310","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.00310","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014154126","display_name":"D. P. Agarwal","orcid":"https://orcid.org/0000-0002-1090-3583"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Agarwal, Dhruv","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069912951","display_name":"Bodhisattwa Prasad Majumder","orcid":"https://orcid.org/0009-0000-2691-305X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Majumder, Bodhisattwa Prasad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054706240","display_name":"Bob Adamson","orcid":"https://orcid.org/0000-0001-6747-0195"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adamson, Reece","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Chakravorty, Megha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chakravorty, Megha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120435725","display_name":"Satvika Reddy Gavireddy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gavireddy, Satvika Reddy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017240523","display_name":"Aditya Parashar","orcid":"https://orcid.org/0009-0007-2592-2561"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Parashar, Aditya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084474211","display_name":"Harshit Surana","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Surana, Harshit","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087135401","display_name":"Bhavana Dalvi Mishra","orcid":"https://orcid.org/0000-0002-3813-8641"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mishra, Bhavana Dalvi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107835063","display_name":"Andrew McCallum","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McCallum, Andrew","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077726785","display_name":"Ashish Sabharwal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sabharwal, Ashish","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5049837138","display_name":"Peter Clark","orcid":"https://orcid.org/0000-0003-1001-9226"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Clark, Peter","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5014154126"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.2732999920845032,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.2732999920845032,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.08540000021457672,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0851999968290329,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scientific-discovery","display_name":"Scientific discovery","score":0.7538999915122986},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6432999968528748},{"id":"https://openalex.org/keywords/surprise","display_name":"Surprise","score":0.5884000062942505},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.4413999915122986},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.37929999828338623},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.37459999322891235},{"id":"https://openalex.org/keywords/bayesian-inference","display_name":"Bayesian inference","score":0.35359999537467957},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.3327000141143799}],"concepts":[{"id":"https://openalex.org/C2984917352","wikidata":"https://www.wikidata.org/wiki/Q12772819","display_name":"Scientific discovery","level":2,"score":0.7538999915122986},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6547999978065491},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6432999968528748},{"id":"https://openalex.org/C2780343955","wikidata":"https://www.wikidata.org/wiki/Q333173","display_name":"Surprise","level":2,"score":0.5884000062942505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5295000076293945},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4616999924182892},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.45750001072883606},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.4413999915122986},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.37929999828338623},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.37459999322891235},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.35359999537467957},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C33724603","wikidata":"https://www.wikidata.org/wiki/Q812540","display_name":"Bayesian network","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.31520000100135803},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3009999990463257},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2507.00310","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.00310","pdf_url":"https://arxiv.org/pdf/2507.00310","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:doi:10.48550/arxiv.2507.00310","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2507.00310","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.00310","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.00310","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.00310","pdf_url":"https://arxiv.org/pdf/2507.00310","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,65],"promise":[1],"of":[2,29,148,176,222],"autonomous":[3],"scientific":[4,47,116],"discovery":[5,48,178],"(ASD)":[6],"hinges":[7],"not":[8],"only":[9],"on":[10,15,38,75],"answering":[11],"questions,":[12],"but":[13,84],"also":[14],"knowing":[16],"which":[17],"questions":[18,41],"to":[19,42,58,88,135,230],"ask.":[20],"Most":[21],"recent":[22],"works":[23],"in":[24,34,69,173],"ASD":[25,71,112,244],"explore":[26,145],"the":[27,55,85,91,97,124,128,146,167,174,213],"use":[28],"large":[30],"language":[31],"models":[32],"(LLMs)":[33],"goal-driven":[35],"settings,":[36],"relying":[37],"human-specified":[39],"research":[40],"guide":[43],"hypothesis":[44,94,134],"generation.":[45],"However,":[46],"may":[49],"be":[50],"accelerated":[51],"further":[52,218],"by":[53,61,205,212,225],"allowing":[54],"AI":[56],"system":[57,227],"drive":[59],"exploration":[60,117],"its":[62,136],"own":[63],"criteria.":[64],"few":[66],"existing":[67],"approaches":[68],"open-ended":[70,111,243],"select":[72],"hypotheses":[73],"based":[74],"diversity":[76],"heuristics":[77],"or":[78],"subjective":[79],"proxies":[80],"for":[81,110],"human":[82,216],"interestingness,":[83],"former":[86],"struggles":[87],"meaningfully":[89],"navigate":[90],"typically":[92],"vast":[93],"space,":[95],"and":[96,190],"latter":[98],"suffers":[99],"from":[100,127],"imprecise":[101],"definitions.":[102],"This":[103],"paper":[104],"presents":[105],"AutoDiscovery":[106,172,201],"--":[107],"a":[108,133,154,198],"method":[109,152],"that":[113,196,220],"instead":[114],"drives":[115],"using":[118,164],"Bayesian":[119],"surprise.":[120],"Here,":[121],"we":[122],"quantify":[123],"epistemic":[125],"shift":[126],"LLM's":[129],"prior":[130],"beliefs":[131,138],"about":[132],"posterior":[137],"after":[139],"gathering":[140],"experimental":[141],"results.":[142],"To":[143],"efficiently":[144],"space":[147],"nested":[149],"hypotheses,":[150],"our":[151,226],"employs":[153],"Monte":[155],"Carlo":[156],"tree":[157],"search":[158],"(MCTS)":[159],"strategy":[160],"with":[161],"progressive":[162],"widening":[163],"surprisal":[165],"as":[166,186,233],"reward":[168],"function.":[169],"We":[170],"evaluate":[171],"setting":[175],"data-driven":[177],"across":[179],"21":[180],"real-world":[181],"datasets":[182],"spanning":[183],"domains":[184],"such":[185],"biology,":[187],"economics,":[188],"finance,":[189],"behavioral":[191],"science.":[192],"Our":[193,215],"results":[194],"demonstrate":[195],"under":[197],"fixed":[199],"budget,":[200],"substantially":[202],"outperforms":[203],"competitors":[204],"producing":[206],"5-29%":[207],"more":[208],"discoveries":[209,223],"deemed":[210],"surprising":[211,229],"LLM.":[214],"evaluation":[217],"reveals":[219],"two-thirds":[221],"made":[224],"are":[228],"domain":[231],"experts":[232],"well,":[234],"suggesting":[235],"this":[236],"is":[237],"an":[238],"important":[239],"step":[240],"towards":[241],"building":[242],"systems.":[245]},"counts_by_year":[],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
