{"id":"https://openalex.org/W7125287743","doi":"https://doi.org/10.48550/arxiv.2601.12323","title":"MARO: Learning Stronger Reasoning from Social Interaction","display_name":"MARO: Learning Stronger Reasoning from Social Interaction","publication_year":2026,"publication_date":"2026-01-18","ids":{"openalex":"https://openalex.org/W7125287743","doi":"https://doi.org/10.48550/arxiv.2601.12323"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.12323","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.12323","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.12323","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030904112","display_name":"Cai Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cai, Yin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123514956","display_name":"Zhouhong Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Zhouhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123479507","display_name":"Juntao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Juntao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123517730","display_name":"Ping Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ping","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5030904112"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.22920000553131104,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.22920000553131104,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.11490000039339066,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.07000000029802322,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4756999909877777},{"id":"https://openalex.org/keywords/social-learning","display_name":"Social learning","score":0.43479999899864197},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.4221999943256378},{"id":"https://openalex.org/keywords/social-relation","display_name":"Social relation","score":0.38429999351501465},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.3785000145435333},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.37310001254081726},{"id":"https://openalex.org/keywords/competition","display_name":"Competition (biology)","score":0.3366999924182892}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5769000053405762},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5630999803543091},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C79416737","wikidata":"https://www.wikidata.org/wiki/Q2305519","display_name":"Social learning","level":2,"score":0.43479999899864197},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.4221999943256378},{"id":"https://openalex.org/C130064352","wikidata":"https://www.wikidata.org/wiki/Q853725","display_name":"Social relation","level":2,"score":0.38429999351501465},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.3785000145435333},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.37310001254081726},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3659000098705292},{"id":"https://openalex.org/C91306197","wikidata":"https://www.wikidata.org/wiki/Q45767","display_name":"Competition (biology)","level":2,"score":0.3366999924182892},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3248000144958496},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.31839999556541443},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.3073999881744385},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3068000078201294},{"id":"https://openalex.org/C2777938197","wikidata":"https://www.wikidata.org/wiki/Q7834022","display_name":"Transfer of training","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.28049999475479126},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2667999863624573},{"id":"https://openalex.org/C37228920","wikidata":"https://www.wikidata.org/wiki/Q1307600","display_name":"Experiential learning","level":2,"score":0.25780001282691956}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.12323","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.12323","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.12323","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.12323","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Humans":[0],"face":[1],"countless":[2],"scenarios":[3,36],"that":[4,56,133,146],"require":[5],"reasoning":[6,65,142,163,180],"and":[7,40,69,164],"judgment":[8],"in":[9,34,71,140,176],"daily":[10],"life.":[11],"However,":[12],"existing":[13,25],"large":[14,58],"language":[15,59],"model":[16],"training":[17,110],"methods":[18],"primarily":[19],"allow":[20],"models":[21,60],"to":[22,62,157],"learn":[23],"from":[24],"textual":[26],"content":[27],"or":[28,88],"solve":[29],"predetermined":[30],"problems,":[31],"lacking":[32],"experience":[33],"real":[35],"involving":[37],"interaction,":[38],"negotiation,":[39],"competition":[41],"with":[42],"others.":[43],"To":[44],"address":[45],"this,":[46],"this":[47],"paper":[48],"proposes":[49],"Multi-Agent":[50],"Reward":[51],"Optimization":[52],"(MARO),":[53],"a":[54],"method":[55],"enables":[57],"(LLMs)":[61],"acquire":[63],"stronger":[64],"abilities":[66,148],"by":[67,84,107,122],"learning":[68,81,153,175],"practicing":[70],"multi-agent":[72,173],"social":[73,141,151,174],"environments.":[74],"Specifically,":[75],"MARO":[76,134],"first":[77],"addresses":[78,118],"the":[79,96,102,109,125,147,169,178],"sparse":[80],"signal":[82],"problem":[83,106],"decomposing":[85],"final":[86],"success":[87],"failure":[89],"outcomes":[90],"into":[91],"each":[92,128],"specific":[93],"behavior":[94],"during":[95],"interaction":[97],"process;":[98],"second,":[99],"it":[100,117],"handles":[101],"uneven":[103],"role":[104],"distribution":[105],"balancing":[108],"sample":[111],"weights":[112],"of":[113,127,172,182],"different":[114],"roles;":[115],"finally,":[116],"environmental":[119],"instability":[120],"issues":[121],"directly":[123],"evaluating":[124],"utility":[126],"behavior.":[129],"Experimental":[130],"results":[131],"demonstrate":[132],"not":[135],"only":[136],"achieves":[137],"significant":[138],"improvements":[139],"capabilities,":[143],"but":[144],"also":[145],"acquired":[149],"through":[150],"simulation":[152],"can":[154],"effectively":[155],"transfer":[156],"other":[158],"tasks":[159],"such":[160],"as":[161],"mathematical":[162],"instruction":[165],"following.":[166],"This":[167],"reveals":[168],"tremendous":[170],"potential":[171],"enhancing":[177],"general":[179],"capabilities":[181],"LLMs.":[183]},"counts_by_year":[],"updated_date":"2026-01-22T23:33:04.759266","created_date":"2026-01-22T00:00:00"}
