{"id":"https://openalex.org/W2071355102","doi":"https://doi.org/10.1142/s0218213015400059","title":"A Scalable Approach to Harvest Modern Weblogs","display_name":"A Scalable Approach to Harvest Modern Weblogs","publication_year":2015,"publication_date":"2015-04-01","ids":{"openalex":"https://openalex.org/W2071355102","doi":"https://doi.org/10.1142/s0218213015400059","mag":"2071355102"},"language":"en","primary_location":{"id":"doi:10.1142/s0218213015400059","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218213015400059","pdf_url":null,"source":{"id":"https://openalex.org/S178780388","display_name":"International Journal of Artificial Intelligence Tools","issn_l":"0218-2130","issn":["0218-2130","1793-6349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Artificial Intelligence Tools","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://infoscience.epfl.ch/handle/20.500.14299/114342","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054863295","display_name":"Vangelis Banos","orcid":null},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Vangelis Banos","raw_affiliation_strings":["Department of Informatics, Aristotle University of Thessaloniki (AUTH), Thessaloniki, 54124, Greece"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Aristotle University of Thessaloniki (AUTH), Thessaloniki, 54124, Greece","institution_ids":["https://openalex.org/I21370196"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014926250","display_name":"Olivier Blanvillain","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Olivier Blanvillain","raw_affiliation_strings":["\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), 1015 Lausanne, Switzerland"],"affiliations":[{"raw_affiliation_string":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), 1015 Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083270440","display_name":"Nikos Kasioumis","orcid":null},"institutions":[{"id":"https://openalex.org/I67311998","display_name":"European Organization for Nuclear Research","ror":"https://ror.org/01ggx4157","country_code":"CH","type":"facility","lineage":["https://openalex.org/I67311998"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Nikos Kasioumis","raw_affiliation_strings":["European Organization for Nuclear Research (CERN), Geneva 23, 1211, Switzerland"],"affiliations":[{"raw_affiliation_string":"European Organization for Nuclear Research (CERN), Geneva 23, 1211, Switzerland","institution_ids":["https://openalex.org/I67311998"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010058559","display_name":"Yannis Manolopoulos","orcid":"https://orcid.org/0000-0003-4026-4329"},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Yannis Manolopoulos","raw_affiliation_strings":["Department of Informatics, Aristotle University of Thessaloniki (AUTH), Thessaloniki, 54124, Greece"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Aristotle University of Thessaloniki (AUTH), Thessaloniki, 54124, Greece","institution_ids":["https://openalex.org/I21370196"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5054863295"],"corresponding_institution_ids":["https://openalex.org/I21370196"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09286592,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"24","issue":"02","first_page":"1540005","last_page":"1540005"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9735000133514404,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9591000080108643,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9043875932693481},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.8001865148544312},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.7701835632324219},{"id":"https://openalex.org/keywords/interoperability","display_name":"Interoperability","score":0.7349501848220825},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6782305836677551},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.6120060682296753},{"id":"https://openalex.org/keywords/modularity","display_name":"Modularity (biology)","score":0.5830118656158447},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5277410745620728},{"id":"https://openalex.org/keywords/hypertext","display_name":"Hypertext","score":0.5253581404685974},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.46483609080314636},{"id":"https://openalex.org/keywords/sketch","display_name":"Sketch","score":0.43295615911483765},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.4235781729221344},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.42084434628486633},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.4155627489089966},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2953672409057617},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.12815290689468384}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9043875932693481},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.8001865148544312},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.7701835632324219},{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.7349501848220825},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6782305836677551},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.6120060682296753},{"id":"https://openalex.org/C2779478453","wikidata":"https://www.wikidata.org/wiki/Q6889748","display_name":"Modularity (biology)","level":2,"score":0.5830118656158447},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5277410745620728},{"id":"https://openalex.org/C162215914","wikidata":"https://www.wikidata.org/wiki/Q93241","display_name":"Hypertext","level":2,"score":0.5253581404685974},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.46483609080314636},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.43295615911483765},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.4235781729221344},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.42084434628486633},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.4155627489089966},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2953672409057617},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.12815290689468384},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1142/s0218213015400059","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218213015400059","pdf_url":null,"source":{"id":"https://openalex.org/S178780388","display_name":"International Journal of Artificial Intelligence Tools","issn_l":"0218-2130","issn":["0218-2130","1793-6349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Artificial Intelligence Tools","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.726.5565","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.726.5565","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://delab.csd.auth.gr/papers/IJAIT2015bbkm.pdf","raw_type":"text"},{"id":"pmh:oai:infoscience.epfl.ch:208256","is_oa":false,"landing_page_url":"http://infoscience.epfl.ch/record/208256","pdf_url":null,"source":{"id":"https://openalex.org/S4306400488","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:infoscience.tind.io:208256","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/114342","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"research article"}],"best_oa_location":{"id":"pmh:oai:infoscience.tind.io:208256","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/114342","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"research article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4300000071525574,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320279","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1582702620","https://openalex.org/W1613836731","https://openalex.org/W1616576116","https://openalex.org/W1647671624","https://openalex.org/W1970954830","https://openalex.org/W1973986623","https://openalex.org/W1987869189","https://openalex.org/W2000273502","https://openalex.org/W2064682298","https://openalex.org/W2114541504","https://openalex.org/W2129595335","https://openalex.org/W2130695501","https://openalex.org/W2164542999","https://openalex.org/W2319012534","https://openalex.org/W4214671568","https://openalex.org/W4233570883","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2378994405","https://openalex.org/W2385974820","https://openalex.org/W2373478030","https://openalex.org/W2378679551","https://openalex.org/W3149739944","https://openalex.org/W2392363776","https://openalex.org/W2063051341","https://openalex.org/W1494563618","https://openalex.org/W2357022711","https://openalex.org/W3014696621"],"abstract_inverted_index":{"Blogs":[0],"are":[1],"one":[2],"of":[3,8,23,57,66,145,150],"the":[4,11,54,58,67,113,146],"most":[5],"prominent":[6],"means":[7],"communication":[9],"on":[10,75,109],"web.":[12],"Their":[13],"content,":[14],"interconnections":[15],"and":[16,38,47,86,101,135,148],"influence":[17],"constitute":[18],"a":[19,63,97,125],"unique":[20],"socio-technical":[21],"artefact":[22],"our":[24,72,151],"times":[25],"which":[26,61,128],"needs":[27],"to":[28,43,77,104],"be":[29],"preserved.":[30],"The":[31],"BlogForever":[32,68],"project":[33],"has":[34],"established":[35],"best":[36],"practices":[37],"developed":[39],"an":[40],"innovative":[41],"system":[42,126],"harvest,":[44],"preserve,":[45],"manage":[46],"reuse":[48],"blog":[49,59,89,120],"content.":[50],"This":[51],"paper":[52],"presents":[53],"latest":[55],"developments":[56],"crawler":[60],"is":[62,129],"key":[64],"component":[65],"platform.":[69],"More":[70],"precisely,":[71],"work":[73],"concentrates":[74],"techniques":[76],"automatically":[78],"extract":[79],"content":[80],"such":[81],"as":[82],"articles,":[83],"authors,":[84],"dates":[85],"comments":[87],"from":[88],"posts.":[90],"To":[91],"achieve":[92],"this":[93],"goal,":[94],"we":[95,123,141],"introduce":[96],"simple":[98],"yet":[99],"robust":[100],"scalable":[102],"algorithm":[103],"generate":[105],"extraction":[106],"rules":[107],"based":[108],"string":[110],"matching":[111],"using":[112],"blog's":[114],"web":[115],"feed":[116],"in":[117],"conjunction":[118],"with":[119,137],"hypertext.":[121],"Furthermore,":[122],"present":[124],"architecture":[127],"characterised":[130],"by":[131],"efficiency,":[132],"modularity,":[133],"scalability":[134],"interoperability":[136],"third-party":[138],"systems.":[139],"Finally,":[140],"conduct":[142],"thorough":[143],"evaluations":[144],"performance":[147],"accuracy":[149],"system.":[152]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
