[ { "@graph" : [ { "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY", "@type" : [ "http://www.nanopub.org/nschema#Nanopublication" ], "http://www.nanopub.org/nschema#hasAssertion" : [ { "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY#assertion" } ], "http://www.nanopub.org/nschema#hasProvenance" : [ { "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY#provenance" } ], "http://www.nanopub.org/nschema#hasPublicationInfo" : [ { "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY#pubinfo" } ] } ], "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY#Head" }, { "@graph" : [ { "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY#abstract", "@type" : [ "http://purl.org/spar/doco/Abstract", "http://purl.org/spar/doco/Paragraph" ], "http://purl.org/spar/c4o/hasContent" : [ { "@value" : "The Web has evolved into a huge mine of knowledge carved in different forms, the predominant one still being the free-text document. This motivates the need for Intelligent Web-reading Agents: hypothetically, they would skim through disparate Web sources corpora and generate meaningful structured assertions to fuel Knowledge Bases (KBs). Ultimately, comprehensive KBs, like Wikidata and DBpedia, play a fundamental role to cope with the issue of information overload. On account of such vision, this paper depicts the F ACT E XTRACTOR , a complete Natural Language Processing (NLP) pipeline which reads an input textual corpus and produces machine-readable statements. Each statement is supplied with a confidence score and undergoes a disambiguation step via entity linking, thus allowing the assignment of KB-compliant URIs. \nThe system implements four research contributions: it (1) executes N-ary relation extraction by applying the Frame Semantics linguistic theory, as opposed to binary techniques; it (2) jointly populates both the T-Box and the A-Box of the target KB; it (3) relies on a lightweight NLP machinery, namely part-of-speech tagging only; it (4) enables a completely supervised yet reasonably priced machine learning environment through a crowdsourcing strategy. We assess our approach by setting the target KB to DBpedia and by considering a use case of 52, 000 Italian Wikipedia soccer player articles. Out of those, we yield a dataset of more than 213, 000 triples with a 78.5% F 1 . We corroborate the evaluation via (i) a performance comparison with a baseline system, as well as (ii) an analysis of the T-Box and A-Box augmentation capabilities. The outcomes are incorporated into the Italian DBpedia chapter, can be queried through its SPARQL endpoint, and/or downloaded as standalone data dumps. The codebase is released as free software and is publicly available in the DBpedia Association repository." 
} ] } ], "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY#assertion" }, { "@graph" : [ { "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY#assertion", "http://www.w3.org/ns/prov#hadPrimarySource" : [ { "@id" : "http://dx.doi.org/10.3233/SW-170269" } ], "http://www.w3.org/ns/prov#wasAttributedTo" : [ { "@id" : "https://orcid.org/0000-0002-5456-7964" } ] } ], "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY#provenance" }, { "@graph" : [ { "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY", "http://purl.org/dc/terms/created" : [ { "@type" : "http://www.w3.org/2001/XMLSchema#dateTime", "@value" : "2019-11-10T12:34:11+01:00" } ], "http://purl.org/pav/createdBy" : [ { "@id" : "https://orcid.org/0000-0002-7114-6459" } ] } ], "@id" : "http://purl.org/np/RA4FceIJRS9Gc35T7LlyJoh1kLSWAZmf5I0atlziWIbUY#pubinfo" } ]