pimlico.datatypes.coref.corenlp module

Datatypes for coreference resolution output. They are based on Stanford CoreNLP’s coref output, so they include all the information that CoreNLP provides.

class CorefDocumentType(options, metadata)[source]

Bases: pimlico.datatypes.jsondoc.JsonDocumentType

process_document(doc)[source]

class CorefCorpus(base_dir, pipeline, **kwargs)[source]

Bases: pimlico.datatypes.jsondoc.JsonDocumentCorpus

datatype_name = 'corenlp_coref'
data_point_type

alias of CorefDocumentType

class CorefCorpusWriter(base_dir, gzip=False, append=False, trust_length=False, encoding='utf-8', **kwargs)[source]

Bases: pimlico.datatypes.tar.TarredCorpusWriter

document_to_raw_data(data)

class Entity(id, mentions)[source]

Bases: object

class Mention(id, sentence_num, start_index, end_index, text, type, position=None, animacy=None, is_representative_mention=None, number=None, gender=None)[source]

Bases: object

static from_json(json)[source]
to_json_dict()[source]