pimlico.datatypes.parse.candc module

class pimlico.datatypes.parse.candc.CandcOutputCorpus(base_dir, pipeline, raw_data=False)

Bases: pimlico.datatypes.tar.TarredCorpus

process_document(data)
datatype_name = 'candc_output'
class pimlico.datatypes.parse.candc.CandcOutputCorpusWriter(base_dir, gzip=False, append=False, trust_length=False, encoding='utf-8')

Bases: pimlico.datatypes.tar.TarredCorpusWriter

document_to_raw_data(data)