Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/json export #778

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/api/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1492,7 +1492,7 @@ def test_can_download_speech2text_jsonl(self):

def test_can_download_labelling_json1(self):
self.download_test_helper(url=self.labeling_url,
format='json1',
format='jsonl',
expected_status=status.HTTP_200_OK)

def test_can_download_plain_text(self):
Expand Down
32 changes: 26 additions & 6 deletions app/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,23 +454,40 @@ def render(self, data, accepted_media_type=None, renderer_context=None):

class JSONPainter(object):

def paint(self, documents):
def paint(self, documents, export_format='jsonl'):
""" Paint the text and annotations for dataset export.

arguments:
export_format - distinguishes between jsonl and json formats, default is jsonl.
"""
serializer = DocumentSerializer(documents, many=True)
data = []
if export_format == 'json':
data.append([])
for d in serializer.data:
d['meta'] = json.loads(d['meta'])
for a in d['annotations']:
a.pop('id')
a.pop('prob')
a.pop('document')
data.append(d)
if export_format == 'json':
data[0].append(d)
else:
data.append(d)
return data

@staticmethod
def paint_labels(documents, labels):
def paint_labels(documents, labels, export_format='jsonl'):
""" Paint the labels for dataset export.

arguments:
export_format - distinguishes between jsonl and json formats, default is jsonl.
"""
serializer_labels = LabelSerializer(labels, many=True)
serializer = DocumentSerializer(documents, many=True)
data = []
if export_format == 'json':
data.append([])
for d in serializer.data:
labels = []
for a in d['annotations']:
Expand All @@ -482,14 +499,17 @@ def paint_labels(documents, labels):
d.pop('annotations')
d['labels'] = labels
d['meta'] = json.loads(d['meta'])
data.append(d)
if export_format == 'json':
data[0].append(d)
else:
data.append(d)
return data


class CSVPainter(JSONPainter):

def paint(self, documents):
data = super().paint(documents)
def paint(self, documents, export_format='csv'):
data = super().paint(documents, export_format='csv')
res = []
for d in data:
annotations = d.pop('annotations')
Expand Down
19 changes: 12 additions & 7 deletions app/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,20 +354,25 @@ def get(self, request, *args, **kwargs):
project = get_object_or_404(Project, pk=self.kwargs['project_id'])
documents = project.documents.all()
painter = self.select_painter(format)
# json1 format prints text labels while json format prints annotations with label ids
# json1 format - "labels": [[0, 15, "PERSON"], ..]
# json format - "annotations": [{"label": 5, "start_offset": 0, "end_offset": 2, "user": 1},..]
if format == "json1":
# jsonltext and jsontext format - "labels": [[0, 15, "PERSON"], ..]
# json and jsonl format - "annotations": [{"label": 5, "start_offset": 0, "end_offset": 2, "user": 1},..]
if format == 'jsonltext':
labels = project.labels.all()
data = JSONPainter.paint_labels(documents, labels)
data = JSONPainter.paint_labels(documents, labels, export_format='jsonl')
elif format == 'jsontext':
labels = project.labels.all()
data = JSONPainter.paint_labels(documents, labels, export_format='json')
elif format == 'jsonl':
data = painter.paint(documents, export_format='jsonl')
else:
data = painter.paint(documents)
data = painter.paint(documents, export_format='json')

return Response(data)

def select_painter(self, format):
if format == 'csv':
return CSVPainter()
elif format == 'json' or format == "json1":
elif format in ['json','jsonl','jsontext','jsonltext']:
return JSONPainter()
else:
raise ValidationError('format {} is invalid.'.format(format))
Expand Down
54 changes: 47 additions & 7 deletions frontend/store/projects.js
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,29 @@ export const getters = {
}
const json = {
type: 'json',
text: 'JSONL'
text: 'JSON'
}
const jsonl = {
type: 'json1',
text: 'JSONL(Text label)'
type: 'jsonl',
text: 'JSONL'
}
const jsontext = {
type: 'jsontext',
text: 'JSON (Text label)'
}
const jsonltext = {
type: 'jsonltext',
text: 'JSONL (Text label)'
}
if (state.current.project_type === 'DocumentClassification') {
json.examples = [
'[\n',
'{"id": 1, "text": "Terrible customer service.", "annotations": [{"id": 1, "label": 1, "user": 1}]}\n',
'{"id": 2, "text": "Really great transaction.", "annotations": [{"id": 2, "label": 2, "user": 1}]}\n',
'{"id": 3, "text": "Great price.", "annotations": [{"id": 3, "label": 2, "user": 1}]}\n',
']'
]
jsonl.examples = [
'{"id": 1, "text": "Terrible customer service.", "annotations": [{"id": 1, "label": 1, "user": 1}]}\n',
'{"id": 2, "text": "Really great transaction.", "annotations": [{"id": 2, "label": 2, "user": 1}]}\n',
'{"id": 3, "text": "Great price.", "annotations": [{"id": 3, "label": 2, "user": 1}]}'
Expand All @@ -154,29 +169,53 @@ export const getters = {
]
return [
csv,
jsonl,
json
]
} else if (state.current.project_type === 'SequenceLabeling') {
json.examples = [
'[\n',
'{"id": 1, "text": "EU rejects ...", "annotations": [{"id": 1, "label": 2, "start_offset": 0, "end_offset": 2, "user": 1}]}\n',
'{"id": 2, "text": "Peter Blackburn", "annotations": [{"id": 2, "label": 1, "start_offset": 0, "end_offset": 15, "user": 1}]}\n',
'{"id": 3, "text": "President Obama", "annotations": [{"id": 3, "label": 1, "start_offset": 10, "end_offset": 15, "user": 1}]}'
'{"id": 3, "text": "President Obama", "annotations": [{"id": 3, "label": 1, "start_offset": 10, "end_offset": 15, "user": 1}]}\n',
']'
]
jsonl.examples = [
'{"id": 1, "text": "EU rejects ...", "annotations": [{"id": 1, "label": 2, "start_offset": 0, "end_offset": 2, "user": 1}]}\n',
'{"id": 2, "text": "Peter Blackburn", "annotations": [{"id": 2, "label": 1, "start_offset": 0, "end_offset": 15, "user": 1}]}\n',
'{"id": 3, "text": "President Obama", "annotations": [{"id": 3, "label": 1, "start_offset": 10, "end_offset": 15, "user": 1}]}'
]
jsontext.examples = [
'[\n',
'{"id": 1, "text": "EU rejects ...", "labels": [[0,2,"ORG"], [11,17, "MISC"], [34,41,"ORG"]]}\n',
'{"id": 2, "text": "Peter Blackburn", "labels": [[0, 15, "PERSON"]]}\n',
'{"id": 3, "text": "President Obama", "labels": [[10, 15, "PERSON"]]}\n',
']'
]
jsonltext.examples = [
'{"id": 1, "text": "EU rejects ...", "labels": [[0,2,"ORG"], [11,17, "MISC"], [34,41,"ORG"]]}\n',
'{"id": 2, "text": "Peter Blackburn", "labels": [[0, 15, "PERSON"]]}\n',
'{"id": 3, "text": "President Obama", "labels": [[10, 15, "PERSON"]]}\n'
]
return [
json,
jsonl
jsonl,
jsontext,
jsonltext
]
} else if (state.current.project_type === 'Seq2seq') {
json.examples = [
jsonl.examples = [
'{"id": 1, "text": "Hello!", "annotations": [{"id": 1, "label": "こんにちは!", "user": 1}]}\n',
'{"id": 2, "text": "Good morning.", "annotations": [{"id": 2, "label": "おはようございます。", "user": 1}]}\n',
'{"id": 3, "text": "See you.", "annotations": [{"id": 3, "label": "さようなら。", "user": 1}]}'
]
json.examples = [
'[\n',
'{"id": 1, "text": "Hello!", "annotations": [{"id": 1, "label": "こんにちは!", "user": 1}]}\n',
'{"id": 2, "text": "Good morning.", "annotations": [{"id": 2, "label": "おはようございます。", "user": 1}]}\n',
'{"id": 3, "text": "See you.", "annotations": [{"id": 3, "label": "さようなら。", "user": 1}]}\n',
']'
]
csv.examples = [
'id,text,label,user\n',
'1,"Hello!","こんにちは!",1\n',
Expand All @@ -185,7 +224,8 @@ export const getters = {
]
return [
csv,
json
json,
jsonl
]
} else {
return []
Expand Down