Skip to content
This repository has been archived by the owner on Nov 30, 2024. It is now read-only.

Commit

Permalink
Data Conversion + Gatsby
Browse files Browse the repository at this point in the history
This commit adds convert.py that reads the old datasets.yml and writes
it as a set of markdown files in src/datasets/. Some data massaging
needed to be done, especially to supply missing dates.

Also included in this commit is a bare-bones Gatsby application which is
replacing Jekyll so that it can work more like a web application
(sorting, filtering, etc).
  • Loading branch information
edsu committed Mar 29, 2020
1 parent 276b921 commit ba04be8
Show file tree
Hide file tree
Showing 135 changed files with 20,186 additions and 205 deletions.
8 changes: 5 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
_site
.sass-cache
.jekyll-metadata
node_modules
public
*.log
.cache
.vscode
5 changes: 0 additions & 5 deletions Gemfile

This file was deleted.

15 changes: 0 additions & 15 deletions _config.yml

This file was deleted.

2 changes: 0 additions & 2 deletions _includes/footer.html

This file was deleted.

10 changes: 0 additions & 10 deletions _includes/head.html

This file was deleted.

5 changes: 0 additions & 5 deletions _includes/header.html

This file was deleted.

16 changes: 0 additions & 16 deletions _layouts/default.html

This file was deleted.

45 changes: 45 additions & 0 deletions convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# A one-off script used to convert the old catalog YAML into separate Markdown files.

import re
import yaml
from datetime import date, datetime

from dateutil.parser import parse

# Load the legacy catalog and order it by the date each dataset was added;
# the loop further down numbers the output files in this order.
# NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
# tolerable only while datasets.yml is a trusted file from this repository.
# The context manager closes the file handle, which the bare open() call
# passed straight to yaml.load never did.
with open('static/data/datasets.yml', encoding='utf-8') as catalog:
    datasets = yaml.load(catalog, Loader=yaml.Loader)
datasets.sort(key=lambda d: d['added'])

def json_serial(obj):
    """Return the ISO 8601 string for a date or datetime value.

    Raises TypeError for a value of any other type.

    NOTE(review): nothing in the visible script calls this helper.
    """
    if not isinstance(obj, (datetime, date)):
        raise TypeError("Type %s not serializable" % type(obj))
    return obj.isoformat()

def unpack_date(s):
    """Turn a "<start> - <end>" string into a dict of two date objects.

    The string is split on " - ", each piece is parsed with parse() and
    reduced to its date part, and the first two results are returned as
    {"start": ..., "end": ...}.
    """
    parsed = [parse(piece).date() for piece in s.split(' - ')]
    return {"start": parsed[0], "end": parsed[1]}

# Write one Markdown file per dataset: the metadata becomes YAML front matter
# and the description becomes the document body. Files are numbered from 1 in
# the order of `datasets`, zero-padded to five digits.
for dataset_id, d in enumerate(datasets, start=1):
    print(dataset_id, d['title'])

    # The description is written as the body, so remove it from the metadata.
    desc = d.pop('description')

    # A lone creator is wrapped so that `creator` is always a list.
    if not isinstance(d['creator'], list):
        d['creator'] = [d['creator']]

    # `dates` is either one "start - end" string or a list of them; either
    # way it ends up as a list of {"start", "end"} dicts.
    if isinstance(d['dates'], list):
        d['dates'] = [unpack_date(span) for span in d['dates']]
    else:
        d['dates'] = [unpack_date(d['dates'])]

    meta = yaml.dump(d, default_flow_style=False)

    path = "src/datasets/{0:05n}.md".format(dataset_id)
    # The context manager flushes and closes each file once it is written;
    # UTF-8 is explicit so the output does not depend on the locale.
    with open(path, 'w', encoding='utf-8') as fh:
        fh.write('---\n')
        fh.write(meta)
        fh.write('---\n\n')
        fh.write(desc)
56 changes: 0 additions & 56 deletions css/style.css

This file was deleted.

19 changes: 0 additions & 19 deletions datasets/README.md

This file was deleted.

28 changes: 0 additions & 28 deletions feed.xml

This file was deleted.

7 changes: 7 additions & 0 deletions gatsby-browser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/**
* Implement Gatsby's Browser APIs in this file.
*
* See: https://www.gatsbyjs.org/docs/browser-apis/
*/

// You can delete this file if you're not using it
26 changes: 26 additions & 0 deletions gatsby-config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
module.exports = {
siteMetadata: {
title: `DocNow Tweet Catalog`,
description: `A catalog of tweet identifier datasets`,
author: `Documenting the Now`,
},
plugins: [
`gatsby-plugin-react-helmet`,
`gatsby-transformer-json`,
`gatsby-transformer-remark`,
{
resolve: `gatsby-source-filesystem`,
options: {
name: `datasets`,
path: `${__dirname}/src/datasets`
}
},
{
resolve: `gatsby-source-filesystem`,
options: {
name: `images`,
path: `${__dirname}/src/images`,
},
}
]
}
49 changes: 49 additions & 0 deletions gatsby-node.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
const fs = require('fs')
const path = require('path')

exports.createPages = async ({ actions: { createPage }, graphql, pathPrefix }) => {
await makeDatasets(createPage, graphql, pathPrefix)
}

/**
 * Creates one page per Markdown dataset and writes the combined catalog to
 * static/data/datasets.json.
 *
 * Each page is served at /datasets/<id>/, where <id> is the integer parsed
 * from the leading digits of the Markdown file name. The page context is the
 * id, the rendered HTML (as `description`) and the queried front matter.
 *
 * @param {Function} createPage - Called once per dataset with `{ path, component, context }`.
 * @param {Function} graphql - Query runner; must resolve to an object with `data` and, on failure, `errors`.
 * @param {string} pathPrefix - Accepted for call compatibility; not used here.
 * @returns {Promise<void>}
 * @throws {Error} If the query result carries errors.
 */
async function makeDatasets(createPage, graphql, pathPrefix) {
  const results = await graphql(`
    query {
      allMarkdownRemark {
        nodes {
          frontmatter {
            title
            creator
            added
            published
            dates {
              start
              end
            }
            tweets
            tags
            url
          }
          html
          fileAbsolutePath
        }
      }
    }
  `)

  // Fail with the query's own messages instead of an opaque TypeError when
  // `results.data` is missing. concat() accepts one error or an array.
  const errors = [].concat(results.errors || [])
  if (errors.length > 0) {
    const details = errors.map(err => (err && err.message) || String(err)).join('\n')
    throw new Error(`Dataset query failed:\n${details}`)
  }

  // The template path is the same for every page, so resolve it once.
  const component = require.resolve(`./src/templates/dataset.js`)

  const datasets = []
  for (const dataset of results.data.allMarkdownRemark.nodes) {
    // Explicit radix so zero-padded names such as 00012.md are read as decimal.
    const id = Number.parseInt(path.basename(dataset.fileAbsolutePath), 10)
    const context = {
      id,
      description: dataset.html,
      ...dataset.frontmatter
    }
    createPage({
      path: `/datasets/${id}/`,
      component,
      context
    })
    datasets.push(context)
  }

  // The relative path resolves against the process working directory.
  // Create the folder first so the write cannot fail on a missing directory.
  const outputPath = `static/data/datasets.json`
  fs.mkdirSync(path.dirname(outputPath), { recursive: true })
  fs.writeFileSync(outputPath, JSON.stringify(datasets, null, 2))
}
7 changes: 7 additions & 0 deletions gatsby-ssr.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/**
* Implement Gatsby's SSR (Server Side Rendering) APIs in this file.
*
* See: https://www.gatsbyjs.org/docs/ssr-apis/
*/

// You can delete this file if you're not using it
33 changes: 0 additions & 33 deletions index.html

This file was deleted.

Loading

0 comments on commit ba04be8

Please sign in to comment.