Automated script to update metadata
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
const fs = require('fs');
|
||||
const { promisify } = require('util');
|
||||
const fs = require('fs-extra');
|
||||
const fetch = require('node-fetch');
|
||||
const dayjs = require('dayjs');
|
||||
|
||||
@@ -10,7 +9,8 @@ process.on('unhandledRejection', error => {
|
||||
});
|
||||
|
||||
if (!process.env.TOKEN) {
|
||||
throw new Error('no github token found');
|
||||
console.log('❌ no credentials found.');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// --- ENV VAR ---
|
||||
@@ -18,11 +18,12 @@ const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 10;
|
||||
const DELAY = parseInt(process.env.DELAY, 10) || 3000;
|
||||
// --- FILENAME ---
|
||||
const README = 'README.md';
|
||||
const GITHUB_METADATA_FILE = `data/${dayjs().format(
|
||||
const DATA_FOLDER = 'data';
|
||||
const GITHUB_METADATA_FILE = `${DATA_FOLDER}/${dayjs().format(
|
||||
'YYYY-MM-DDTHH.mm.ss',
|
||||
)}-fetched_repo_data.json`;
|
||||
const LATEST_FILENAME = 'data/latest';
|
||||
const GITHUB_REPOS = 'data/list_repos.json';
|
||||
const LATEST_FILENAME = `${DATA_FOLDER}/latest`;
|
||||
const GITHUB_REPOS = `${DATA_FOLDER}/list_repos.json`;
|
||||
// --- HTTP ---
|
||||
const API = 'https://api.github.com/';
|
||||
const options = {
|
||||
@@ -35,29 +36,34 @@ const options = {
|
||||
};
|
||||
|
||||
const removeHost = x => x.slice('https://github.com/'.length, x.length);
|
||||
const readFile = promisify(fs.readFile);
|
||||
const writeFile = promisify(fs.writeFile);
|
||||
const printError = err => err && console.error('❌ ERROR', err);
|
||||
const barLine = console.draft('Starting batch...');
|
||||
|
||||
const handleFailure = err => {
|
||||
console.error('❌ ERROR', err);
|
||||
process.exit(1);
|
||||
};
|
||||
const LOG = (...logs) => {
|
||||
if (process.env.DEBUG) console.log(...logs);
|
||||
};
|
||||
const delay = ms =>
|
||||
new Promise(resolve => {
|
||||
setTimeout(() => resolve(), ms);
|
||||
});
|
||||
|
||||
const get = (path, opt) =>
|
||||
fetch(`${API}repos/${path}`, {
|
||||
const get = (pathURL, opt) => {
|
||||
LOG(` Fetching ${pathURL}`);
|
||||
return fetch(`${API}repos/${pathURL}`, {
|
||||
...options,
|
||||
...opt,
|
||||
})
|
||||
.catch(printError)
|
||||
.catch(handleFailure)
|
||||
.then(response => {
|
||||
if (response.ok) return response.json();
|
||||
throw new Error('Network response was not ok.');
|
||||
})
|
||||
.catch(printError);
|
||||
.catch(handleFailure);
|
||||
};
|
||||
|
||||
const fetchAll = batch => Promise.all(batch.map(async path => get(path)));
|
||||
const fetchAll = batch => Promise.all(batch.map(async pathURL => get(pathURL)));
|
||||
|
||||
const extractAllRepos = markdown => {
|
||||
const re = /https:\/\/github\.com\/([a-zA-Z0-9-._]+)\/([a-zA-Z0-9-._]+)/g;
|
||||
@@ -80,8 +86,9 @@ async function batchFetchRepoMetadata(githubRepos) {
|
||||
/* eslint-disable no-await-in-loop */
|
||||
for (let i = 0; i < repos.length; i += BATCH_SIZE) {
|
||||
const batch = repos.slice(i, i + BATCH_SIZE);
|
||||
if (process.env.DEBUG) console.log({ batch });
|
||||
LOG({ batch });
|
||||
const res = await fetchAll(batch);
|
||||
LOG('batch fetched...');
|
||||
metadata.push(...res);
|
||||
ProgressBar(i, BATCH_SIZE, repos.length);
|
||||
// poor man's rate limiting so github don't ban us
|
||||
@@ -91,29 +98,59 @@ async function batchFetchRepoMetadata(githubRepos) {
|
||||
return metadata;
|
||||
}
|
||||
|
||||
function shouldUpdate(fileLatestUpdate) {
|
||||
LOG({ fileLatestUpdate });
|
||||
if (!fileLatestUpdate) return true;
|
||||
|
||||
const hours = fileLatestUpdate.slice(
|
||||
'data/YYYY-MM-DDT'.length,
|
||||
'data/YYYY-MM-DDTHH'.length,
|
||||
);
|
||||
const latestUpdate = dayjs(
|
||||
fileLatestUpdate.slice('data/'.length, 'data/YYYY-MM-DD'.length),
|
||||
).add(hours, 'hour');
|
||||
LOG({ latestUpdate: latestUpdate.format() });
|
||||
const isMoreThanOneDay = dayjs().diff(latestUpdate, 'hours') >= 1;
|
||||
return isMoreThanOneDay;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
const markdown = await readFile(README, { encoding: 'utf8' });
|
||||
const getLatest = await fs.readFile(LATEST_FILENAME, 'utf8');
|
||||
LOG('Checking if updating is needed');
|
||||
if (!shouldUpdate(getLatest)) {
|
||||
console.log('Last update was less than a day ago 😅. Exiting...');
|
||||
process.exit(1);
|
||||
}
|
||||
const markdown = await fs.readFile(README, 'utf8');
|
||||
const githubRepos = extractAllRepos(markdown);
|
||||
await writeFile(
|
||||
GITHUB_REPOS,
|
||||
JSON.stringify(githubRepos, null, 2),
|
||||
printError,
|
||||
);
|
||||
LOG('writing repo list to disk...');
|
||||
await fs.outputJSON(GITHUB_REPOS, githubRepos, { spaces: 2 });
|
||||
|
||||
LOG('fetching data...');
|
||||
const metadata = await batchFetchRepoMetadata(githubRepos);
|
||||
|
||||
await writeFile(
|
||||
GITHUB_METADATA_FILE,
|
||||
JSON.stringify(metadata, null, 2),
|
||||
printError,
|
||||
);
|
||||
console.log('✅ metadata saved');
|
||||
LOG('writing metadata to disk...');
|
||||
await fs.outputJSON(GITHUB_METADATA_FILE, metadata, { spaces: 2 });
|
||||
LOG('✅ metadata saved');
|
||||
|
||||
// save the latest
|
||||
fs.writeFile(LATEST_FILENAME, GITHUB_METADATA_FILE, printError);
|
||||
LOG('removing latest...');
|
||||
await fs.remove(LATEST_FILENAME);
|
||||
|
||||
LOG('writing latest...');
|
||||
await fs.outputFile(LATEST_FILENAME, GITHUB_METADATA_FILE);
|
||||
|
||||
if (process.env.DEBUG) {
|
||||
console.log('✅ late update time saved', {
|
||||
LATEST_FILENAME,
|
||||
GITHUB_METADATA_FILE,
|
||||
});
|
||||
}
|
||||
|
||||
console.log('gracefully shutting down.');
|
||||
process.exit();
|
||||
} catch (err) {
|
||||
printError(err);
|
||||
handleFailure(err);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user