import { readFile, writeFile } from 'fs/promises';

import { getArg, validateSiteArg, SITES } from './lib/args.js';
import { getMany } from './lib/dl.js';
import { error, log } from './lib/log.js';
import { createDb, userSchema } from './lib/schema.js';
const ctx = 'downloadDb.js';

/**
 * Downloads all media possible for the users stored in db.json at the specified `--path`.
 * Useful for first run or for augmenting existing media
 * if it may be only partially archived in an uncertain state.
 *
 * If the db.json is missing, it will be automatically created,
 * as this depends on users being defined in the db.json and not the folders present
 * (as there could be some users whose accounts no longer exist
 * or otherwise may not be maintained by the db.json anymore)
 */
const downloadDb = async () => {
  log(ctx, 'Grabbing db');
  let directory = '', threadMax = 1, db, usersPerBatch, waitTime, site;

  // Required args: --path and --site. Bail out early if either is missing/invalid.
  try {
    directory = getArg('path');
    site = validateSiteArg(getArg('site'));
  } catch (err) {
    error(ctx, err);
    return;
  }

  // Optional: --threads (defaults to 1 when absent).
  try {
    threadMax = getArg('threads');
    log(ctx, `Using ${threadMax} threads`);
  } catch (err) {
    log(ctx, 'Using 1 thread');
  }

  // Optional: --usersPerBatch together with --waitTime (seconds) enables batching.
  try {
    usersPerBatch = getArg('usersPerBatch');
    const waitTimeSec = getArg('waitTime');
    waitTime = waitTimeSec * 1000;
    log(ctx, `Splitting into batches with ${usersPerBatch} per batch with ${waitTimeSec} seconds pausing between each batch.`);
  } catch (err) {
    // Reaching here with usersPerBatch already set means --waitTime was the missing arg.
    log(ctx, `Not using batches${!!usersPerBatch ? ' (usersPerBatch provided without waitTime)' : ''}`);
    usersPerBatch = null;
  }

  // Loads `${directory}/db.json` into the closed-over `db`.
  const tryReadDb = async () => {
    const file = await readFile(`${directory}/db.json`, { encoding: 'utf8' });
    db = JSON.parse(file);
  };

  try {
    await tryReadDb();
  } catch (err) {
    if (err.toString().includes('ENOENT')) {
      // First run: the database file does not exist yet, so create it and retry.
      try {
        log(ctx, 'Database was not yet present. Creating it now.');
        await createDb(directory);
        await tryReadDb();
      } catch (err2) {
        error(ctx, err2);
        return;
      }
    } else {
      error(ctx, err);
      return;
    }
  }

  // One work item per user, carrying a fresh log buffer for this run.
  let processes = Object.entries(db.userList).map(([user, entry]) => ({
    ...entry,
    user,
    lastUpdated: Date.now(),
    lastError: undefined,
    logs: [],
  }));

  const errorReadout = [];

  // Runs the downloads for one slice of users, then scans each user's logs
  // for known per-user failures (missing account, authorization problems).
  const executeDl = async (arr) => {
    log(ctx, `Downloading media using /<user>/media for ${arr.length} users`);
    await getMany({
      userDb: arr,
      threadMax,
      directory,
      mode: 'media',
      site,
    });

    arr.forEach(entry => {
      // Parameter is named `line` (not `log`) so the imported log() helper
      // is not shadowed inside this callback.
      entry.logs.forEach(line => {
        if (line.includes('NotFoundError')) {
          const strOut = `${entry.user} wasn't found: "${line.replaceAll('\n', '')}". You may want to remove them from the db.json file or update their username.`;
          errorReadout.push(strOut);
          entry.lastError = strOut;
        } else if (line.includes('AuthorizationError')) {
          const strOut = `There was an authorization error for user ${entry.user}: "${line.replaceAll('\n', '')}"`;
          errorReadout.push(strOut);
          entry.lastError = strOut;
        }
      });
    });

    // Twitter additionally supports a /search pass that can surface media
    // the /<user>/media timeline misses.
    if (site === SITES.TWITTER) {
      log(ctx, 'Downloading media using /search');
      await getMany({
        userDb: arr,
        threadMax,
        directory,
        mode: 'search',
        site,
      });
    }
  };

  if (!!usersPerBatch) {
    // Chunk the work list into groups of `usersPerBatch`, preserving order.
    const batches = processes.reduce((arr, item, i) => {
      const chunkIndex = Math.floor(i / usersPerBatch);
      if (!arr[chunkIndex]) {
        arr[chunkIndex] = [];
      }
      arr[chunkIndex].push(item);
      return arr;
    }, []);

    const delay = ms => new Promise(res => setTimeout(res, ms));

    for (let i = 0; i < batches.length; i++) {
      const batch = batches[i];
      log(ctx, `Executing batch ${batch[0]?.user}-${batch[batch.length - 1]?.user}`);
      await executeDl(batch);
      // Only pause BETWEEN batches — sleeping after the final batch would
      // just delay completion for no benefit.
      if (i < batches.length - 1) {
        log(ctx, `Waiting ${waitTime / 1000} seconds before next batch.`);
        await delay(waitTime);
      }
    }

    // The batch arrays hold references to the same entry objects, so
    // flattening restores the full (mutated) list in original order.
    processes = batches.flat(1);
  } else {
    await executeDl(processes);
  }

  log(ctx, 'Updating the db');
  try {
    const updated = {
      ...db,
      userList: {
        ...db.userList,
        ...Object.fromEntries(processes.map(e => [e.user, userSchema(e)])),
      },
    };
    await writeFile(`${directory}/db.json`, JSON.stringify(updated, null, 2));
  } catch (err) {
    error(ctx, err);
    return;
  }

  log(ctx, 'Collecting errors');
  errorReadout.forEach(err => error(ctx, err));
  log(ctx, 'Done');
};

// Surface any unexpected rejection (e.g. a throw escaping getMany) instead of
// letting the process die with an unhandled-rejection warning.
downloadDb().catch(err => error(ctx, err));