// --- Page-scrape metadata (not part of the source; kept as a comment so the file parses) ---
// 1 star, 0 forks (Fork 0)
// gallery-dl-archive-manager/run-downloadDb.js
// 2025-01-21 20:30:04 -05:00
// 160 lines, 4.5 KiB, JavaScript
import { readFile, writeFile } from 'fs/promises';
import { getArg, validateSiteArg, SITES } from './lib/args.js';
import { getMany } from './lib/dl.js';
import { error, log } from './lib/log.js';
import { createDb, userSchema } from './lib/schema.js';
// Context tag passed as the first argument to every log()/error() call below.
const ctx = 'downloadDb.js';
/**
* Downloads all media possible for the users stored in db.json at the specified `--path`.
* Useful for first run or for augmenting existing media
* if it may be only partially archived in an uncertain state.
*
* If the db.json is missing, it will be automatically created,
* as this depends on users being defined in the db.json and not the folders present
* (as there could be some users whose accounts no longer exist
* or otherwise may not be maintained by the db.json anymore)
*/
const downloadDb = async () => {
  log(ctx, 'Grabbing db');
  let directory = '';
  let threadMax = 1;
  let db;
  let usersPerBatch;
  let waitTime;
  let site;
  // --path and --site are mandatory; abort with a logged error if missing/invalid.
  try {
    directory = getArg('path');
    site = validateSiteArg(getArg('site'));
  } catch (err) {
    error(ctx, err);
    return;
  }
  // --threads is optional; getArg throws when absent and we fall back to 1.
  try {
    threadMax = getArg('threads');
    log(ctx, `Using ${threadMax} threads`);
  } catch (err) {
    log(ctx, 'Using 1 thread');
  }
  // Batching requires BOTH --usersPerBatch and --waitTime. If only the former
  // is present, getArg('waitTime') throws and we run everything in one pass.
  try {
    usersPerBatch = getArg('usersPerBatch');
    const waitTimeSec = getArg('waitTime');
    waitTime = waitTimeSec * 1000;
    log(ctx, `Splitting into batches with ${usersPerBatch} per batch with ${waitTimeSec} seconds pausing between each batch.`);
  } catch (err) {
    log(ctx, `Not using batches${!!usersPerBatch ? ' (usersPerBatch provided without waitTime)' : ''}`);
    usersPerBatch = null;
  }
  // Reads <path>/db.json into `db`; throws (e.g. ENOENT) on failure.
  const tryReadDb = async () => {
    const file = await readFile(`${directory}/db.json`, { encoding: 'utf8' });
    db = JSON.parse(file);
  };
  try {
    await tryReadDb();
  } catch (err) {
    if (err.toString().includes('ENOENT')) {
      // db.json does not exist yet: create it, then retry the read once.
      try {
        log(ctx, 'Database was not yet present. Creating it now.');
        await createDb(directory);
        await tryReadDb();
      } catch (err2) {
        error(ctx, err2);
        return;
      }
    } else {
      error(ctx, err);
      return;
    }
  }
  // One work item per user, seeded from the stored db entry with a fresh
  // timestamp, cleared error state, and an empty log buffer for getMany to fill.
  let processes = Object.entries(db.userList).map(([user, entry]) => ({
    ...entry,
    user,
    lastUpdated: Date.now(),
    lastError: undefined,
    logs: [],
  }));
  const errorReadout = [];
  // Runs both download passes for one slice of users and scans the per-user
  // logs for known fatal error types, recording them for the final readout.
  const executeDl = async (arr) => {
    log(ctx, `Downloading media using /<user>/media for ${arr.length} users`);
    await getMany({
      userDb: arr,
      threadMax,
      directory,
      mode: 'media',
      site,
    });
    arr.forEach((entry) => {
      // Fix: parameter renamed from `log` — it shadowed the imported log() helper.
      entry.logs.forEach((logLine) => {
        if (logLine.includes('NotFoundError')) {
          const strOut = `${entry.user} wasn't found: "${logLine.replace('\n', '')}". You may want to remove them from the db.json file or update their username.`;
          errorReadout.push(strOut);
          entry.lastError = strOut;
        } else if (logLine.includes('AuthorizationError')) {
          const strOut = `There was an authorization error for user ${entry.user}: "${logLine.replace('\n', '')}"`;
          errorReadout.push(strOut);
          entry.lastError = strOut;
        }
      });
    });
    if (site === SITES.TWITTER) {
      // Twitter gets a second pass in 'search' mode.
      log(ctx, 'Downloading media using /search');
      await getMany({
        userDb: arr,
        threadMax,
        directory,
        mode: 'search',
        site,
      });
    }
  };
  if (!!usersPerBatch) {
    // Chunk the work queue into consecutive batches of `usersPerBatch`.
    const batches = processes.reduce((arr, item, i) => {
      const chunkIndex = Math.floor(i / usersPerBatch);
      if (!arr[chunkIndex]) {
        arr[chunkIndex] = [];
      }
      arr[chunkIndex].push(item);
      return arr;
    }, []);
    const delay = (ms) => new Promise((res) => setTimeout(res, ms));
    for (let i = 0; i < batches.length; i++) {
      const batch = batches[i];
      log(ctx, `Executing batch ${batch[0]?.user}-${batch[batch.length - 1]?.user}`);
      await executeDl(batch);
      // Fix: only pause BETWEEN batches. The original also slept after the
      // final batch, delaying completion by a full waitTime for no benefit.
      if (i < batches.length - 1) {
        log(ctx, `Waiting ${waitTime / 1000} seconds before next batch.`);
        await delay(waitTime);
      }
    }
    processes = batches.flat(1);
  } else {
    await executeDl(processes);
  }
  log(ctx, 'Updating the db');
  // Merge the (mutated) work items back over the stored userList and persist.
  try {
    const updated = {
      ...db,
      userList: {
        ...db.userList,
        ...Object.fromEntries(processes.map((e) => [e.user, userSchema(e)])),
      },
    };
    await writeFile(`${directory}/db.json`, JSON.stringify(updated, null, 2));
  } catch (err) {
    error(ctx, err);
    return;
  }
  log(ctx, 'Collecting errors');
  errorReadout.forEach((err) => error(ctx, err));
  log(ctx, 'Done');
};
// Entry point. Attach a rejection handler: getMany is awaited inside
// executeDl without a try/catch, so a rejection there would otherwise
// surface as an unhandled promise rejection at the process level.
downloadDb().catch((err) => error(ctx, err));