From b917b77e718d7b54b6e98b31e05fd039dceae140 Mon Sep 17 00:00:00 2001 From: Lightling Date: Wed, 28 Feb 2024 22:24:24 -0500 Subject: [PATCH] add batching --- run-downloadDb.js | 73 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/run-downloadDb.js b/run-downloadDb.js index e1afe8f..89cc6e9 100644 --- a/run-downloadDb.js +++ b/run-downloadDb.js @@ -19,7 +19,7 @@ const ctx = 'downloadDb.js'; */ const downloadDb = async () => { log(ctx, 'Grabbing db'); - let directory = '', threadMax = 1, db; + let directory = '', threadMax = 1, db, usersPerBatch, waitTime; try { directory = getArg('path'); } catch (err) { @@ -32,6 +32,15 @@ const downloadDb = async () => { } catch (err) { log(ctx, 'Using 1 thread'); } + try { + usersPerBatch = getArg('usersPerBatch'); + let waitTimeSec = getArg('waitTime'); + waitTime = waitTimeSec * 1000; + log(ctx, `Splitting into batches with ${usersPerBatch} per batch with ${waitTimeSec} seconds pausing between each batch.`); + } catch (err) { + log(ctx, `Not using batches${!!usersPerBatch ? ' (usersPerBatch provided without waitTime)' : ''}`); + usersPerBatch = null; + } const tryReadDb = async () => { let file = await readFile(`${directory}/db.json`, { encoding: 'utf8' }); db = JSON.parse(file); @@ -65,26 +74,54 @@ const downloadDb = async () => { }) }); - log(ctx, `Downloading media using //media for ${processes.length} users`); - await getMany(processes, threadMax, directory, 'media'); - const errorReadout = []; - processes.forEach(entry => { - entry.logs.forEach(log => { - if (log.includes('NotFoundError')) { - const strOut = `${entry.user} wasn't found: "${log.replace('\n', '')}". You may want to remove them from the db.json file or update their username.`; - errorReadout.push(strOut); - entry.lastError = strOut; - } else if (log.includes('AuthorizationError')) { - const strOut = `There was an authorization error for user ${entry.user}: "${log.replace('\n', '')}"`; - errorReadout.push(strOut); - entry.lastError = strOut; - } + const executeDl = async (arr) => { + log(ctx, `Downloading media using //media for ${arr.length} users`); + await getMany(arr, threadMax, directory, 'media'); + + arr.forEach(entry => { + entry.logs.forEach(log => { + if (log.includes('NotFoundError')) { + const strOut = `${entry.user} wasn't found: "${log.replace('\n', '')}". You may want to remove them from the db.json file or update their username.`; + errorReadout.push(strOut); + entry.lastError = strOut; + } else if (log.includes('AuthorizationError')) { + const strOut = `There was an authorization error for user ${entry.user}: "${log.replace('\n', '')}"`; + errorReadout.push(strOut); + entry.lastError = strOut; + } + }); }); - }); + + log(ctx, 'Downloading media using /search'); + await getMany(arr, threadMax, directory, 'search'); + } - log(ctx, 'Downloading media using /search'); - await getMany(processes, threadMax, directory, 'search'); + if (!!usersPerBatch) { + const batches = processes.reduce((arr, item, i) => { + const chunkIndex = Math.floor(i / usersPerBatch); + if (!arr[chunkIndex]) { + arr[chunkIndex] = []; + } + arr[chunkIndex].push(item); + return arr; + }, []); + + const delay = ms => new Promise(res => setTimeout(res, ms)); + log(ctx, JSON.stringify(batches, null, 2)); + + for (let i = 0; i < batches.length; i++) { + const batch = batches[i]; + log(ctx, `Executing batch ${batch[0]?.user}-${batch[batch.length - 1]?.user}`); + await executeDl(batch); + log(ctx, `Waiting ${waitTime / 1000} seconds before next batch.`); + await delay(waitTime); + } + + processes = batches.flat(1); + } else { + await executeDl(processes); + } log(ctx, 'Updating the db'); try {