docs
This commit is contained in:
parent
121452e1c9
commit
cae6761d24
7 changed files with 132 additions and 78 deletions
12
lib/args.js
12
lib/args.js
|
@ -1,5 +1,11 @@
|
||||||
let argRegex = new RegExp('(\-\-[a-zA-Z0-9]+)(=)(.*)');
|
let argRegex = new RegExp('(\-\-[a-zA-Z0-9]+)(=)(.*)');
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses args from node process
|
||||||
|
* @returns { {[ key: string ]: string | string[]} } parsed args object;
|
||||||
|
* if the arg followed format `--arg=val`, it will be indexed as `args[arg]=val`;
|
||||||
|
* otherwise it will be pushed to an array under `args['_']`
|
||||||
|
*/
|
||||||
const parseArgs = () => {
|
const parseArgs = () => {
|
||||||
let _args = { '_': [] };
|
let _args = { '_': [] };
|
||||||
[...process.argv.slice(2)].forEach(arg => {
|
[...process.argv.slice(2)].forEach(arg => {
|
||||||
|
@ -16,6 +22,12 @@ const parseArgs = () => {
|
||||||
|
|
||||||
export const args = parseArgs();
|
export const args = parseArgs();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets value of specified arg
|
||||||
|
* @param {string} argName the name of the arg to retrieve
|
||||||
|
* @returns {string} value of specified arg
|
||||||
|
* @throws will throw error if arg was not specified
|
||||||
|
*/
|
||||||
export const getArg = (argName) => {
|
export const getArg = (argName) => {
|
||||||
let arg = args[argName];
|
let arg = args[argName];
|
||||||
if (!!arg) {
|
if (!!arg) {
|
||||||
|
|
46
lib/dl.js
46
lib/dl.js
|
@ -5,6 +5,50 @@ import { error, log } from './log.js';
|
||||||
|
|
||||||
const ctx = 'getUser.js';
|
const ctx = 'getUser.js';
|
||||||
|
|
||||||
|
/**
 * Runs {@link getUser} concurrently for many users
 * @param {{ user: string, logs: string[] }[]} userDb userDb to run {@link getUser} on;
 *   everything a process writes to stdout is appended to that entry's `logs`
 * @param {number} threadMax max number of processes to run concurrently; numeric strings
 *   are accepted, and anything that is not a number >= 1 falls back to 1
 * @param {string} directory the directory to save the user media folders in
 * @returns {Promise<void>} promise which resolves once the process for every user has
 *   closed or failed to start
 */
export const getMany = (userDb, threadMax, directory) => new Promise((resolve) => {
    // The limit usually comes from the command line as a string. An unusable value
    // would otherwise start nothing and resolve straight away.
    const parsedLimit = Math.floor(Number(threadMax));
    const limit = parsedLimit >= 1 ? parsedLimit : 1;
    let running = 0;
    let index = 0;

    const get = () => {
        while (running < limit && index < userDb.length) {
            ++running;
            const currentIndex = index++;

            // 'error' and 'close' can both fire for the same process (e.g. a failed
            // spawn), so the slot must only be released once.
            let finished = false;
            const onFinish = () => {
                if (finished) {
                    return;
                }
                finished = true;
                log(ctx, `Finished ${userDb[currentIndex].user}/media`);
                --running;
                get();
            };

            const proc = getUser(userDb[currentIndex].user, directory);
            proc.stdout.on('data', data => {
                userDb[currentIndex].logs.push(data);
            });
            // Output on stderr does not mean the process has ended, so it is not
            // treated as completion; only 'close' and 'error' are.
            proc.on('close', onFinish);
            proc.on('error', onFinish);
        }

        if (running === 0) {
            resolve();
        }
    };

    get();
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves gallery for specified user and saves to the specified parent path
|
||||||
|
* @param {string} user the user to retrieve media from
|
||||||
|
* @param {string} path the path to save the user's media folder in
|
||||||
|
* @returns {ChildProcess} the process that was {@link spawn spawned}
|
||||||
|
*/
|
||||||
export const getUser = (user, path) => {
|
export const getUser = (user, path) => {
|
||||||
const url = `https://twitter.com/${user}/media`;
|
const url = `https://twitter.com/${user}/media`;
|
||||||
let args;
|
let args;
|
||||||
|
@ -18,7 +62,7 @@ export const getUser = (user, path) => {
|
||||||
const proc = spawn(`python3 ~/.local/bin/gallery-dl -c ./config.json${!!args ? ' ' + args + ' ' : ' '}-d ${path} ${url}`, { shell: true });
|
const proc = spawn(`python3 ~/.local/bin/gallery-dl -c ./config.json${!!args ? ' ' + args + ' ' : ' '}-d ${path} ${url}`, { shell: true });
|
||||||
|
|
||||||
proc.stdout.on('data', data => {
|
proc.stdout.on('data', data => {
|
||||||
//log(ctx, data);
|
log(ctx, data);
|
||||||
});
|
});
|
||||||
proc.stderr.on('data', data => {
|
proc.stderr.on('data', data => {
|
||||||
error(ctx, data);
|
error(ctx, data);
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
import { readdir } from 'fs/promises'
|
import { readdir } from 'fs/promises'
|
||||||
|
|
||||||
export const getChildren = async source =>
|
/**
 * Lists the sub-directories found directly inside a directory.
 * @param {string} source path of the directory to inspect
 * @returns {Promise<string[]>} names (not full paths) of the entries that are directories
 */
export const getChildDirectories = async (source) => {
    const entries = await readdir(source, { withFileTypes: true });
    const names = [];
    for (const entry of entries) {
        if (entry.isDirectory()) {
            names.push(entry.name);
        }
    }
    return names;
};
|
||||||
|
|
14
lib/log.js
14
lib/log.js
|
@ -2,15 +2,29 @@ const options = { year: 'numeric', day: '2-digit', month: '2-digit', hour: '2-di
|
||||||
/**
 * Derives a BCP 47 locale tag from POSIX-style locale environment variables.
 * @param {{ [key: string]: string | undefined }} env environment to read; defaults to `process.env`
 * @returns {string | undefined} a tag such as `en-US`, or `undefined` when no usable
 *   locale is configured (Intl then uses its own default locale)
 */
const detectLocale = (env = process.env) => {
    // None of these may be set (e.g. on Windows), hence the empty-string fallback.
    const raw = env.LANG || env.LANGUAGE || env.LC_ALL || env.LC_MESSAGES || '';
    // Drop the encoding ("en_US.UTF-8"), any modifier ("@euro") and any fallback
    // list ("en_US:en"), then switch to the hyphenated form.
    const tag = raw.split('.')[0].split('@')[0].split(':')[0].replace('_', '-');
    try {
        // Malformed tags such as "C" or "" throw a RangeError; well-formed but
        // unavailable tags produce an empty list.
        return Intl.DateTimeFormat.supportedLocalesOf(tag).length > 0 ? tag : undefined;
    } catch (_) {
        return undefined;
    }
};

const osLocale = detectLocale();
|
||||||
const dtFormat = Intl.DateTimeFormat(osLocale, options);
|
const dtFormat = Intl.DateTimeFormat(osLocale, options);
|
||||||
|
|
||||||
|
/**
 * Formats the current date and time with this module's `dtFormat` formatter.
 * @returns {string} the formatted timestamp
 */
export const getTime = () => dtFormat.format(new Date());
|
||||||
|
|
||||||
|
/**
 * Prints a message to the console, prefixed with a timestamp and the originating script.
 * @param {string} src name of the script emitting the message
 * @param {string} msg text to print
 */
export const log = (src, msg) => {
    const line = `${getTime()} : ${src} : ${msg}`;
    console.log(line);
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Logs formatted output to the console as an error
|
||||||
|
* @param {string} src the source script doing the logging
|
||||||
|
* @param {string} msg the message to log
|
||||||
|
*/
|
||||||
export const error = (src, msg) => {
|
export const error = (src, msg) => {
|
||||||
const time = new Date().toISOString();
|
const time = new Date().toISOString();
|
||||||
console.error(`${time} : ${src} : ${msg}`);
|
console.error(`${time} : ${src} : ${msg}`);
|
||||||
|
|
|
@ -1,74 +0,0 @@
|
||||||
import { readFile } from 'fs/promises';
|
|
||||||
|
|
||||||
import { getArg } from './lib/args.js';
|
|
||||||
import { getUser } from './lib/dl.js';
|
|
||||||
import { error, log } from './lib/log.js';
|
|
||||||
|
|
||||||
const ctx = 'buildDb.js';
|
|
||||||
|
|
||||||
const getMany = (processes, threadMax, directory) => new Promise((resolve, reject) => {
|
|
||||||
let running = 0;
|
|
||||||
let index = 0;
|
|
||||||
|
|
||||||
const get = () => {
|
|
||||||
const onFinish = (currentIndex) => {
|
|
||||||
log(ctx, `Finished ${processes[currentIndex].user}/media`);
|
|
||||||
--running;
|
|
||||||
get();
|
|
||||||
}
|
|
||||||
|
|
||||||
while (running < threadMax && index < processes.length) {
|
|
||||||
++running;
|
|
||||||
let currentIndex = index++;
|
|
||||||
|
|
||||||
let proc = getUser(processes[currentIndex].user, directory);
|
|
||||||
proc.stdout.on('data', data => {
|
|
||||||
processes[currentIndex].logs.push(data);
|
|
||||||
});
|
|
||||||
proc.stderr.on('data', _ => onFinish(currentIndex));
|
|
||||||
proc.on('close', _ => onFinish(currentIndex));
|
|
||||||
proc.on('error', _ => onFinish(currentIndex));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (running === 0) {
|
|
||||||
resolve();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
get();
|
|
||||||
});
|
|
||||||
|
|
||||||
const buildDb = async () => {
|
|
||||||
log(ctx, 'Grabbing db');
|
|
||||||
let directory = '', threadMax = 1, db;
|
|
||||||
try {
|
|
||||||
directory = getArg('path');
|
|
||||||
} catch (err) {
|
|
||||||
error(ctx, err);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
threadMax = getArg('threads');
|
|
||||||
log(ctx, `Using ${threadMax} threads`);
|
|
||||||
} catch (err) {
|
|
||||||
log(ctx, 'Using 1 thread');
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
let file = await readFile(`${directory}/db.json`, { encoding: 'utf8' });
|
|
||||||
db = JSON.parse(file);
|
|
||||||
} catch (err) {
|
|
||||||
error(ctx, err);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let processes = db.map(entry => ({
|
|
||||||
...entry,
|
|
||||||
logs: [],
|
|
||||||
}));
|
|
||||||
|
|
||||||
log(ctx, `Building db using <user>/media for ${processes.length} users`);
|
|
||||||
await getMany(processes, threadMax, directory);
|
|
||||||
|
|
||||||
log(ctx, 'Building db using /search');
|
|
||||||
}
|
|
||||||
|
|
||||||
buildDb();
|
|
48
run-downloadDb.js
Normal file
48
run-downloadDb.js
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
import { readFile } from 'fs/promises';
|
||||||
|
|
||||||
|
import { getArg } from './lib/args.js';
|
||||||
|
import { getMany } from './lib/dl.js';
|
||||||
|
import { error, log } from './lib/log.js';
|
||||||
|
|
||||||
|
const ctx = 'downloadDb.js';
|
||||||
|
|
||||||
|
/**
 * Downloads all media possible for the users stored in db.json at the specified `--path`.
 * Useful for a first run, or for topping up existing media that may be only
 * partially archived.
 *
 * Reads `--path` (required) and `--threads` (optional, defaults to 1) via {@link getArg}.
 * A missing `--path`, an unreadable or unparsable db.json, or a db.json that is not
 * an array is reported through {@link error} and ends the run early.
 * @returns {Promise<void>} resolves when {@link getMany} has finished or the run was abandoned
 */
const downloadDb = async () => {
    log(ctx, 'Grabbing db');

    let directory;
    try {
        directory = getArg('path');
    } catch (err) {
        error(ctx, err);
        return;
    }

    let threadMax = 1;
    try {
        const requested = getArg('threads');
        // Args arrive as strings; only a whole number >= 1 is a usable limit.
        const parsed = Number(requested);
        if (Number.isInteger(parsed) && parsed >= 1) {
            threadMax = parsed;
            log(ctx, `Using ${threadMax} threads`);
        } else {
            error(ctx, `Ignoring invalid --threads value "${requested}"`);
            log(ctx, 'Using 1 thread');
        }
    } catch (err) {
        // getArg is documented to throw when the arg was not specified; use the default.
        log(ctx, 'Using 1 thread');
    }

    let db;
    try {
        const file = await readFile(`${directory}/db.json`, { encoding: 'utf8' });
        db = JSON.parse(file);
    } catch (err) {
        error(ctx, err);
        return;
    }
    // Valid JSON is not necessarily a list of users; db.map below needs an array.
    if (!Array.isArray(db)) {
        error(ctx, new TypeError(`${directory}/db.json must contain an array of users`));
        return;
    }

    // Each entry gets its own log buffer for getMany to fill.
    const processes = db.map(entry => ({
        ...entry,
        logs: [],
    }));

    log(ctx, `Building db using <user>/media for ${processes.length} users`);
    await getMany(processes, threadMax, directory);

    log(ctx, 'Building db using /search');
};
|
||||||
|
|
||||||
|
downloadDb();
|
|
@ -1,11 +1,16 @@
|
||||||
import { writeFile } from 'fs/promises';
|
import { writeFile } from 'fs/promises';
|
||||||
|
|
||||||
import { getArg } from './lib/args.js';
|
import { getArg } from './lib/args.js';
|
||||||
import { getChildren } from './lib/io.js';
|
import { getChildDirectories } from './lib/io.js';
|
||||||
import { error, log } from './lib/log.js';
|
import { error, log } from './lib/log.js';
|
||||||
|
|
||||||
const ctx = 'initDb.js';
|
const ctx = 'initDb.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes a user db from a list of existing directories
|
||||||
|
* at the specified `--path` parameter when executing the command.
|
||||||
|
* Useful when there is already a collection of folders.
|
||||||
|
*/
|
||||||
const initDb = async () => {
|
const initDb = async () => {
|
||||||
log(ctx, 'Grabbing existing directories');
|
log(ctx, 'Grabbing existing directories');
|
||||||
let directory = '';
|
let directory = '';
|
||||||
|
@ -15,7 +20,7 @@ const initDb = async () => {
|
||||||
error(ctx, err);
|
error(ctx, err);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const children = await getChildren(directory);
|
const children = await getChildDirectories(directory);
|
||||||
const db = children.map(child => ({
|
const db = children.map(child => ({
|
||||||
'user': child,
|
'user': child,
|
||||||
'lastUpdated': 'never',
|
'lastUpdated': 'never',
|
||||||
|
|
Loading…
Add table
Reference in a new issue