1
0
Fork 0
This commit is contained in:
lightling 2024-02-09 21:13:23 -05:00
parent 121452e1c9
commit cae6761d24
7 changed files with 132 additions and 78 deletions

View file

@ -1,5 +1,11 @@
// Matches `--name=value` style args: group 1 is the `--name` flag, group 2 the `=`, group 3 the value
let argRegex = /(--[a-zA-Z0-9]+)(=)(.*)/;
/**
* Parses args from node process
* @returns { {[ key: string ]: string | string[]} } parsed args object;
* if the arg follows the format `--arg=val`, it will be indexed as `args[arg]=val`;
* otherwise it will be pushed to an array under `args['_']`
*/
const parseArgs = () => {
let _args = { '_': [] };
[...process.argv.slice(2)].forEach(arg => {
@ -16,6 +22,12 @@ const parseArgs = () => {
// Args are parsed once, when this module is first evaluated
export const args = parseArgs();
/**
* Gets value of specified arg
* @param {string} argName the name of the arg to retrieve
* @returns {string} value of specified arg
* @throws will throw error if arg was not specified
*/
export const getArg = (argName) => {
let arg = args[argName];
if (!!arg) {

View file

@ -5,6 +5,50 @@ import { error, log } from './log.js';
// Source label passed to log/error for messages emitted from this module
const ctx = 'getUser.js';
/**
 * Runs {@link getUser} for many users, keeping at most `threadMax` child processes alive at once
 * @param {{ user: string, logs: string[] }[]} userDb userDb to run {@link getUser} on;
 * stdout chunks of each process are appended to the matching entry's `logs`
 * @param {number|string} threadMax max number of processes to run concurrently;
 * values that are not a number >= 1 fall back to 1
 * @param {string} directory the directory to save the user media folders in
 * @returns {Promise<void>} promise which resolves once the process of every user has closed or errored
 */
export const getMany = (userDb, threadMax, directory) => new Promise((resolve) => {
  // The limit may arrive as a CLI string; coerce it and never allow a limit that would stall the queue
  const requested = Number(threadMax);
  const limit = requested >= 1 ? requested : 1;
  let running = 0;
  let index = 0;
  const get = () => {
    while (running < limit && index < userDb.length) {
      ++running;
      const currentIndex = index++;
      // 'error' and 'close' can both fire for the same process; release its slot only once
      let finished = false;
      const onFinish = () => {
        if (finished) {
          return;
        }
        finished = true;
        log(ctx, `Finished ${userDb[currentIndex].user}/media`);
        --running;
        get();
      };
      const proc = getUser(userDb[currentIndex].user, directory);
      proc.stdout.on('data', data => {
        userDb[currentIndex].logs.push(data);
      });
      // stderr output is not treated as completion: the process is still running while it writes
      proc.on('close', onFinish);
      proc.on('error', onFinish);
    }
    if (running === 0) {
      resolve();
    }
  };
  get();
});
/**
* Retrieves gallery for specified user and saves to the specified parent path
* @param {string} user the user to retrieve media from
* @param {string} path the path to save the user's media folder in
* @returns {ChildProcess} the process that was {@link spawn spawned}
*/
export const getUser = (user, path) => {
const url = `https://twitter.com/${user}/media`;
let args;
@ -18,7 +62,7 @@ export const getUser = (user, path) => {
const proc = spawn(`python3 ~/.local/bin/gallery-dl -c ./config.json${!!args ? ' ' + args + ' ' : ' '}-d ${path} ${url}`, { shell: true });
proc.stdout.on('data', data => {
//log(ctx, data);
log(ctx, data);
});
proc.stderr.on('data', data => {
error(ctx, data);

View file

@ -1,6 +1,11 @@
import { readdir } from 'fs/promises'
export const getChildren = async source =>
/**
 * Lists the immediate sub-directories of a directory
 * @param {string} source path to the parent directory
 * @returns {Promise<string[]>} names (not full paths) of the entries that are directories
 */
export const getChildDirectories = async source => {
  const entries = await readdir(source, { withFileTypes: true });
  const directoryNames = [];
  for (const entry of entries) {
    if (entry.isDirectory()) {
      directoryNames.push(entry.name);
    }
  }
  return directoryNames;
};

View file

@ -2,15 +2,29 @@ const options = { year: 'numeric', day: '2-digit', month: '2-digit', hour: '2-di
// Locale taken from the environment (e.g. `en_US.UTF-8`) and reduced to a language tag (`en-US`).
// Falls back to an empty string when none of the variables is set, instead of throwing at import time.
const osLocale = (process.env.LANG || process.env.LANGUAGE || process.env.LC_ALL || process.env.LC_MESSAGES || '').split('.')[0].replace('_', '-');
// Values such as `C` or `POSIX` are not valid Intl locales and make the constructor throw a RangeError;
// in that case (or when no locale was found) the runtime's default locale is used.
const dtFormat = (() => {
  try {
    return Intl.DateTimeFormat(osLocale || undefined, options);
  } catch (err) {
    if (!(err instanceof RangeError)) {
      throw err;
    }
    return Intl.DateTimeFormat(undefined, options);
  }
})();
/**
 * Formats the current date/time with the module's shared formatter
 * @returns {string} the formatted timestamp
 */
export const getTime = () => dtFormat.format(new Date());
/**
 * Writes a `<time> : <src> : <msg>` line to the console
 * @param {string} src the source script doing the logging
 * @param {string} msg the message to log
 */
export const log = (src, msg) => {
  console.log(`${getTime()} : ${src} : ${msg}`);
};
/**
* Logs formatted output to the console as an error
* @param {string} src the source script doing the logging
* @param {string} msg the message to log
*/
export const error = (src, msg) => {
const time = new Date().toISOString();
console.error(`${time} : ${src} : ${msg}`);

View file

@ -1,74 +0,0 @@
import { readFile } from 'fs/promises';
import { getArg } from './lib/args.js';
import { getUser } from './lib/dl.js';
import { error, log } from './lib/log.js';
// Source label passed to log/error for messages emitted from this script
const ctx = 'buildDb.js';
/**
 * Runs `getUser` for many users, keeping at most `threadMax` child processes alive at once
 * @param {{ user: string, logs: string[] }[]} processes entries to download;
 * stdout chunks of each process are appended to the matching entry's `logs`
 * @param {number|string} threadMax max number of processes to run concurrently;
 * values that are not a number >= 1 fall back to 1
 * @param {string} directory the directory handed to `getUser` for every entry
 * @returns {Promise<void>} promise which resolves once the process of every entry has closed or errored
 */
const getMany = (processes, threadMax, directory) => new Promise((resolve) => {
  // The limit may arrive as a CLI string; coerce it and never allow a limit that would stall the queue
  const requested = Number(threadMax);
  const limit = requested >= 1 ? requested : 1;
  let running = 0;
  let index = 0;
  const get = () => {
    while (running < limit && index < processes.length) {
      ++running;
      const currentIndex = index++;
      // 'error' and 'close' can both fire for the same process; release its slot only once
      let finished = false;
      const onFinish = () => {
        if (finished) {
          return;
        }
        finished = true;
        log(ctx, `Finished ${processes[currentIndex].user}/media`);
        --running;
        get();
      };
      const proc = getUser(processes[currentIndex].user, directory);
      proc.stdout.on('data', data => {
        processes[currentIndex].logs.push(data);
      });
      // stderr output is not treated as completion: the process is still running while it writes
      proc.on('close', onFinish);
      proc.on('error', onFinish);
    }
    if (running === 0) {
      resolve();
    }
  };
  get();
});
/**
 * Reads `db.json` from the directory given by `--path` and runs `getMany` over its entries.
 * `--threads` is optional; anything that is not a positive integer falls back to 1.
 * Errors for a missing `--path` or an unreadable/invalid `db.json` are logged and end the run.
 * @returns {Promise<void>} resolves when the run has finished or was aborted
 */
const buildDb = async () => {
  log(ctx, 'Grabbing db');

  let directory;
  try {
    directory = getArg('path');
  } catch (err) {
    error(ctx, err);
    return;
  }

  // getArg yields the raw CLI string, so convert it before handing it on as a number
  let threadMax = 1;
  try {
    const raw = getArg('threads');
    const requested = Number.parseInt(raw, 10);
    if (Number.isInteger(requested) && requested > 0) {
      threadMax = requested;
      log(ctx, `Using ${threadMax} threads`);
    } else {
      log(ctx, `Ignoring invalid --threads value '${raw}'; using 1 thread`);
    }
  } catch (err) {
    log(ctx, 'Using 1 thread');
  }

  let db;
  try {
    const file = await readFile(`${directory}/db.json`, { encoding: 'utf8' });
    db = JSON.parse(file);
  } catch (err) {
    error(ctx, err);
    return;
  }
  if (!Array.isArray(db)) {
    error(ctx, `Expected ${directory}/db.json to contain an array of users`);
    return;
  }

  // Each entry gets its own log buffer for getMany to fill
  const processes = db.map(entry => ({
    ...entry,
    logs: [],
  }));
  log(ctx, `Building db using <user>/media for ${processes.length} users`);
  await getMany(processes, threadMax, directory);
  // NOTE(review): only the message is emitted here; no /search step follows in this function
  log(ctx, 'Building db using /search');
};
// Script entry point: start the run as soon as this file is executed
buildDb();

48
run-downloadDb.js Normal file
View file

@ -0,0 +1,48 @@
import { readFile } from 'fs/promises';
import { getArg } from './lib/args.js';
import { getMany } from './lib/dl.js';
import { error, log } from './lib/log.js';
// Source label passed to log/error for messages emitted from this script
const ctx = 'downloadDb.js';
/**
 * Downloads all media possible for the users stored in db.json at the specified `--path`.
 * Useful for a first run, or for augmenting existing media
 * that may be only partially archived or in an uncertain state.
 * `--threads` is optional; anything that is not a positive integer falls back to 1.
 * Errors for a missing `--path` or an unreadable/invalid `db.json` are logged and end the run.
 * @returns {Promise<void>} resolves when the run has finished or was aborted
 */
const downloadDb = async () => {
  log(ctx, 'Grabbing db');

  let directory;
  try {
    directory = getArg('path');
  } catch (err) {
    error(ctx, err);
    return;
  }

  // getArg yields the raw CLI string, so convert it before handing it on as a number
  let threadMax = 1;
  try {
    const raw = getArg('threads');
    const requested = Number.parseInt(raw, 10);
    if (Number.isInteger(requested) && requested > 0) {
      threadMax = requested;
      log(ctx, `Using ${threadMax} threads`);
    } else {
      log(ctx, `Ignoring invalid --threads value '${raw}'; using 1 thread`);
    }
  } catch (err) {
    log(ctx, 'Using 1 thread');
  }

  let db;
  try {
    const file = await readFile(`${directory}/db.json`, { encoding: 'utf8' });
    db = JSON.parse(file);
  } catch (err) {
    error(ctx, err);
    return;
  }
  if (!Array.isArray(db)) {
    error(ctx, `Expected ${directory}/db.json to contain an array of users`);
    return;
  }

  // Each entry gets its own log buffer for getMany to fill
  const processes = db.map(entry => ({
    ...entry,
    logs: [],
  }));
  log(ctx, `Building db using <user>/media for ${processes.length} users`);
  await getMany(processes, threadMax, directory);
  // NOTE(review): only the message is emitted here; no /search step follows in this function
  log(ctx, 'Building db using /search');
};
// Script entry point: start the download as soon as this file is executed
downloadDb();

View file

@ -1,11 +1,16 @@
import { writeFile } from 'fs/promises';
import { getArg } from './lib/args.js';
import { getChildren } from './lib/io.js';
import { getChildDirectories } from './lib/io.js';
import { error, log } from './lib/log.js';
// Source label passed to log/error for messages emitted from this script
const ctx = 'initDb.js';
/**
* Initializes a user db from a list of existing directories
* at the specified `--path` parameter when executing the command.
* Useful when there is already a collection of folders.
*/
const initDb = async () => {
log(ctx, 'Grabbing existing directories');
let directory = '';
@ -15,7 +20,7 @@ const initDb = async () => {
error(ctx, err);
return;
}
const children = await getChildren(directory);
const children = await getChildDirectories(directory);
const db = children.map(child => ({
'user': child,
'lastUpdated': 'never',