handle /search API after running /media requests
This commit is contained in:
parent
cae6761d24
commit
eec7e72ce0
2 changed files with 53 additions and 10 deletions
54
lib/dl.js
54
lib/dl.js
|
@ -4,21 +4,44 @@ import { getArg } from './args.js';
|
||||||
import { error, log } from './log.js';
|
import { error, log } from './log.js';
|
||||||
|
|
||||||
const ctx = 'getUser.js';
|
const ctx = 'getUser.js';
|
||||||
|
// Matches the `<name>-<id>-<yyyymmdd>_<hhmmss>` stamp found in logged file names.
// Capture groups: 1 = year, 2 = month (1-12), 3 = day, 4-6 = hour, minute, second.
const loggedDateRegex = /[a-zA-Z0-9]+-[0-9]+-([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})([0-9]{2})/gm;

/**
 * Gets a date formatted as yyyy-mm-dd for use with `/search`
 * from the last logged date (which is assumed to be the oldest retrieved image from a previous run).
 *
 * The returned date is one day AFTER the logged date, so that an exclusive
 * `until:` filter still covers the whole logged day.
 *
 * @param {string[]} logs logged output chunks; entries may be single lines, multi-line strings,
 * nested arrays, or anything whose string form contains the logged file names
 * @returns {string | undefined} the zero-padded `yyyy-mm-dd` date, or `undefined` when no
 * logged date could be found (callers then simply omit the upper bound)
 */
const getDateUrlFromLog = (logs) => {
    // sometimes the logs get grouped into one single string, while others get individually logged
    // may as well flatten it to a single string and test with regex to make consistent and avoid bugs
    const flat = (Array.isArray(logs) ? logs : []).flat().join('\n');

    // matchAll works on a clone of the regex, so the shared /g regex never keeps a stale lastIndex
    let lastMatch;
    for (const match of flat.matchAll(loggedDateRegex)) {
        lastMatch = match;
    }

    // nothing was logged (e.g. the previous run downloaded no files): there is no date to derive
    if (!lastMatch) {
        return undefined;
    }

    const [, year, month, day] = lastMatch;

    // Date.UTC takes a 0-based month, the log holds a 1-based one.
    // To be safe, spring forward a day; Date.UTC rolls over month/year boundaries for us.
    // Working in UTC throughout keeps the result independent of the host timezone.
    const date = new Date(Date.UTC(Number(year), Number(month) - 1, Number(day) + 1));

    const pad = (value) => String(value).padStart(2, '0');

    return `${date.getUTCFullYear()}-${pad(date.getUTCMonth() + 1)}-${pad(date.getUTCDate())}`;
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runs {@link getUser} concurrently for many users
|
* Runs {@link getUser} concurrently for many users
|
||||||
* @param {{ user: string, logs: string[] }[]} userDb userDb to run {@link getUser} on
|
* @param {{ user: string, logs: string[] }[]} userDb userDb to run {@link getUser} on
|
||||||
* @param {number} threadMax max number of threads to run concurrently
|
* @param {number} threadMax max number of threads to run concurrently
|
||||||
* @param {string} directory the directory to save the user media folders in
|
* @param {string} directory the directory to save the user media folders in
|
||||||
|
* @param {'media' | 'search'} mode whether to retrieve from the `/media` route or `/search` API
|
||||||
* @returns {Promise<void>} promise which resolves once all threads for every user have finished
|
* @returns {Promise<void>} promise which resolves once all threads for every user have finished
|
||||||
*/
|
*/
|
||||||
export const getMany = (userDb, threadMax, directory) => new Promise((resolve, reject) => {
|
export const getMany = (userDb, threadMax, directory, mode) => new Promise((resolve, reject) => {
|
||||||
let running = 0;
|
let running = 0;
|
||||||
let index = 0;
|
let index = 0;
|
||||||
|
|
||||||
const get = () => {
|
const get = () => {
|
||||||
const onFinish = (currentIndex) => {
|
const onFinish = (currentIndex) => {
|
||||||
log(ctx, `Finished ${userDb[currentIndex].user}/media`);
|
log(ctx, `Finished ${userDb[currentIndex].user} under ${mode} mode`);
|
||||||
--running;
|
--running;
|
||||||
get();
|
get();
|
||||||
}
|
}
|
||||||
|
@ -26,8 +49,12 @@ export const getMany = (userDb, threadMax, directory) => new Promise((resolve, r
|
||||||
while (running < threadMax && index < userDb.length) {
|
while (running < threadMax && index < userDb.length) {
|
||||||
++running;
|
++running;
|
||||||
let currentIndex = index++;
|
let currentIndex = index++;
|
||||||
|
const modeParams = mode === 'media' ? 'media' : {
|
||||||
|
from: '2007-12-31',
|
||||||
|
to: getDateUrlFromLog(userDb[currentIndex].logs),
|
||||||
|
};
|
||||||
|
|
||||||
let proc = getUser(userDb[currentIndex].user, directory);
|
let proc = getUser(userDb[currentIndex].user, directory, modeParams);
|
||||||
proc.stdout.on('data', data => {
|
proc.stdout.on('data', data => {
|
||||||
userDb[currentIndex].logs.push(data);
|
userDb[currentIndex].logs.push(data);
|
||||||
});
|
});
|
||||||
|
@ -47,10 +74,23 @@ export const getMany = (userDb, threadMax, directory) => new Promise((resolve, r
|
||||||
* Retrieves gallery for specified user and saves to the specified parent path
|
* Retrieves gallery for specified user and saves to the specified parent path
|
||||||
* @param {string} user the user to retrieve media from
|
* @param {string} user the user to retrieve media from
|
||||||
* @param {string} path the path to save the user's media folder in
|
* @param {string} path the path to save the user's media folder in
|
||||||
|
* @param {'media' | { from: string, to: string }} modeParams depending on the mode:
|
||||||
|
* - if `'media'`, will retrieve from /media
|
||||||
|
* - if object, will retrieve from /search?f=live&src=typed_query&q=(from%3A`<USER>`)%20since%3A<FROM|yyyy-mm-dd>%20until%3A`<TO|yyyy-mm-dd>`
|
||||||
* @returns {ChildProcess} the process that was {@link spawn spawned}
|
* @returns {ChildProcess} the process that was {@link spawn spawned}
|
||||||
*/
|
*/
|
||||||
export const getUser = (user, path) => {
|
export const getUser = (user, path, modeParams) => {
|
||||||
const url = `https://twitter.com/${user}/media`;
|
//"https://twitter.com/search?f=live&q=(from%3Aad_varg)%20until%3A2023-06-30%20since%3A2007-01-01&src=typed_query"
|
||||||
|
let url = `https://twitter.com/${user}/media`;
|
||||||
|
if (modeParams !== 'media') {
|
||||||
|
url = `https://twitter.com/search?f=live&src=typed_query&q=(from%3A${user})`;
|
||||||
|
if (!!modeParams.from) {
|
||||||
|
url += `%20since%3A${modeParams.from}`;
|
||||||
|
}
|
||||||
|
if (!!modeParams.to) {
|
||||||
|
url += `%20until%3A${modeParams.to}`;
|
||||||
|
}
|
||||||
|
}
|
||||||
let args;
|
let args;
|
||||||
try {
|
try {
|
||||||
args = getArg('args');
|
args = getArg('args');
|
||||||
|
@ -58,8 +98,8 @@ export const getUser = (user, path) => {
|
||||||
log(ctx, 'No args being provided to gallery-dl');
|
log(ctx, 'No args being provided to gallery-dl');
|
||||||
}
|
}
|
||||||
|
|
||||||
log(ctx, `python3 ~/.local/bin/gallery-dl -c ./config.json${!!args ? ' ' + args + ' ' : ' '}-d ${path} ${url}`);
|
log(ctx, `python3 ~/.local/bin/gallery-dl -c ./config.json${!!args ? ' ' + args + ' ' : ' '}-d ${path} "${url}"`);
|
||||||
const proc = spawn(`python3 ~/.local/bin/gallery-dl -c ./config.json${!!args ? ' ' + args + ' ' : ' '}-d ${path} ${url}`, { shell: true });
|
const proc = spawn(`python3 ~/.local/bin/gallery-dl -c ./config.json${!!args ? ' ' + args + ' ' : ' '}-d ${path} "${url}"`, { shell: true });
|
||||||
|
|
||||||
proc.stdout.on('data', data => {
|
proc.stdout.on('data', data => {
|
||||||
log(ctx, data);
|
log(ctx, data);
|
||||||
|
|
|
@ -39,10 +39,13 @@ const downloadDb = async () => {
|
||||||
logs: [],
|
logs: [],
|
||||||
}));
|
}));
|
||||||
|
|
||||||
log(ctx, `Building db using <user>/media for ${processes.length} users`);
|
log(ctx, `Downloading media using <user>/media for ${processes.length} users`);
|
||||||
await getMany(processes, threadMax, directory);
|
await getMany(processes, threadMax, directory, 'media');
|
||||||
|
|
||||||
log(ctx, 'Building db using /search');
|
log(ctx, 'Downloading media using /search');
|
||||||
|
await getMany(processes, threadMax, directory, 'search');
|
||||||
|
|
||||||
|
log(ctx, 'Done');
|
||||||
}
|
}
|
||||||
|
|
||||||
downloadDb();
|
downloadDb();
|
||||||
|
|
Loading…
Add table
Reference in a new issue