import { mkdir, readdir, rename } from 'fs/promises';

import { getArg } from './lib/args.js';
import { getChildDirectories } from './lib/io.js';
import { error, log } from './lib/log.js';

// Logging context tag used by log()/error() calls in this script.
const ctx = 'dedupeIds.js';

// Matches media filenames shaped like `<id>-<YYYYMMDD>_<HHMMSS>-<img|vid|gif><n>`.
// Capture groups: [1] id, [3]/[4]/[5] year/month/day, [7]/[8]/[9] hour/minute/second,
// [10] media slot with counter (e.g. "img1"), [11] media type, [12] counter.
const idDateTimeRegex = /([0-9]*)-(([0-9]{4})([0-9]{2})([0-9]{2}))_(([0-9]{2})([0-9]{2})([0-9]{2}))-((img|vid|gif)([0-9]*))/;

/**
 * Reads through the db directory and looks for duplicate entries by id.
 *
 * Two files are duplicates when they share the same id and the same media slot
 * (e.g. `img1`). The provided offset decides which copy of the pair is moved
 * into the `dups/` subdirectory:
 * - If `--offset="-"`, the file with the older date is marked as a duplicate.
 * - If `--offset="+"`, the file with the newer date is marked as a duplicate.
 *
 * This exists because `twittermediadownloader` used local time when saving files, which would possibly not be fixed because of DST.
 * This repo uses UTC when saving times for the sake of consistency.
 */
const dedupeIds = async () => {
  let directory, offset;
  try {
    directory = getArg('path');
    offset = getArg('offset');
  } catch (err) {
    error(ctx, err);
    return;
  }

  log(ctx, 'Reading directories');
  const userDirs = await getChildDirectories(directory);

  // With `--offset="-"` the OLDER copy of each duplicate pair is moved out.
  const markOlder = offset.includes('-');

  // Process users sequentially. The previous `forEach(async ...)` fired every
  // user at once, ignored the awaits, and resolved dedupeIds() before any
  // work had actually finished.
  for (const user of userDirs) {
    const workingDir = `${directory}/${user}`;
    try {
      await mkdir(`${workingDir}/dups`);
    } catch (err) {
      // The dups directory may already exist from a previous run; only
      // report unexpected failures.
      if (!err.toString().includes('EEXIST')) {
        error(ctx, err);
      }
    }

    log(ctx, `Finding dups for ${user}`);
    const files = await readdir(workingDir);
    // Set gives O(1) membership checks; the old Array#includes made the scan
    // accidentally quadratic.
    const dupsToMove = new Set();

    for (let i = 0; i < files.length; ++i) {
      // Skip when the current file is already marked as a duplicate
      if (dupsToMove.has(files[i])) {
        continue;
      }

      // Test self for the pattern id-date_time-typeCount.extension; files
      // that don't match can't participate in a duplicate pair.
      const selfTest = idDateTimeRegex.exec(files[i]);
      if (!selfTest) {
        continue;
      }

      // Look back/forward ~5 entries: a post holds at most 4 media files, so
      // this window covers every possible duplicate without a full scan.
      // (`<=` — the old `<` bound silently skipped the last neighbor.)
      const neighborMin = Math.max(i - 5, 0);
      const neighborMax = Math.min(i + 5, files.length - 1);
      for (let neighbor = neighborMin; neighbor <= neighborMax; ++neighbor) {
        if (neighbor === i || dupsToMove.has(files[neighbor])) {
          continue;
        }

        const neighborTest = idDateTimeRegex.exec(files[neighbor]);
        if (!neighborTest) {
          continue;
        }

        // Duplicate pair requires matching id AND matching media slot
        // (e.g. img1 === img1).
        if (neighborTest[1] !== selfTest[1] || neighborTest[10] !== selfTest[10]) {
          continue;
        }

        // Build comparable dates from the date_time component. The filename
        // month is 1-based but the Date constructor is 0-based, hence `- 1`.
        const neighborDate = new Date(neighborTest[3], neighborTest[4] - 1, neighborTest[5], neighborTest[7], neighborTest[8], neighborTest[9]);
        const selfDate = new Date(selfTest[3], selfTest[4] - 1, selfTest[5], selfTest[7], selfTest[8], selfTest[9]);

        // Pick the duplicate per the offset contract. The original condition
        // (`selfDate > neighborDate && offset.includes('-')`) mishandled
        // `--offset="+"` when the current file was the older of the pair:
        // it marked the OLDER file instead of the newer one.
        const selfIsNewer = selfDate.getTime() > neighborDate.getTime();
        const dup = selfIsNewer === markOlder ? files[neighbor] : files[i];
        const kept = dup === files[i] ? files[neighbor] : files[i];
        dupsToMove.add(dup);
        log(ctx, `${dup} duplicate of ${kept}`);
      }
    }

    log(ctx, `Moving dups for ${user}`);
    // Sequential moves with per-file error reporting; the old
    // `forEach(async ...)` left every rename floating.
    for (const dup of dupsToMove) {
      try {
        await rename(`${workingDir}/${dup}`, `${workingDir}/dups/${dup}`);
      } catch (err) {
        error(ctx, err);
      }
    }

    log(ctx, `Moving finished for ${user}`);
  }
}

dedupeIds();