// Convention: `url` is a string, `parsedUrl` is a `URL` instance.

import queryParametersWeCanStrip from '../../copied-from-backend/queryParametersWeCanStrip.json';
import makeLogger from './makeLogger';

// Module-level logger, created from this file's path (presumably so log output can be attributed to this module).
// Used below to warn when a URL string cannot be parsed.
const logger = makeLogger(__filename);

// Normalizers modify their input and return the normalized segment
type Normalizer = (url: URL) => string;
// Hook consulted by `normalizeUrl` after each normalizer has run. It receives the normalizer that was just
// applied and the segment it returned; a truthy result stops normalization early.
type ShouldHalt = (normalizer: Normalizer, normalizedSegment: string) => boolean | undefined;

/*
  Segment normalizers, applied in array order by `normalizeUrl`.
  Each one mutates the `URL` it is given and returns the normalized value of the segment it owns, which lets a
  caller compare two URLs segment by segment and stop early (see the `shouldHalt` option of `normalizeUrl`).
  They're ordered by what's most likely to be different between any two URLs, but I don't think it matters too much.
*/
const normalizers: Normalizer[] = [
  // Makes sure the path ends with a trailing slash
  function normalizeURLPathname(parsedUrl) {
    if (!parsedUrl.pathname.endsWith('/')) {
      parsedUrl.pathname += '/';
    }
    return parsedUrl.pathname;
  },

  // Drops ignorable query parameters and sorts the rest so parameter order doesn't matter
  function normalizeURLSearchParams(parsedUrl) {
    // Remove query parameters which should be ignored when comparing URLs
    for (const queryParameter of queryParametersWeCanStrip) {
      parsedUrl.searchParams.delete(queryParameter);
    }

    parsedUrl.searchParams.sort();
    return parsedUrl.searchParams.toString();
  },

  // Drops a leading `www.` label from the hostname
  function normalizeURLHostname(parsedUrl) {
    /*
      The match is anchored to the start of the hostname: an unanchored `replace('www.', '')` would also rewrite
      hosts such as `awww.example.com` (-> `aexample.com`) or `foo.www.example.com` (-> `foo.example.com`).
      NOTE(review): confirm the backend counterpart of this normalization anchors the prefix the same way.
    */
    parsedUrl.hostname = parsedUrl.hostname.replace(/^www\./, '');
    return parsedUrl.hostname;
  },

  function normalizeURLHash(parsedUrl) {
    /*
      Strip hash fragment if there's no `/` in it (as not to break Gmail).
      If we're keeping it, strip text fragment from it.
    */
    if (parsedUrl.hash) {
      if (parsedUrl.hash.includes('/') || parsedUrl.hash.includes('%2F')) {
        const withoutTextFragment = parsedUrl.hash.replace(/#?:~:text.*?$/i, '');

        /*
          If only a bare `#` is left, remove the hash entirely.
          This has to be decided on the stripped string, before assigning: the `hash` getter returns '' for an
          empty fragment, so checking `parsedUrl.hash === '#'` after assignment never matches and the URL would
          serialize with a dangling `#`.
        */
        parsedUrl.hash = withoutTextFragment === '#' ? '' : withoutTextFragment;
      } else {
        parsedUrl.hash = '';
      }
    }
    return parsedUrl.hash;
  },

  // Treats `http:` and `https:` as the same protocol by upgrading the former
  function normalizeURLProtocol(parsedUrl) {
    if (parsedUrl.protocol === 'http:') {
      parsedUrl.protocol = 'https:';
    }
    return parsedUrl.protocol;
  },

  // Drops credentials. Always returns '', so this step never counts as a difference between two URLs
  function normalizeURLAuthentication(parsedUrl) {
    parsedUrl.password = '';
    parsedUrl.username = '';
    return '';
  },

  // Drops the port. Always returns '', so URLs that differ only by port normalize to the same string
  function normalizeURLPort(parsedUrl) {
    parsedUrl.port = '';
    return '';
  },
];

/*
  Tells whether two URL strings are equal once both have been normalized.
  This is optimized for performance: identical strings short-circuit without any parsing, and normalization is
  abandoned at the first segment on which the two URLs disagree.
*/
export function areUrlsEqual(urlA: string, urlB: string): boolean {
  // Identical strings need no parsing at all
  if (urlA === urlB) {
    return true;
  }

  let parsedUrlB: URL;
  try {
    parsedUrlB = new URL(urlB);
  } catch {
    logger.warn('could not parse url', { url: urlB });
    // The raw strings were already compared above, so there's nothing more we can do with an unparseable URL
    return false;
  }

  // Set by the callback below as soon as a normalized segment of A differs from the matching segment of B
  let segmentsDiverged = false;

  const normalizedUrlA = normalizeUrl(urlA, {
    shouldHalt: (normalizer, normalizedSegmentA) => {
      // Applying the same normalizer to B mutates `parsedUrlB` in step with A and yields B's segment
      const normalizedSegmentB = normalizer(parsedUrlB);
      if (normalizedSegmentB !== normalizedSegmentA) {
        segmentsDiverged = true;
      }
      return segmentsDiverged;
    },
  });

  if (segmentsDiverged) {
    return false;
  }

  // A final full comparison covers edge cases; e.g the `hostname`s are equal but the `host`s are not
  return normalizedUrlA === parsedUrlB.toString();
}

/*
  Normalizes a URL string by running every entry of `normalizers` over it, in order, and serializing the result.
  The output needs to stay as close as possible to `reader.utils.normalize_url` in Python.
  This is optimized for performance.

  `options.shouldHalt`, when given, is called after each (segment) normalization step with the normalizer that
  just ran and the segment it returned. A truthy result stops normalization and the URL is returned as
  normalized so far; see `areUrlsEqual` for an example.

  If `url` can't be parsed, a warning is logged and `url` is returned unchanged.
*/
export function normalizeUrl(
  url: string,
  options: {
    shouldHalt?: ShouldHalt;
  } = {},
): string {
  const { shouldHalt } = options;

  let parsedUrl: URL;
  try {
    parsedUrl = new URL(url);
  } catch {
    logger.warn('could not parse url', { url });
    return url;
  }

  for (const applyNormalizer of normalizers) {
    // The normalizer always runs, whether or not a `shouldHalt` hook was supplied
    const segment = applyNormalizer(parsedUrl);
    if (shouldHalt?.(applyNormalizer, segment)) {
      break;
    }
  }

  return parsedUrl.toString();
}
