import isRelativeUrl from 'is-relative-url';
import thirdPartyNormalizeUrl from 'normalize-url';
import parseUrl from 'parse-url';
import TurndownService from 'turndown';

import urlJoin from './urlJoin';

/**
 * Normalizes an attribute value for use inside Markdown output.
 *
 * Every run of newlines, together with any whitespace that follows it, is
 * collapsed into a single newline. A missing (`null`/`undefined`) or empty
 * attribute yields an empty string.
 *
 * Taken from the turndown source.
 */
function cleanAttribute(attribute?: string | null): string {
  if (!attribute) {
    return '';
  }

  const newlineRuns = /(\n+\s*)+/g;
  return attribute.replace(newlineRuns, '\n');
}

/**
 * Resolves a URL found in a page against the URL of that page.
 *
 * - Empty input, non-relative input, or an empty `pageUrl`: `input` is
 *   returned unchanged.
 * - Protocol-relative input (`//host/path`): prefixed with the page's protocol.
 * - Root-relative input (`/path`): joined onto the page's protocol, host and
 *   (when present) port.
 * - Any other relative input: joined onto the page URL after its query string
 *   and hash have been stripped.
 *
 * Resolution is best-effort: if parsing, normalizing or joining throws, the
 * original `input` is returned instead of propagating the error.
 *
 * @param input - The URL as written in the document (e.g. an `href` or `src`).
 * @param pageUrl - The URL of the page the document was loaded from.
 * @returns The resolved URL, or `input` when it cannot or need not be resolved.
 */
function processUrl(input: string, pageUrl: string): string {
  if (!input || !isRelativeUrl(input) || !pageUrl) {
    return input;
  }

  // parseUrl is called inside the try as well, so a page URL it rejects falls
  // back to the raw input exactly like a normalize/join failure does.
  try {
    if (input.startsWith('//')) {
      const { protocol } = parseUrl(pageUrl, false);
      return `${protocol}:${input}`;
    }

    let base: string = pageUrl;

    if (input.startsWith('/')) {
      const { protocol, resource, port } = parseUrl(pageUrl, false);
      // `resource` carries no port, so add it back explicitly; otherwise
      // root-relative links on pages served from a non-default port would
      // point at the wrong origin.
      const authority = port ? `${resource}:${port}` : resource;
      base = `${protocol}://${authority}`;
    }

    return urlJoin([
      thirdPartyNormalizeUrl(base, {
        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
        // @ts-ignore
        removeQueryParameters: true,
        stripHash: true,
        stripWWW: false,
      }),
      input,
    ]);
  } catch {
    return input;
  }
}

// Replace turndown's `escape` hook with the identity function so text is
// passed through unchanged. The override is installed on the prototype, so it
// applies to every TurndownService instance sharing this prototype, not only
// the ones created in this file.
TurndownService.prototype.escape = (input: string) => input;

/**
 * Converts an HTML string to Markdown-style text using turndown.
 *
 * On top of turndown's defaults this applies the following rules:
 * - Links (`<a href>`) become `[content](href)`, with the `href` resolved
 *   against `pageUrl` through `processUrl`.
 * - Headings (`h1`–`h6`) are emitted as their bare content, without markers.
 * - Images become `![alt](src "title")` with `src` resolved against `pageUrl`;
 *   images without a `src` are dropped.
 * - `<del>` and `<style>` elements are removed entirely.
 *
 * @param input - The HTML to convert.
 * @param pageUrl - URL of the page the HTML came from, used to resolve
 *   relative link and image URLs.
 * @returns The converted text with leading and trailing whitespace trimmed.
 * @throws Error if the link rule is invoked for a node without
 *   `getAttribute` or without an `href`.
 */
const convertHtmlToText = (input: string, pageUrl: string): string => {
  const turndownService = new TurndownService({
    // The cast works around the option's narrower declared type.
    bulletListMarker: '•' as '-',
    emDelimiter: '*',
  });

  // Relative anchor URLs to absolute
  turndownService.addRule('inlineLink', {
    filter(node, options) {
      // Coerce to a real boolean: `getAttribute` yields `string | null`,
      // which would otherwise leak into the filter's return type.
      return (
        options.linkStyle === 'inlined' &&
        node.nodeName === 'A' &&
        Boolean(node.getAttribute('href'))
      );
    },

    replacement(content, node) {
      // The filter above only accepts anchors that have an href, so both
      // checks below guard invariants rather than expected inputs.
      if (!('getAttribute' in node)) {
        throw new Error('node.getAttribute does not exist');
      }
      const unprocessedHref = node.getAttribute('href');
      if (!unprocessedHref) {
        throw new Error('No href found');
      }
      const href = processUrl(unprocessedHref, pageUrl);
      return `[${content}](${href})`;
    },
  });

  // Headings carry no special markup in the output: keep only their content.
  turndownService.addRule('heading', {
    filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
    replacement: (content) => content,
  });

  // Relative image URLs to absolute
  turndownService.addRule('image', {
    filter: 'img',

    replacement(content, node) {
      const element = node as HTMLElement;
      const alt = cleanAttribute(element.getAttribute?.('alt'));
      const src = processUrl(element.getAttribute?.('src') || '', pageUrl);
      const title = cleanAttribute(element.getAttribute?.('title'));
      const titlePart = title ? ` "${title}"` : '';
      // An image without a usable src produces no output at all.
      return src ? `![${alt}](${src}${titlePart})` : '';
    },
  });

  turndownService.remove('del');
  turndownService.remove('style');

  return turndownService.turndown(input).trim();
};
export default convertHtmlToText;
