/**
 * Url-splitting pattern, compiled once at module load so it is not rebuilt on
 * every call.
 *
 * Every section is optional and the pattern is not anchored, so it always
 * matches; sections that are absent leave their named group `undefined`.
 *
 * Named groups, in order:
 *  - `protocol`: `http` or `https`, without the `://` (the separator is
 *    captured by an unnamed group)
 *  - `host`:     everything up to the first `/` or `?`; must contain a `.`
 *  - `country`:  two word characters following the first `/`
 *  - `language`: two word characters following the second `/`
 *  - `path`:     from the next `/` up to (not including) the first `?`
 *  - `query`:    from the first `?` to the end of the string
 *
 * The host character class excludes `?` so that a query string directly after
 * the host (`https://example.com?x=1`) lands in `query` rather than being
 * swallowed by `host`.
 *
 * NOTE: `language` has no trailing boundary check, so `/us/enterprise` yields
 *       language `en` and no path.
 *
 * TODO: handle fragments. Right now they are grouped with either the path or
 *       the query, depending on which of the two they follow.
 */
const re =
  /((?<protocol>https?)?(:\/\/)?(?<host>[^/?]*\.[^/?]*))?(\/(?<country>\w{2})?\/(?<language>\w{2}))?(?<path>\/[^?]*)?(?<query>\?.*)?/

/**
 * The pieces of a url as captured by the named groups of the module's parsing
 * regex. Every field is optional: a piece that is absent from the input is
 * left undefined.
 */
export interface ParsedUrl {
  // Scheme without the '://' separator: 'http' or 'https'.
  protocol?: string
  // Host portion; the regex only recognises a host that contains a '.'.
  host?: string
  // Two-character country segment (first segment after the host).
  country?: string
  // Two-character language segment (second segment after the host).
  language?: string
  // Remainder of the path after country/language, starting with '/'.
  path?: string
  // Query string, including the leading '?'.
  query?: string
}

/**
 * Collapses every run of two or more consecutive slashes in `url` into a
 * single slash, leaving the first `http://` / `https://` scheme separator
 * untouched.
 *
 * The result is assembled by slicing and concatenating rather than by calling
 * `String.prototype.replace` with url text as the replacement string: a
 * replacement string gives `$$`, `$&`, `` $` `` and `$'` special meaning, which
 * corrupts urls that contain those sequences.
 *
 * @param url - absolute or relative url to normalise
 * @returns the url with redundant slashes removed
 */
function stripDoubleSlash(url: string): string {
  const collapseSlashes = (segment: string): string => segment.replace(/\/{2,}/g, '/')

  // Only the first scheme occurrence is protected; a second one further along
  // (e.g. inside a query value) is collapsed like any other run of slashes.
  const scheme = /https?:\/\//.exec(url)
  if (scheme === null) {
    return collapseSlashes(url)
  }

  // Normalise the text on both sides of the scheme so that double slashes are
  // still removed when the scheme is not at the very start of the string.
  const beforeScheme = url.slice(0, scheme.index)
  const afterScheme = url.slice(scheme.index + scheme[0].length)
  return collapseSlashes(beforeScheme) + scheme[0] + collapseSlashes(afterScheme)
}

/**
 * Primary url parser for the app: first normalises redundant slashes, then
 * splits the url into the named parts described by `ParsedUrl`.
 *
 * Parts that are missing from the input come back as `undefined`, so callers
 * that rebuild a url from the result should usually supply their own defaults
 * for `path` and `query`.
 *
 * @param url - the url to split
 * @returns a fresh object holding the captured parts
 */
export function parseUrl(url: string): ParsedUrl {
  const normalizedUrl = stripDoubleSlash(url)
  const namedGroups = re.exec(normalizedUrl)?.groups
  // Spread into a new literal so the caller never receives the regex engine's
  // own groups object (and gets `{}` if there are no groups at all).
  return { ...namedGroups }
}
