import langTags from './data/language-tags.json';

/**
 * Language Tags (IETF BCP 47)
 *
 * The individual subtags that make up a language tag. Only `primary` is
 * required; the remaining subtags are nullable.
 *
 * @memberof module:@sm/locale
 *
 * Original definition:
 * @see https://github.com/mntv-survey-platform/smsdk.surveysvc/blob/main/smsdk/surveysvc/lib/languages.yml
 *
 * Uses a partial syntax (excludes: extlang, extension, privateuse) @see https://tools.ietf.org/search/bcp47#section-2.1
 */
export type LanguageSubTags = {
  /** Primary language subtag: ISO 639 code, e.g. `pt` */
  primary: string;
  /** Script subtag: ISO 15924 code, e.g. `Hant` */
  script: string | null;
  /** Region subtag: ISO 3166-1 code, e.g. `BR` */
  region: string | null;
  /** Registered variant subtag, e.g. `POSIX` */
  variant: string | null;
};
/**
 * A single language entry: identifying metadata, the complete language tag
 * (`code`) and the subtags that tag is made of (see `LanguageSubTags`).
 */
export type LanguageTagDefinition = {
  /** Momentive ID of language, e.g. `"1"` */
  id: string;
  /** Internal name of language */
  name: string;
  /**
   * Display name of language (if different from `name`)
   *
   * NOTE(review): typed as a required string — confirm what the data holds
   * when the display name does not differ from `name`.
   */
  displayName: string;
  /** Complete language tag, e.g. `zh_Hant_TW` */
  code: string;
} & LanguageSubTags;

/**
 * Matches a complete, underscore-delimited, case-sensitive language tag and
 * captures its subtags as named groups:
 *   - `primary`: 2-3 lowercase letters (required)
 *   - `script`:  one uppercase letter followed by three lowercase letters
 *   - `region`:  two uppercase letters, or three digits
 *   - `variant`: 5-8 alphanumerics, or a digit followed by three alphanumerics
 *
 * Every subtag after `primary` is optional. Hyphen-delimited tags do not match.
 */
const tagRegEx =
  /^(?<primary>[a-z]{2,3})(_(?<script>[A-Z]{1}[a-z]{3}))?(_(?<region>[A-Z]{2}|[0-9]{3}))?(_(?<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))?$/;
/** Language tag definitions loaded from `./data/language-tags.json`. */
const LANGUAGE_TAGS: LanguageTagDefinition[] = langTags;

/**
 * Break a string Language Tag into its components based on IETF BCP 47
 *
 * Subtags that are not present in `tag` are reported as `null`, as declared
 * by `LanguageSubTags`.
 *
 * @param tag - (_case sensitive_, underscore-delimited) the language tag to be parsed, e.g.:
 *   - `pt` for Portuguese
 *   - `pt_BR` for Brazilian Portuguese
 *   - `zh_Hant_TW` for _Chinese (Traditional, Taiwan)_
 *   - `en_US_POSIX` for _English (United States, Computer)_
 * @returns the subtags of `tag`, or `undefined` when `tag` does not match the
 *   supported syntax
 */
export function parseLanguageTag(tag: string): LanguageSubTags | undefined {
  // Runtime shape of the named groups: a group that did not participate in
  // the match is `undefined` (never `null`); `primary` is mandatory.
  type RawGroups = {
    primary: string;
    script?: string;
    region?: string;
    variant?: string;
  };

  const groups = tagRegEx.exec(tag)?.groups as RawGroups | undefined;
  if (groups === undefined) {
    return undefined;
  }

  // Build a plain object and map missing subtags to `null` so the result
  // actually satisfies `LanguageSubTags` (`string | null`).
  return {
    primary: groups.primary,
    script: groups.script ?? null,
    region: groups.region ?? null,
    variant: groups.variant ?? null,
  };
}

/**
 * Replace the delimiter of the string Language Tag, by default to the ISO standard `-`
 *
 * Every `_` and `-` in `tag` is replaced; the casing of the subtags is left untouched.
 *
 * @param tag - the language tag to be converted, e.g.:
 *   - `pt` for Portuguese
 *   - `pt_BR` for Brazilian Portuguese
 *   - `zh_Hant_TW` for _Chinese (Traditional, Taiwan)_
 *   - `en_US_POSIX` for _English (United States, Computer)_
 * @param delimiter - the text to put between the subtags (inserted verbatim), defaults to `-`
 * @returns `tag` with every delimiter replaced by `delimiter`
 */
export function convertLanguageTag(tag: string, delimiter = '-'): string {
  // split/join inserts `delimiter` literally; `String.prototype.replace` with
  // a string replacement would interpret `$`-sequences (`$&`, `$$`, ...) in it.
  return tag.split(/[_-]/).join(delimiter);
}

/**
 * Produce the canonical comparison form of a string Language Tag:
 * hyphen-delimited and entirely lowercase
 *
 * @example `zh_Hant_TW` becomes `zh-hant-tw`
 *
 * @param tag - the language tag to be normalized, e.g.:
 *   - `pt` for Portuguese
 *   - `pt_BR` for Brazilian Portuguese
 *   - `zh_Hant_TW` for _Chinese (Traditional, Taiwan)_
 *   - `en_US_POSIX` for _English (United States, Computer)_
 * @returns the lowercase, hyphenated representation of `tag`
 */
export function normalizeLanguageTag(tag: string): string {
  // First unify the delimiters (default `-`), then fold the casing.
  const hyphenated = convertLanguageTag(tag);
  const lowercased = hyphenated.toLowerCase();
  return lowercased;
}

/**
 * Get array of all possible `LanguageTagDefinition`
 *
 * @returns the module-level definitions array itself (not a copy), so callers
 *   should treat it as read-only
 */
export function getLanguageTags(): LanguageTagDefinition[] {
  return LANGUAGE_TAGS;
}

/**
 * Look up a single `LanguageTagDefinition` by its id
 *
 * @param id - the id to search for (exact, strict string comparison), for
 *   example `"1"` for _English_ or `"361"` for _Croatian (Bosnia and Herzegovina)_
 * @returns the first definition whose `id` equals the given one, or
 *   `undefined` when there is none
 */
export function getLanguageTagById(
  id: string
): LanguageTagDefinition | undefined {
  for (const definition of LANGUAGE_TAGS) {
    if (definition.id === id) {
      return definition;
    }
  }
  return undefined;
}

/**
 * Get a specific `LanguageTagDefinition` by its code (aka. language tag)
 *
 * Both the given `code` and each definition's code are normalized before
 * being compared, so the lookup ignores casing and accepts `_` as well as `-`
 * as delimiter.
 *
 * @example `zh_Hant_TW`, `zh-Hant-TW` and `zh-hant-tw` all find the same definition
 *
 * @param code - the code (aka. language tag) to be searched for, e.g.:
 *   - `pt` for Portuguese
 *   - `pt_BR` for Brazilian Portuguese
 *   - `zh_Hant_TW` for _Chinese (Traditional, Taiwan)_
 *   - `en_US_POSIX` for _English (United States, Computer)_
 * @returns the first definition whose normalized code equals the normalized
 *   `code`, or `undefined` when there is none
 */
export function getLanguageTagByCode(
  code: string
): LanguageTagDefinition | undefined {
  // Normalize the search term once instead of on every comparison.
  const wanted = normalizeLanguageTag(code);
  return LANGUAGE_TAGS.find(def => normalizeLanguageTag(def.code) === wanted);
}
