/** EXTERNALS **/

import { unified } from 'unified';
import retextLatin from 'retext-latin';
import retextEnglish from 'retext-english';
import retextEmoji from 'retext-emoji';

/** LOCALS **/

/** HELPERS **/

/**
 * Lookup table of ready-to-use unified processors.
 *
 * - `en`      is built on the `retext-english` plugin.
 * - `default` is built on the `retext-latin` plugin.
 *
 * Every processor additionally has the `retext-emoji` plugin applied.
 */
const parsers = (() => {
  // Each call creates a fresh processor so the entries share no state.
  const createParser = (languagePlugin) =>
    unified().use(languagePlugin).use(retextEmoji);

  return {
    en: createParser(retextEnglish),
    default: createParser(retextLatin),
  };
})();

/** MAIN **/

/**
 * Gathers the `value` of every childless node at or below `node`, visiting
 * children depth-first in array order.
 *
 * @param {object} node - Tree node. A node with a truthy `children` property
 *   is descended into; any other node contributes its `value`.
 * @param {Array} [text=[]] - Accumulator; it is mutated in place.
 * @returns {Array} The same `text` array, with the collected values appended.
 */
const collectText = function (node, text = []) {
  const { children } = node;

  if (children) {
    children.forEach((child) => {
      collectText(child, text);
    });
  } else {
    // Childless node: its own value is the only thing to contribute.
    text.push(node.value);
  }

  return text;
};

/**
 * Flattens a tree into a list of `{ isWord, value }` tokens, visiting
 * children depth-first in array order.
 *
 * - A node without `children` becomes a non-word token carrying its `value`.
 * - A node with `children` whose `type` is `'WordNode'` becomes a single word
 *   token whose value is the concatenation of the text collected beneath it.
 * - Any other node with `children` is descended into.
 *
 * @param {object} node - Tree node to flatten.
 * @param {Array<{isWord: boolean, value: *}>} [tokens=[]] - Accumulator; it
 *   is mutated in place.
 * @returns {Array<{isWord: boolean, value: *}>} The same `tokens` array.
 */
const collectTokens = function (node, tokens = []) {
  const isLeaf = !node.children;
  // The leaf check takes precedence: a childless node is never a word token,
  // whatever its type.
  const isWordNode = !isLeaf && node.type === 'WordNode';

  if (isLeaf || isWordNode) {
    tokens.push({
      isWord: isWordNode,
      value: isWordNode ? collectText(node).join('') : node.value,
    });
  } else {
    node.children.forEach((child) => {
      collectTokens(child, tokens);
    });
  }

  return tokens;
};

/**
 * Splits `text` into a flat list of `{ isWord, value }` tokens.
 *
 * The text is parsed with the parser registered under `language`, falling
 * back to the `default` parser when no such entry exists. Tokens whose value
 * is shorter than three UTF-16 code units are reported as non-words.
 *
 * @param {object} options
 * @param {string} [options.language] - Key into the parser table.
 * @param {string} options.text - Text to tokenize.
 * @returns {Array<{isWord: boolean, value: *}>} Tokens in document order.
 */
export const tokenize = function ({ language, text }) {
  // Anything shorter than this is demoted to a non-word token.
  const minWordLength = 3;

  // Only honour keys defined on the table itself. A bare `parsers[language]`
  // lookup also resolves inherited keys such as "constructor" or "__proto__",
  // which are truthy but have no `parse` method and would make the call throw.
  const hasOwnParser = Object.prototype.hasOwnProperty.call(parsers, language);
  const parser = (hasOwnParser && parsers[language]) || parsers.default;

  const tree = parser.parse({ value: text });

  return collectTokens(tree).map((token) =>
    token.value.length < minWordLength ? { ...token, isWord: false } : token
  );
};
