addon.js

/**
 * Simple binding of OpenJTalk.
 * @module node-openjtalk-binding
 */
const binary = require("@tignear/node-pre-gyp");
const path = require('path');
const meta = binary.meta(path.resolve(path.join(__dirname, './package.json')));
const { synthesis: _synthesis } = require(meta.module);
const { promises: fs } = require("fs");
const path_to_dictionary = path.resolve(path.join(meta.module_path, 'dictionary'));

/**
 * @typedef {Object} Dictionary
 * @property {Uint8Array|ArrayBuffer} unkdic unk.dic
 * @property {Uint8Array|ArrayBuffer} sysdic sys.dic
 * @property {Uint8Array|ArrayBuffer} property char.bin
 * @property {Uint8Array|ArrayBuffer} matrix matrix.bin
 */

/**
 * @typedef {Object} OpenJTalkOptions
 * @property {!string|Uint8Array|ArrayBuffer} htsvoice Path to htsvoice. Or data ArrayBuffer,Buffer.
 * @property {string|Dictionary} [dictionary] Dictionary object or Path to dictionary. NOT be URL nor Buffer. Must be encoded by UTF-8. The default is to use dictionary_dir.
 * @property {number} [sampling_frequency] Must be int. 1<=sampling_frequency.
 * @property {number} [frame_period] Must be int. 1<=frame_period.
 * @property {number} [all_pass_constant] 0.0<=all_pass_constant<=1.0.
 * @property {number} [postfiltering_coefficient] Default is 0.0. 0.0<=postfiltering_coefficient<=1.0.
 * @property {number} [speech_speed_rate] Default is 1.0. 0<=speech_speed_rate. Warning: Do not set a very small value as it consumes CPU time.
 * @property {number} [additional_half_tone] Default is 0.0.
 * @property {number} [voiced_unvoiced_threshold] Default is 0.5. 0.0<=voiced_unvoiced_threshold<=1.0.
 * @property {number} [weight_of_GV_for_spectrum] Default is 1.0. 0.0<=weight_of_GV_for_spectrum.
 * @property {number} [weight_of_GV_for_log_F0] Default is 1.0. 0.0<=weight_of_GV_for_log_F0.
 * @property {number} [volume_in_dB] Default is 0.0.
 * @property {number} [audio_buffer_size] Disabled as default. 0 regarded disabled. 0<=audio_buffer_size
 */

/**
 * @typedef {Object} WaveObject
 * @property {!Buffer} raw_data  Synthesized PCM by host byte order. 
 * @property {!Int16Array} data Synthesized PCM.
 * @property {!16} bitDepth LINEAR16.
 * @property {!number} sampleRate Equals to OpenJTalkOptions#sampling_frequency if presented for synthesis function. Else automatically determined value.
 * @property {!1} numChannels monaural.
 */

/**
 * Read mecab dictionary.
 * @async
 * @function
 * @static
 * @param {String} path_to_dictionary 
 * @returns {Promise<Dictionary>}
 */
const readDictionary = exports.readDictionary = async function readDictionary(path_to_dictionary) {
  const [unkdic, sysdic, property, matrix] = (await Promise.all(
    [
      fs.readFile(path.resolve(path_to_dictionary, "unk.dic")),
      fs.readFile(path.resolve(path_to_dictionary, "sys.dic")),
      fs.readFile(path.resolve(path_to_dictionary, "char.bin")),
      fs.readFile(path.resolve(path_to_dictionary, "matrix.bin"))
    ]
  )).map(e => e.buffer);
  return {
    unkdic,
    sysdic,
    property,
    matrix
  };
}

const default_dictionary = readDictionary(path_to_dictionary);

/** 
 * Synthesis voice with OpenJTalk
 * @async
 * @function
 * @static
 * @param {string} text Text to synthesize.
 * @param {OpenJTalkOptions} options OpenJTalk synthesize option.
 * @return {Promise<WaveObject>} Synthesized PCM.
 */
exports.synthesis = async function synthesis(text, options) {
  let htsvoice = options.htsvoice;
  if (typeof htsvoice === "string") {
    htsvoice = await fs.readFile(htsvoice);
  }
  if (htsvoice instanceof Uint8Array) {
    htsvoice = htsvoice.buffer;
  }
  let dictionary = options.dictionary;
  if (!dictionary) {
    dictionary = await default_dictionary;
  } else if (typeof dictionary == "string") {
    dictionary = await readDictionary(dictionary);
  } else {
    dictionary = Object.fromEntries(Object.entries(dictionary).map(([k, v]) => [k, v instanceof Uint8Array ? v.buffer : v]));
  }
  return new Promise((resolve, reject) => {
    if (!text) reject(new TypeError("The first argument must be a non-empty string"));
    function cb(err, /** @type {Buffer} */ buffer, /** @type {number} */ sampleRate) {
      if (err) {
        reject(err);
        return;
      }
      /**
       * @type {WaveObject}
       */
      const wave = {
        raw_data: buffer,
        data: new Int16Array(buffer.buffer),
        bitDepth: 16,
        numChannels: 1,
        sampleRate
      };
      resolve(wave);
    }
    try {
      _synthesis(cb, text, { ...options, htsvoice, dictionary });
    } catch (err) {
      reject(err);
    }
  });
}


/** 
 * Path to builded dictionary.
 * @type {string} 
 * */
exports.dictionary_dir = path_to_dictionary;