Getting Started with VoiceHarbor

This page will help you get started with Nijta. You'll be up and running in a jiffy!

Nijta presents the VoiceHarbor API, designed to anonymize audio files by removing sensitive information from both the voice and the spoken content. Explore our services through the Quick Start page for a concise introduction.

Our proficient team remains at your disposal to address all your inquiries pertaining to our API, its features, as well as the accompanying documentation. Should you have any questions along the way or need clarification on any topic, don't hesitate to create an issue.

👍

How to Obtain Your Access Token:

Getting your hands on this magical token is a breeze! Simply reach out to us at [email protected] and let us know that you're ready to embrace the Nijta experience at its fullest. Our dedicated team is here to assist you in receiving your Access Token promptly, so you can start your journey without delay.

Making your first API call

You can make requests using any language you want.
Here, we show examples in Python and JavaScript, as well as with Nijta's CLI:

import json
import os
import time

import nijtaio

# --- Configuration -----------------------------------------------------------
# Replace '<token>' with your own access token before running the script.
TOKEN = '<token>'
API_URL = 'https://api.nijta.com'
OUTPUT_DIR = 'output'

# The token is sent both in these headers and when the session is created.
headers = {
    'Content-Type': 'application/json; charset=utf-8',
    'TOKEN': TOKEN,
}

# Anonymization options passed along with the batch.
params = dict(
    language='french_8',
    gender='f',
    voice=True,
    content=True,
    entities='Name,Organization,Location,City,Country,Numbers,Age,Date,Credit Card Number,Email,Concept,Product,Event,Technology,Group,Medical Condition,Characteristic,Research,County',
)

# --- Submit the batch --------------------------------------------------------
audio_paths = [
    'path/to/audio_1.wav',
    'path/to/audio_2.mp3',
    'path/to/audio_3.ogg',
]
session = nijtaio.session(TOKEN, api_url=API_URL)
response = nijtaio.send_request(
    audio_paths,
    params,
    session,
    headers=headers,
    api_url=API_URL,
)

# The task identifier is read from the 'data' object of the JSON response body.
response_payload = json.loads(response.content)
task_id = response_payload['data']['task_id']

print(f'{task_id = }')

# --- Wait for completion -----------------------------------------------------
print('Waiting for the batch to be processed.')

status = ''

# Poll once per second; read_response yields a (status, batch) pair and the
# loop stops as soon as the status is 'finished'.
while True:
    time.sleep(1)

    status, anonymized_batch = nijtaio.read_response(task_id, api_url=API_URL)
    print(f'{status = }', end='\r')

    if status == 'finished':
        break

# --- Save the results --------------------------------------------------------
print(f'Writing results in {OUTPUT_DIR}.')
os.makedirs(OUTPUT_DIR, exist_ok=True)
for original_filepath, anonymized_item in anonymized_batch.items():
    filename = os.path.basename(original_filepath)
    destination = os.path.join(OUTPUT_DIR, filename)

    # The anonymized audio is written as-is in binary mode.
    with open(destination, mode='wb') as f:
        f.write(anonymized_item['audio'])

    # The transcription is only printed when content anonymization was requested.
    if params['content']:
        print(filename, anonymized_item['transcription'])

print(f'Done. Check the results in the {OUTPUT_DIR} directory.')

// REQUIREMENTS
// npm install node-fetch glob

const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args));// require('node-fetch');
const fs = require('fs');
const path = require('path');
const glob = require('glob');


// Base URL of the Nijta API. No trailing slash: the request helpers append
// paths that already start with '/'.
const API_URL = 'https://api.nijta.com';
const TOKEN = "<token>";
const audio_folder = 'samples/*';  // glob pattern matching the audio files to anonymize
const output_folder = 'output';    // path to folder to store anonymized files

// Anonymization options; they are sent as URL query parameters, hence the
// string values.
const params = {
        'language': 'french_8',
        'gender': 'f',
        'voice': 'True',
        'content': 'True',
        'entities': 'Name,Organization,Location,City,Country,Numbers,Age,Date,Credit Card Number,Email,Concept,Product,Event,Technology,Group,Medical Condition,Characteristic,Research,County'
};

/**
 * Run the whole example: build the batch, open a session, submit the batch,
 * then wait for the result and write it to disk.
 *
 * The steps live in an async function because this file loads its
 * dependencies with `require` (CommonJS), where top-level `await` is not
 * available.
 *
 * @returns {Promise<void>} Resolves once the anonymized files are written.
 */
async function main() {
    // Build a batch of audio files
    const batch_json = createAudioBatch(audio_folder);

    // Create a session
    const session_id = await getSessionId(TOKEN);
    console.log('Session ID:', session_id);

    // Submit the batch to Nijta API
    const task_id = await submitBatchAndFetchTaskId(API_URL, session_id, params, TOKEN, batch_json);
    console.log("Task ID:", task_id);

    // Waiting for the batch to be processed and write the result on disk
    await processBatch(API_URL, task_id, output_folder);
}

// Report any failure and exit with a non-zero status instead of leaving an
// unhandled promise rejection.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

//////////////////////////////////////////////////
// FUNCTIONS

/**
 * Build the request body for a batch of audio files.
 *
 * Every regular file matched by the glob pattern is read and hex-encoded,
 * then stored under its base name.
 *
 * @param {string} audio_folder - Glob pattern selecting the audio files, e.g. 'samples/*'.
 * @returns {string} The payload object, JSON-encoded twice.
 */
function createAudioBatch(audio_folder) {
  console.log(`Create a batch of audio files.`);

  const payload = {};

  for (const audio of glob.sync(audio_folder)) {
    // A pattern such as 'samples/*' can also match sub-directories; reading
    // one with readFileSync would throw EISDIR, so only regular files are kept.
    if (!fs.statSync(audio).isFile()) {
      continue;
    }
    payload[path.basename(audio)] = fs.readFileSync(audio, 'hex');
  }

  // NOTE(review): the payload is deliberately stringified twice, presumably
  // because the API expects a JSON string that itself contains JSON — confirm.
  return JSON.stringify(JSON.stringify(payload));
}

/**
 * Open a session on the API and return its identifier.
 *
 * Sends a POST request to the `/session` endpoint of the module-level
 * `API_URL`, with the token in the `TOKEN` header.
 *
 * @param {string} token - Access token sent in the `TOKEN` header.
 * @returns {Promise<string>} The `session_id` field of the JSON response.
 * @throws {Error} If the API answers with a non-2xx HTTP status.
 */
async function getSessionId(token) {
    // Drop trailing slashes so a base URL such as 'https://host/' does not
    // produce 'https://host//session'.
    const baseUrl = API_URL.replace(/\/+$/, '');

    const response = await fetch(`${baseUrl}/session`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json; charset=utf-8',
            'TOKEN': token
        }
    });

    // Fail loudly (e.g. on an invalid token) instead of returning undefined.
    if (!response.ok) {
        throw new Error(`Session request failed: HTTP ${response.status} ${response.statusText}`);
    }

    const json = await response.json();
    return json['session_id'];
}


/**
 * Submit a batch of audio files and return the identifier of the created task.
 *
 * Sends a POST request to `/tasks/<session_id>` with the anonymization
 * options encoded as URL query parameters and the batch as request body.
 *
 * @param {string} API_URL - Base URL of the API; a trailing slash is tolerated.
 * @param {string} session_id - Session identifier appended to the `/tasks/` path.
 * @param {Object} params - Anonymization options, serialized with URLSearchParams.
 * @param {string} TOKEN - Access token sent in the `TOKEN` header.
 * @param {string} json_string - Request body, sent unchanged.
 * @returns {Promise<string>} The `data.task_id` field of the JSON response.
 * @throws {Error} If the API answers with a non-2xx HTTP status.
 */
async function submitBatchAndFetchTaskId(API_URL, session_id, params, TOKEN, json_string) {
    console.log(`Submit the batch`);

    // Drop trailing slashes so the base URL and the path join with a single '/'.
    const baseUrl = API_URL.replace(/\/+$/, '');
    const query = new URLSearchParams(params).toString();

    const response = await fetch(`${baseUrl}/tasks/${session_id}?${query}`, {
        method: 'POST',
        headers: {
            'Accept': 'application/json',
            'Content-Type': 'application/json; charset=utf-8',
            'TOKEN': TOKEN,
        },
        body: json_string
    });

    // Report the HTTP failure itself rather than a TypeError on a missing 'data' field.
    if (!response.ok) {
        throw new Error(`Batch submission failed: HTTP ${response.status} ${response.statusText}`);
    }

    const json = await response.json();
    console.log(json);
    return json['data']['task_id'];
}


/**
 * Poll a task until it is finished, then write the anonymized files to disk.
 *
 * The task status is read from `data.task_status`; once it equals
 * 'finished', `data.task_result.response` is decoded from hex into JSON.
 * Each entry of that JSON's `data` object is written to `outputFolder`
 * under its key, with the hex-encoded `audio` field as file content.
 *
 * @param {string} API_URL - Base URL of the API; a trailing slash is tolerated.
 * @param {string} task_id - Identifier of the task to poll.
 * @param {string} outputFolder - Folder receiving the files; created if missing.
 * @param {Object} [options] - Polling options.
 * @param {number} [options.pollIntervalMs=2000] - Delay between two status checks.
 * @param {number} [options.maxAttempts=Infinity] - Maximum number of status checks.
 * @returns {Promise<void>} Resolves once every file has been written.
 * @throws {Error} If a status check answers with a non-2xx HTTP status, or if
 *   the task is still not finished after `maxAttempts` checks.
 */
async function processBatch(API_URL, task_id, outputFolder, { pollIntervalMs = 2000, maxAttempts = Infinity } = {}) {
  console.log(`Waiting for the batch to be processed`);

  // Drop trailing slashes so the base URL and the path join with a single '/'.
  const taskUrl = `${API_URL.replace(/\/+$/, '')}/tasks/${task_id}`;
  let content;
  let attempt = 0;

  while (true) {
    attempt += 1;

    const response = await fetch(taskUrl, { method: 'GET' });
    if (!response.ok) {
      throw new Error(`Task status request failed: HTTP ${response.status} ${response.statusText}`);
    }

    const json = await response.json();
    if (json['data']['task_status'] === 'finished') {
      content = json['data']['task_result'];
      break;
    }

    // With the default (Infinity) polling never gives up, as before; a finite
    // value lets callers bail out of a task that never reaches 'finished'.
    if (attempt >= maxAttempts) {
      throw new Error(`Task ${task_id} did not finish after ${maxAttempts} status checks`);
    }

    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  console.log(`Read the result and write in ${outputFolder}`);
  fs.mkdirSync(outputFolder, { recursive: true });

  // The result is a hex-encoded JSON document.
  const anon_batch = JSON.parse(Buffer.from(content['response'], 'hex').toString());

  for (const [filename, item] of Object.entries(anon_batch['data'])) {
    fs.writeFileSync(path.join(outputFolder, filename), Buffer.from(item['audio'], 'hex'));

    // Display the transcription only when the result carries one; otherwise
    // the line would read "<file>: undefined".
    if (item['transcription'] !== undefined) {
      console.log(`${filename}: ${item['transcription']}`);
    }
  }
}



# Replace <token> with your own access token. The placeholder is quoted so the
# shell does not treat '<' and '>' as redirections.
voiceharbor --token "<token>" \
  --input_data "[\"path/to/audio_1.wav\", \"path/to/audio_2.mp3\"]" \
  --language english_16 \
  --gender m \
  --voice True \
  --content True \
  --entities Name,Organization,Location,City,Country,Numbers,Age,Date,Credit\ Card\ Number,Email,Concept,Product,Event,Technology,Group,Medical\ Condition,Characteristic,Research,County \
  --output_folder output