This page explains how you can incorporate the Open Vokaturi software in your own C app (batch version).
The following example measures the five emotions, averaged over a whole pre-existing recording.
/*
measureAverageEmotions.c
public-domain sample code by Vokaturi, 2022-08-23
A function that reports the average emotion probabilities
in a prerecorded sampled sound.
*/
#include <stdio.h>
#include <stdlib.h>
#include "Vokaturi.h"
/*
	Prints the emotion probabilities (neutrality, happiness, sadness,
	anger, fear) measured over one complete recording, one per line,
	on standard output. If the analysis is flagged as not valid,
	a single explanatory line is printed instead.
*/
void reportAverageEmotions (
	int numberOfSamples,   // the number of samples in the recording
	double samples [],     // the buffer that contains the recording,
	                       // i.e., the indexes run from 0 through numberOfSamples-1
	double sampleRate)     // the sampling frequency in hertz
{
	/*
		The voice buffer is made exactly as long as the recording,
		so that a single fill transfers every sample.
	*/
	VokaturiVoice voice = VokaturiVoice_create (sampleRate,
		numberOfSamples, 0);
	VokaturiVoice_fill_float64array (voice, numberOfSamples, samples);

	/*
		The results are written into these two local structures,
		so the voice can be destroyed before they are printed.
	*/
	VokaturiQuality quality;
	VokaturiEmotionProbabilities emotionProbabilities;
	VokaturiVoice_extract (voice, & quality, & emotionProbabilities);
	VokaturiVoice_destroy (voice);

	if (quality.valid) {
		printf ("Neutrality: %.3f\n", emotionProbabilities.neutrality);
		printf ("Happiness: %.3f\n", emotionProbabilities.happiness);
		printf ("Sadness: %.3f\n", emotionProbabilities.sadness);
		printf ("Anger: %.3f\n", emotionProbabilities.anger);
		printf ("Fear: %.3f\n", emotionProbabilities.fear);
	} else {
		/*
			Terminate the line, like every other line of the report,
			so that later output does not end up glued to this message.
		*/
		printf ("This sound contains no reliably voiced parts.\n");
	}
}
The trick in this “batch” example is to create a VokaturiVoice that is big enough to contain all samples, and then to call a VokaturiVoice_fill function (here VokaturiVoice_fill_float64array()) only once, so that all the samples of the sound file are transferred into the VokaturiVoice object in one go. The analysis by VokaturiVoice_extract() will then report the emotions averaged over the whole sound.
The above example is not a full program. The following is a full program that measures the emotions in one or more WAV files.
/*
MeasureWav.c
public-domain sample code by Vokaturi, 2022-08-23
A program that calls the Vokaturi API
to report the average emotion probabilities
in a prerecorded WAV file.
*/
#include <math.h>
#include "WavFile.h"
/*
	Analyzes one WAV file and prints its average emotion probabilities
	on standard output.
	If the file is not valid according to VokaturiWavFile_valid(),
	an error message is written to stderr and the whole program
	exits with status 1.
*/
static void reportEmotionsInWavFile (const char *fileName) {
	printf ("\nEmotion analysis of WAV file %s:\n", fileName);

	VokaturiWavFile wavFile;
	VokaturiWavFile_open (fileName, & wavFile);
	if (! VokaturiWavFile_valid (& wavFile)) {
		fprintf (stderr, "Error: WAV file not analyzed.\n");
		exit (1);
	}

	/*
		The buffer holds the complete file, so one fill is enough.
	*/
	VokaturiVoice voice = VokaturiVoice_create (
		wavFile.samplingFrequency,
		wavFile.numberOfSamples,
		0   // since fill() and extract() appear in the same thread
	);
	VokaturiWavFile_fillVoice (& wavFile, voice,
		0,   // the only or left channel
		0,   // starting from the first sample
		wavFile.numberOfSamples   // all samples
	);

	VokaturiQuality quality;
	VokaturiEmotionProbabilities emotionProbabilities;
	VokaturiVoice_extract (voice, & quality, & emotionProbabilities);
	if (! quality.valid) {
		printf ("Not enough sonorancy to determine emotions\n");
	} else {
		printf ("Neutrality %f\n", emotionProbabilities.neutrality);
		printf ("Happiness %f\n", emotionProbabilities.happiness);
		printf ("Sadness %f\n", emotionProbabilities.sadness);
		printf ("Anger %f\n", emotionProbabilities.anger);
		printf ("Fear %f\n", emotionProbabilities.fear);
	}

	/*
		Release the per-file resources before the next file is handled.
	*/
	VokaturiVoice_destroy (voice);
	VokaturiWavFile_clear (& wavFile);
}

/*
	Program entry point.
	Every command-line argument is taken to be the name of a WAV file;
	the files are analyzed in the order given.
	Without any file name, a usage line is printed and the program
	exits with status 1.
*/
int main (int argc, const char * argv[]) {
	if (argc < 2) {
		printf ("Usage: MeasureWav [soundfilename.wav ...]\n");
		exit (1);
	}
	printf ("**********\nWAV files analyzed with:\n%s\n**********\n",
		Vokaturi_versionAndLicense ());

	/*
		argv [0] is the program name, so the file names start at index 1.
	*/
	int argumentIndex = 1;
	while (argumentIndex < argc) {
		reportEmotionsInWavFile (argv [argumentIndex]);
		argumentIndex += 1;
	}
}
This program uses the public-domain header file WavFile.h, which is included in the src directory of the SDK. A precompiled version for 64-bit Windows is examples/MeasureWav_win64.exe.
The following example gives the roughest possible measurement of whether the emotions change throughout the recording.
/*
	Prints a small table with the emotion probabilities of the first
	and of the second half of a recording, one row per half.
	A half whose analysis is flagged as not valid gets no row.

	numberOfSamples: the number of samples in the recording
	samples: the recording; the indexes run from 0 through numberOfSamples-1
	sampleRate: the sampling frequency in hertz
*/
void reportEmotionsForHalves (int numberOfSamples, double samples [],
	double sampleRate)
{
	/*
		The buffer is sized for the whole recording
		plus a safety margin.
	*/
	const double totalDuration = numberOfSamples / sampleRate;
	const double bufferSafetyTime = 1.0;   // one second suffices
	const double bufferDuration = totalDuration + bufferSafetyTime;
	const int bufferLength = (int) (sampleRate * bufferDuration);   // rounds toward zero

	/*
		The first half is samples 0 .. midSample-1, the second half is
		samples midSample .. numberOfSamples-1, so the two parts are
		adjacent and together cover the whole recording.
	*/
	const int midSample = numberOfSamples / 2;

	VokaturiVoice voice = VokaturiVoice_create (
		sampleRate, bufferLength, 0
	);
	VokaturiQuality quality;
	VokaturiEmotionProbabilities emotionProbabilities;
	printf ("Half Neutrality Happiness Sadness Anger Fear\n");

	/*
		Do the first half.
	*/
	VokaturiVoice_fill_float64array (voice, midSample, & samples [0]);
	VokaturiVoice_extract (voice, & quality, & emotionProbabilities);
	if (quality.valid) {
		printf (
			"first %.6f %.6f %.6f %.6f %.6f\n",
			emotionProbabilities.neutrality,
			emotionProbabilities.happiness,
			emotionProbabilities.sadness,
			emotionProbabilities.anger,
			emotionProbabilities.fear
		);
	}

	/*
		Do the second half.
	*/
	VokaturiVoice_fill_float64array (voice,
		numberOfSamples - midSample, & samples [midSample]);
	VokaturiVoice_extract (voice, & quality, & emotionProbabilities);
	if (quality.valid) {
		printf (
			"second %.6f %.6f %.6f %.6f %.6f\n",
			emotionProbabilities.neutrality,
			emotionProbabilities.happiness,
			emotionProbabilities.sadness,
			emotionProbabilities.anger,
			emotionProbabilities.fear
		);
	}

	VokaturiVoice_destroy (voice);
}
It is somewhat important that the parts that you fill are adjacent. The first half therefore consists of the samples numbered \(0\) through \(midSample-1\), and the second half consists of the samples numbered \(midSample\) through \(numberOfSamples-1\).
The following example explains how you can retrieve the development of the emotions throughout a longer recording.
/*
	Prints a table that shows how the emotion probabilities develop
	over a recording: the recording is cut into consecutive steps of
	timeStep seconds, and each step whose analysis is flagged as valid
	gets one row with its start time, end time and five probabilities.

	The number of steps is rounded down, so a trailing remainder that
	is shorter than timeStep is not analyzed.

	If sampleRate or timeStep is not positive, a message is written
	to stderr and nothing is analyzed.
*/
void reportEmotionDevelopment (
	int numberOfSamples,   // number of samples in entire recording
	double samples [],     // the buffer that holds the entire recording,
	                       // i.e., the indexes run from 0 through numberOfSamples-1
	double sampleRate,     // the sampling frequency in hertz
	double timeStep)       // how often you want to measure emotions (seconds)
{
	/*
		Both values are used as divisors below; written with negated
		comparisons so that NaN is rejected as well.
	*/
	if (! (sampleRate > 0.0) || ! (timeStep > 0.0)) {
		fprintf (stderr,
			"reportEmotionDevelopment: sampleRate and timeStep must be positive.\n");
		return;
	}

	/*
		The buffer only has to hold one step at a time
		(plus a safety margin), not the whole recording.
	*/
	const double bufferSafetyTime = 1.0;   // one second suffices
	const double bufferDuration = timeStep + bufferSafetyTime;
	const int bufferLength = (int) (sampleRate * bufferDuration);
	VokaturiVoice voice = VokaturiVoice_create (
		sampleRate, bufferLength, 0
	);
	printf (
		"Start(s) End(s) Neutrality Happiness Sadness Anger Fear\n"
	);
	const double duration = numberOfSamples / sampleRate;
	const int numberOfSteps = (int) (duration / timeStep);   // round down

	/*
		Steps are numbered from 1: step istep covers the time interval
		from (istep-1)*timeStep to istep*timeStep. Starting at 1 keeps
		the first starting sample at 0 instead of at a negative index.
	*/
	for (int istep = 1; istep <= numberOfSteps; istep ++) {
		const double startingTime = (istep - 1) * timeStep;
		const double endTime = istep * timeStep;
		int startingSample = (int) (startingTime * sampleRate);
		int endSample = (int) (endTime * sampleRate);
		/*
			Guard against reading past the end of the recording.
		*/
		if (endSample > numberOfSamples) {
			endSample = numberOfSamples;
		}
		/*
			Consecutive steps are adjacent: this step's starting sample
			is computed from the same time as the previous step's end sample.
		*/
		VokaturiVoice_fill_float64array (voice,
			endSample - startingSample, & samples [startingSample]);
		VokaturiQuality quality;
		VokaturiEmotionProbabilities emotionProbabilities;
		VokaturiVoice_extract (voice, & quality, & emotionProbabilities);
		if (quality.valid) {
			printf (
				"%.3f %.3f %.6f %.6f %.6f %.6f %.6f\n",
				startingTime, endTime,
				emotionProbabilities.neutrality,
				emotionProbabilities.happiness,
				emotionProbabilities.sadness,
				emotionProbabilities.anger,
				emotionProbabilities.fear
			);
		}
	}
	VokaturiVoice_destroy (voice);
}