📄 speech.cs
字号:
using System;
using System.Collections.Generic;
using System.Collections;
using System.Text;
using System.Drawing;
using System.Drawing.Drawing2D;
using System.ComponentModel;
using System.Windows.Forms;
namespace DSProcessing
{
/// <summary>
/// speech processing class, a part of DSProcessing library.
/// author: Jan Sova
/// mailto: twardowski@email.cz
///
/// -------------------------------------------------------------------------
///
/// DSProcessing - C#/C++ library of signal processing, speech processing,
/// and communications classes and functions
///
/// Copyright (C) 2007-2008
///
/// This program is free software; you can redistribute it and/or modify
/// it under the terms of the GNU General Public License as published by
/// the Free Software Foundation; either version 2 of the License, or
/// (at your option) any later version.
///
/// This program is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with this program; if not, write to the Free Software
/// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
///
///-------------------------------------------------------------------------
/// </summary>
public class Speech
{
/// <summary>
/// Fundamental frequency (F0) detection. The recording is cut into consecutive,
/// non-overlapping frames of <paramref name="okno_rozmer"/> samples; for each frame the
/// normalized autocorrelation is computed through the FFT and its peak is used as the pitch lag.
/// </summary>
/// <param name="speech">Input samples of the recording.</param>
/// <param name="fs">Sampling frequency; F0 is reported as <c>fs / lag</c>.</param>
/// <param name="start_index">Offset of the autocorrelation lag range searched for the peak
/// (the search covers <c>start_index + 1 .. okno_rozmer / 2</c>).</param>
/// <param name="okno_rozmer">Frame length in samples; also used as the frame step.</param>
/// <param name="prah">Voicing threshold compared with the normalized autocorrelation peak.</param>
/// <param name="dataLoger">Not used by this method.</param>
/// <returns>
/// One (F0, power) pair per frame slot. F0 is 0 for frames rejected as unvoiced, and the F0 track
/// is median filtered (length 5). Slots for which no complete frame exists keep F0 = 0 and power = 0.
/// </returns>
public static F0Data F0detection2(int[] speech, double fs, int start_index, int okno_rozmer, double prah, Loger dataLoger)
{
    const double powerOffset = 0;   // additive correction of the power estimate
    const double powerPrah = 25;    // minimum power (10*log10 scale) for a frame to count as voiced

    // Length of the speech recording.
    long N = speech.Length;
    int posun = okno_rozmer;        // frame step == frame length (no overlap)

    // Per-frame results.
    int slotCount = (int)(N / posun) + 1;
    double[] retF0 = new double[slotCount];
    double[] retPower = new double[slotCount];

    // Pre-processing: remove the mean, normalize to unit peak, then filter with b = {1, -0.9}.
    double[] signal = Tools.minus(speech, Tools.mean(speech));
    double[] sig_pre = Tools.divide(signal, Tools.maxValue(Tools.abs(signal)));
    double[] a = { 1, 0 };
    double[] b = { 1, -0.9 };
    double[] sig = Filter.filt(b, a, sig_pre);

    int k = 0; // index of the current frame slot
    for (int start_vz = 0; start_vz < N; start_vz += posun)
    {
        if (start_vz + okno_rozmer >= N)
        {
            // End of the recording: the remaining samples do not fill a frame.
            // Previously the loop kept going with the data of the preceding frame
            // (or with a zero-length frame when the input was shorter than one frame);
            // now the remaining slots simply stay at zero.
            break;
        }

        double[] frame = Tools.fromToVector(sig, start_vz, start_vz + okno_rozmer);
        double[] framePower = Tools.fromToVector(signal, start_vz, start_vz + okno_rozmer);

        // Windowed amplitude spectrum of the filtered frame.
        Complex[] frameSpectrum = Complex.makeRealPart(Windows.multHanning(frame));
        FourierTransform.FFT(frameSpectrum, FourierTransform.Direction.Forward);
        double[] spec = Complex.AbsField(frameSpectrum);

        // Autocorrelation = backward FFT of the squared amplitude spectrum, normalized by lag 0.
        Complex[] acfComplex = Complex.makeRealPart(Tools.pow(spec, 2));
        FourierTransform.FFT(acfComplex, FourierTransform.Direction.Backward);
        double[] acf = Complex.real(acfComplex);
        acf = Tools.divide(acf, acf[0]);

        // Power estimate from the spectrum of the unfiltered (mean-removed) frame, bins 5 .. half.
        Complex[] powerSpectrum = Complex.makeRealPart(Windows.multHanning(framePower));
        FourierTransform.FFT(powerSpectrum, FourierTransform.Direction.Forward);
        double[] specPower = Complex.AbsField(powerSpectrum);
        double[] specPartPower = Tools.fromToVector(specPower, 5, spec.Length / 2);
        // NOTE(review): the divisor uses the length of the whole recording (N), not the number of
        // summed bins of this frame - confirm that this scaling is intended.
        double power = (fs) * Tools.squareSum(specPartPower) / ((N / 2) - 5);
        power = 10 * Math.Log10(power) + powerOffset;

        // Peak of the autocorrelation inside the searched lag range: [0] = value, [1] = position.
        double[] acf_params = Tools.max(Tools.fromToVector(acf, start_index + 1, okno_rozmer / 2));
        double max_acf = acf_params[0];
        double pos_acf = acf_params[1];

        // Voiced/unvoiced decision; the power is stored in either case.
        bool unvoiced = max_acf < prah || power < powerPrah;
        retF0[k] = unvoiced ? 0 : fs / (pos_acf + start_index);
        retPower[k] = power;
        k++;
    }

    // Median filtering of the F0 track.
    double[] result = Filter.medfilt1(retF0, 5);

    // Copy the data to the output container.
    F0Data returnValue = new F0Data();
    for (int i = 0; i < result.Length; i++)
    {
        returnValue.Add(result[i], retPower[i]);
    }
    return returnValue;
}
/// <summary>
/// Fundamental frequency (F0) detection using the autocorrelation of consecutive,
/// non-overlapping frames of 2048 samples.
/// </summary>
/// <param name="speech">Samples on which the analysis is performed.</param>
/// <param name="fs">Sampling frequency.</param>
/// <param name="loger">Receives the autocorrelation of every analysed frame; may be null,
/// in which case nothing is logged.</param>
/// <returns>
/// One (F0, power) pair per complete frame; F0 is 0 for frames rejected by the power,
/// maximum-F0 or autocorrelation-peak thresholds, and the F0 track is median filtered (length 5).
/// The internal start_index value is tuned for a sampling frequency of fs = 11025.
/// </returns>
public static F0Data F0detection(int[] speech, double fs, Loger loger)
{
    const int delka_okna = 512 * 4;               // frame length in samples
    const int delka_okna_pul = delka_okna / 2;    // half of the frame length
    const int posun_okna = delka_okna;            // frame step (no overlap)
    const int start_index = 30;                   // first autocorrelation index searched for the peak
    const double prah = 0.8;                      // minimum autocorrelation peak of a voiced frame
    const int F0max = 1000;                       // highest F0 considered plausible
    const int prah_power = 40;                    // minimum power of a voiced frame

    // Remove the mean and normalize to unit peak amplitude.
    double[] signal = Tools.minus(speech, Tools.mean(speech));
    double[] sig = Tools.divide(signal, Tools.maxValue(Tools.abs(signal)));

    int N = speech.Length;
    int frameCount = N / posun_okna;
    double[] retF0 = new double[frameCount];
    double[] retPower = new double[frameCount];

    // Stopwatch is monotonic and has a much finer resolution than DateTime.Now.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

    for (int start = 0; start < N; start += posun_okna)
    {
        double[] segment = Tools.fromToVector(sig, start + 1, start + 1 + delka_okna);
        double[] segmentPower = Tools.fromToVector(signal, start + 1, start + 1 + delka_okna);
        if (segment == null || segment.Length != delka_okna)
        {
            // Incomplete frame (end of the recording): nothing to analyse.
            continue;
        }

        // Frame power, scaled by the sampling period.
        double power = Tools.squareSum(segmentPower) * (1 / fs);
        // NOTE(review): Math.Log is the natural logarithm, while this value is treated as decibels.
        // The -33 offset and prah_power were tuned with this formula, so it is left unchanged -
        // confirm before switching to Math.Log10.
        power = (10 * Math.Log(power)) - 33;

        segment = Windows.multHanning(segment);
        double[] acf = Tools.xcorr(segment);
        if (loger != null)
        {
            loger.WriteLine(acf);
        }

        // Part of the autocorrelation searched for the pitch peak: indices start_index .. half frame.
        double[] segment2 = new double[delka_okna_pul - start_index];
        Array.Copy(acf, start_index, segment2, 0, segment2.Length);

        // [0] = peak value, [1] = peak position inside segment2.
        double[] dataMax = Tools.max(segment2);
        double max_acf = dataMax[0];
        double pos_acf = dataMax[1];

        double F0 = fs / (pos_acf + start_index);
        if (power < prah_power || F0 > F0max || max_acf < prah)
        {
            F0 = 0;
        }

        int frameIndex = start / posun_okna;
        retF0[frameIndex] = F0;
        retPower[frameIndex] = power;
    }

    stopwatch.Stop();
    double deltaTime = stopwatch.Elapsed.TotalMilliseconds;
    Console.WriteLine("Cas vypoctu byl: " + deltaTime);

    // Median filtering of the F0 track, then copy to the output container.
    double[] po = Filter.medfilt1(retF0, 5);
    F0Data returnValue = new F0Data();
    for (int i = 0; i < po.Length; i++)
    {
        returnValue.Add(po[i], retPower[i]);
    }
    return returnValue;
}
}
/// <summary>
/// TODO
/// </summary>
public class F0Data
{
private ArrayList f0; //Fnula
private ArrayList power; // vykon dane hlasky
/// <summary>
/// Gets or sets the list of F0 values. Both accessors work on a shallow copy
/// (<see cref="ArrayList.Clone"/>), so the caller never shares the internal list.
/// </summary>
public ArrayList F0
{
    get
    {
        ArrayList snapshot = (ArrayList)f0.Clone();
        return snapshot;
    }
    set
    {
        ArrayList copy = (ArrayList)value.Clone();
        f0 = copy;
    }
}
/// <summary>
/// Gets or sets the list of power values. Both accessors work on a shallow copy
/// (<see cref="ArrayList.Clone"/>), so the caller never shares the internal list.
/// </summary>
public ArrayList Power
{
    set
    {
        power = (ArrayList)value.Clone();
    }
    get
    {
        // Clone() already creates the list that is returned; no extra allocation is needed.
        return (ArrayList)power.Clone();
    }
}
/// <summary>
/// Creates an empty container: both the F0 list and the power list start with no entries.
/// </summary>
public F0Data()
{
    this.power = new ArrayList();
    this.f0 = new ArrayList();
}
/// <summary>
/// Appends one entry: the F0 value goes to the F0 list and the power value to the power list.
/// </summary>
/// <param name="f0">F0 value to append.</param>
/// <param name="power">Power value to append.</param>
public void Add(double f0, double power)
{
    // The lists store objects, so each value is boxed before it is appended.
    object f0Entry = f0;
    object powerEntry = power;
    this.f0.Add(f0Entry);
    this.power.Add(powerEntry);
}
/// <summary>
/// Gets the number of entries in the F0 list.
/// </summary>
public int Count
{
    get
    {
        int entryCount = this.f0.Count;
        return entryCount;
    }
}
/// <summary>
/// TODO
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
public double getF0onIndex(int index)
{
double d = (double) f0[index];
return d;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -