WiTiAi 发表于 2023-7-28 15:49:54

提取语音输出文本唯一出现ID


【1】采用 HALCON 软件提取语音输出文本中唯一出现的字符 ID
* Purpose: read the AISHELL-1 transcript file, collect every distinct
* character that occurs in the transcript text (the utterance ID at the start
* of each line is excluded), and write those characters as a JSON array of
* one-character strings to LabelWav.json. A single blank (" ") is always
* written as the last array element.
dev_update_off ()
dev_set_draw ('margin')
* pathwav and pathtrans are not used further down in this script; they are
* kept so that the dataset locations stay visible in one place.
pathwav := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/wav/train'
pathtrans := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/transcript'
pathtranscript := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/transcript/aishell_transcript_v0.8.txt'
*
* Step 1: read the transcript file line by line into a tuple of strings.
pathtranscriptWords := []
open_file (pathtranscript, 'input', FileHandle)
IsEOF := 0
while (IsEOF == 0)
    fread_line (FileHandle, Number, IsEOF)
    * Append (do not overwrite) each line. The read that reports end-of-file
    * is skipped - NOTE(review): assumes that read carries no line data;
    * confirm against the fread_line reference for the HALCON version in use.
    if (IsEOF == 0)
        pathtranscriptWords := [pathtranscriptWords,Number]
    endif
endwhile
close_file (FileHandle)
*
* Step 2: for every line, drop the first token (the utterance ID) and
* concatenate the remaining tokens into one long string.
WavNameOutput := ''
for k := 0 to |pathtranscriptWords| - 1 by 1
    scriptWord := pathtranscriptWords[k]
    * Splitting on blank, tab, CR and LF separates the tokens and at the same
    * time removes the line terminator that fread_line leaves on the line.
    tuple_split (scriptWord, ' \t\r\n', Tokens)
    if (|Tokens| > 1)
        * Tokens[0] is the utterance ID; everything after it is transcript.
        * Each token gets a trailing blank so that words stay separated; the
        * blanks are filtered out again when the unique characters are
        * collected below.
        WavNameOutput := WavNameOutput + sum(Tokens[1:|Tokens| - 1] + ' ')
    endif
endfor
* Disabled legacy code (WriteTxt is not defined in this script):
* WriteTxt (FilePath + WavName + '.txt', WavNameOutput)
* WavInputOut := WavInputOut + WavFile + ',' + FilePath + WavName + '.txt' + '\n'
*
* resPath := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/'
* WriteTxt (resPath + 'ASRInfo.txt', WavInputOut)
*
* Step 3: collect every character that has not been seen before, in order of
* first occurrence. Blanks are skipped here and emitted once at the end.
UniqueWord := ''
tuple_strlen (WavNameOutput, Length2)
for i := 0 to Length2 - 1 by 1
    tuple_substr (WavNameOutput, i, i, CurChar)
    tuple_strchr (UniqueWord, CurChar, Position)
    if (Position == -1 and CurChar != ' ')
        UniqueWord := UniqueWord + CurChar
    endif
endfor
* WriteTxt (FilePath + WavName + '.txt', WavNameOutput)
*
* Step 4: write the characters as a JSON array, one element per line.
File_Name := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/LabelWav.json'
* 'output' creates the file or truncates an existing one, so no separate
* existence check / delete is required.
open_file (File_Name, 'output', FileHandle)
fwrite_string (FileHandle, '[' + '\n')
tuple_strlen (UniqueWord, Length3)
for i := 0 to Length3 - 1 by 1
    tuple_substr (UniqueWord, i, i, Substring1)
    * A double quote or a backslash must be escaped inside a JSON string.
    if (Substring1 == '"' or Substring1 == '\\')
        Substring1 := '\\' + Substring1
    endif
    * Every element written here is followed by the blank element below, so
    * it always needs a trailing comma.
    fwrite_string (FileHandle, '"' + Substring1 + '",' + '\n')
endfor
* Final array element (the blank) - no trailing comma.
fwrite_string (FileHandle, '"' + ' ' + '"' + '\n')
fwrite_string (FileHandle, ']' + '\n')
close_file (FileHandle)

输出如下:
[
"_",
"'",
"a",
"b",
"c",
"d",
"e",
"f",
"g",
"h",
"i",
"j",
"k",
"l",
"m",
"n",
"o",
"p",
"q",
"r",
"s",
"t",
"u",
"v",
"w",
"x",
"y",
"z",
" "
]



页: [1]
查看完整版本: 提取语音输出文本唯一出现ID