#!/usr/bin/perl -w # # voiceServer.pl # Voice server for Zaza's new face and voice. # # Copyright (C) 2002 Brian Rudy (brudyNO@SPAMpraecogito.com) # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # # Revision history # # 0.01 2-13-2002 # First functional version. Basic speech request support (no SABLE, JSML, # VXML or other markup supported). No 'emotion' support. # # # Bugs: # Sending a request for the same speech cue works, but behaves a little # differently than other requests. # # # Notes: # # Persistent server application with CGI front-end for speech requests. # IPC::Shareable for client comm (or if faster, lower CPU util ShareLite) # # Speech rendering requests will be sent to a persistant festival_server # instance, ala Mister House. We will stay connected for speed. # # Might want to impliment 'speech cache' for speed? Take CRC32 of string # or files, use strigified version as filename for pre-rendered speech. # # Since this approach only speeks one sentance at a time, might want to # split multi-sentance speech request text into one or more speech # requests. (might be tricky w/ current implimentation) # # Hooks for CGI face client programs # Seperate CGI client that holds connection open until client disconnects # (KeepAlive). Sends alert messages to Java applet/application that a new # speech script and WAV file are ready for download, and their URLs. # # Hooks for CGI speech requests # # # Requires: # String::CRC32 # IPC::Shareable # Festival::Client::Async use String::CRC32; use IPC::Shareable; use Festival::Client::Async qw(parse_lisp); ## Config info ## my $festival_server = "set"; my $festival_port = 1314; # Where on local filesystem phones and waveforms are stored. my $speech_data_dir = "/home/brudy/public_html/zaza/speechdata/"; # Clean up on untrapped signal error $SIG{INT} = \&catch_int; sub catch_int { print "Got SIGHUP, shutting down.\n"; exit; } ## main # setup shared memory my $cueglue = 'cuedata'; my $faceglue = 'facedata'; my %options = ( 'create' => 1, 'exclusive' => 0, 'mode' => 0666, 'destroy' => 1, ); my %faceoptions = ( 'create' => 1, 'exclusive' => 0, 'mode' => 0666, 'destroy' => 1, ); ## If this code dies or is killed unexpectedly the semaphore arrays ## will be unwritable. They must be identified with ipcs, and deleted with ## ipcrm manually! catch_int should resolve this problem now. # This is our speech cue input shm tie(%cueinfo, IPC::Shareable, $cueglue, { %options }) or die "voiceServer: tie failed\n"; # This is our face client output shm tie(%faceinfo, IPC::Shareable, $faceglue, { %faceoptions }) or die "voiceServer: tie failed\n"; #%faceinfo = ( 'timestamp' => time(), # 'stringfinger' => "1234567890", # ); # index pre-rendered speech directory, store in local array. print "Indexing pre-rendered speech.\n"; opendir(DIR, $speech_data_dir) || die "Can't open speech data directory!"; @filenames = readdir(DIR); closedir(DIR); my @stringlist; my $fileindex = 0; my $renderedspeech = "nothing"; my $rendering = 0; my $donerendering; my $currenttime; foreach $file (@filenames) { if (($file ne "..") && ($file ne ".")) { # we are only counting the wavs if ($file =~ m/(\d+).wav/) { print "Found " . $1 . "\n"; $stringlist[$fileindex] = $1; $fileindex++; } } } # connect to festival server $fest = Festival::Client::Async->new($festival_server, $festival_port) or die "couldn't connect to Festival server: $!"; # send setup info to festival (voice, Wavefiletype, phoneme ouput, etc) my @lispsetup = ( "(Parameter.set \'Wavefiletype \'wav)", "(voice_us1_mbrola)", "(tts_return_to_client)", "(load \"/home/brudy/public_html/zaza/java/faceapplet/zaza-mode.scm\")" ); for $index (0 .. $#lispsetup) { print "Sending:" . $lispsetup[$index] . "\n"; $fest->server_eval_sync($lispsetup[$index], { LP => sub { my $lisp = shift; print "lisp response=" . $lisp; }, WV => sub { my $wave = shift; print "I got wave data.\n"; } }) or die "error from Festival server"; } # Set some sane initial values in both shm mailboxes $faceinfo{timestamp} = time(); $faceinfo{stringfinger} = sprintf "%s", crc32("voiceServer initialized"); # We might end up in a race condition with a cue client if the # client trys to write the shm before we do. $cueinfo{timestamp} = $faceinfo{timestamp}; #$cueinfo{timestamp} = $faceinfo{timestamp} + 1; $cueinfo{cuestring} = "voiceServer initialized"; $currenttime = $faceinfo{timestamp}; #main loop while (1) { if ($cueinfo{timestamp} != $currenttime) { # Do we need to do something, or just wait for renderspeech to finish? if (!$rendering && (crc32($cueinfo{cuestring}) != crc32($renderedspeech))) { print "Got a speech request for:" . $cueinfo{cuestring} . " fingerprint:" . (crc32($cueinfo{cuestring})) . "\n"; my $match = 0; for $stringindex (0 .. $#stringlist) { if($stringlist[$stringindex] == (crc32($cueinfo{cuestring}))) { $match = 1; } } if ($match) { print "Returning cached request.\n"; $renderedspeech = $cueinfo{cuestring}; $donerendering = 1; } else { $donerendering = 0; renderspeech($cueinfo{cuestring}); } } if ($donerendering) { $currenttime = $cueinfo{timestamp}; $faceinfo{timestamp} = $currenttime; # $faceinfo{stringfinger} = sprintf "%s", crc32($cueinfo{cuestring}); $faceinfo{stringfinger} = sprintf "%s", crc32($renderedspeech); print "Done handling request for: $renderedspeech\n"; } } # We need to sleep for a little while, or IPC::Sharable misbehaves # sleep for 250ms select(undef, undef, undef, 0.25); } sub renderspeech { ($speechstring) = @_; $rendering = 1; my $stringfinger = crc32($speechstring); my $outfile = $speech_data_dir . $stringfinger . "\.wav"; print "Creating $outfile.\n"; open(AUDIO, ">$outfile") || die "Cannot create $outfile!\n"; # Render and save waveform to local filesystem my $mrstring = "(tts_text \"$speechstring\" \'zaza)"; print "Sending $mrstring\n"; $fest->server_eval_sync($mrstring, { LP => sub { my $lisp = shift; #my $lisp = @_; print "lisp response=" . $lisp; }, WV => sub { my $wave = shift; #print "I got wave data.\n"; print AUDIO $wave; } }) or die "error from Festival server"; close(AUDIO); print "Waveform written.\n"; # Extract phones from previous request my $concatresponse; $fest->server_eval_sync("(print_string myphones)", { LP => sub { my $lisp = shift; # We need to remove the escape characters, # leading and trailing quotes and # concatinate before running parse_lisp() # on the response. $lisp =~ s/\\(.)/$1/g; $lisp =~ s/^\"//; $lisp =~ s/\"$//; $concatresponse = $concatresponse . $lisp; } }) or die "error from Festival server"; #print "Done: $concatresponse"; my @names = $concatresponse =~ m/name \"(\w+)\"/g; #print "There are " . scalar @names . " names in the response.\n"; my @ends = $concatresponse =~ m/end ([0-9]+[.]?[0-9]*|[.][0-9]+)/g; #print "There are " . scalar @ends . " ends in the response.\n"; my $phoneout = $speech_data_dir . $stringfinger . "\.script"; open(PHONES, ">$phoneout") || die "Cannot create $phoneout!\n"; print "Creating $phoneout\n"; for $phonename (0 .. $#names) { printf PHONES "%.4f %s\n", $ends[$phonename], $names[$phonename]; } close(PHONES); print "Phoneme script written.\n"; # push newly rendered onto end of @stringlist push @stringlist, $stringfinger; print "There are now " . $#stringlist . " elements in the string list:\n"; for $theindex (0 .. $#stringlist) { print $stringlist[$theindex] . "\n"; } $renderedspeech = $speechstring; $rendering = 0; $donerendering = 1; }