#!/usr/bin/perl -w
#

# voiceServer.pl
# Voice server for Zaza's new face and voice.
#
#  Copyright (C) 2002 Brian Rudy (brudyNO@SPAMpraecogito.com)
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#
# Revision history
#
# 0.03 5-21-2002
# Updated face client outbox shm to use 'stack' of n last cues (specified by 
# $cuestacksize). This should allow slower async clients to keep current, and
# prevent the loss of rapidly-sent cues. Added emotion support.
#
# 0.02 5-20-2002
# Added audio post-processing option to clean up output on Zaza's hardware. 
# Fixed shm init to properly auto-generate 'voiceServer initialized' message
# if not already cached.
#
# 0.01 2-13-2002 
# First functional version. Basic speech request support (no SABLE, JSML, 
# VXML or other markup supported). No 'emotion' support.
#
#
# Bugs: 
# Sending a request for the same speech cue works, but behaves a little 
# differently than other requests. 
#
#
# Notes:
#
# Persistent server application with CGI front-end for speech requests.
# IPC::Shareable for client comm.
#
# Speech rendering requests will be sent to a persistant festival_server
# instance specified by $festival_server and $festival_port.
#
# Speech 'cache' used for speed. Pre-rendered scripts and wavefiles stored 
# on disk, and re-indexed each time voiceServer is run.
#
# Since this approach only speaks one sentance at a time, might want to 
# split multi-sentance speech request text into one or more speech 
# requests. (might be tricky w/ current implimentation)
#
# Hooks for CGI face client programs
# Seperate CGI clients for Java face interface and speech cue input handling.
#
#
# Requires:
#  String::CRC32
#  IPC::Shareable
#  Festival::Client::Async


use String::CRC32;
use IPC::Shareable;
use Festival::Client::Async qw(parse_lisp);

   
## Config info 
##
my $festival_server = "localhost";
my $festival_port   = 1314;

# Where on local filesystem phones and waveforms are stored.
my $speech_data_dir = "/home/brudy/public_html/zaza/speechdata/";

# Post-process the audio after rendering. If no post-processing
# is desired, just leave this blank.
my $ppopts = "| sox -twav - -twav - pitch 550 35 c c ";

# Number of cues to keep in shm outbox 'stack'
my $cuestacksize = 10;


# Clean up on untrapped signal error 
$SIG{INT} = \&catch_int;
sub catch_int {
  print "Got SIGHUP, shutting down.\n";
  exit;
}


## main

# setup shared memory

my $cueglue = 'cuedata';
my $faceglue = 'facedata';
my %options = ( 'create'    => 1,
                'exclusive' => 0,
                'mode'      => 0666,
                'destroy'   => 1,
              );

my %faceoptions = ( 'create'    => 1,
                'exclusive' => 0,
                'mode'      => 0666,
                'destroy'   => 1,
              );

## If this code dies or is killed unexpectedly the semaphore arrays
## will be unwritable. They must be identified with ipcs, and deleted with 
## ipcrm manually! catch_int should resolve this problem now.

# This is our speech cue input shm
tie(%cueinfo, IPC::Shareable, $cueglue, { %options }) or
        die "voiceServer: tie failed\n";

# This is our face client output shm
tie(@faceinfo, IPC::Shareable, $faceglue, { %faceoptions }) or
        die "voiceServer: tie failed\n";

#%faceinfo = ( 'timestamp'    => time(),
#              'stringfinger' => "1234567890",
#            );


# index pre-rendered speech directory, store in local array.

print "Indexing speech cache.\n";
opendir(DIR, $speech_data_dir) || die "Can't open speech data directory!";
@filenames = readdir(DIR);
closedir(DIR);

my @stringlist;
my $fileindex = 0;
my $renderedspeech = "nothing";
my $rendering = 0;
my $donerendering;
my $currenttime;
my $match;

foreach $file (@filenames) {
  if (($file ne "..") && ($file ne ".")) {
    # we are only counting the wavs
    if ($file =~ m/(\d+).wav/) {
      print "Loading " . $1 . "\n";
      $stringlist[$fileindex] = $1;
      $fileindex++;
    }
  }
}


# connect to festival server
$fest = Festival::Client::Async->new($festival_server, $festival_port)
      or die "couldn't connect to Festival server: $!";

# send setup info to festival (voice, Wavefiletype, phoneme ouput, etc)

my @lispsetup = ( 
                  "(Parameter.set \'Wavefiletype \'wav)",
                  "(voice_us1_mbrola)",
                  "(tts_return_to_client)",
                  "(load \"/home/brudy/voiceServer/zaza-mode.scm\")"
                );

for $index (0 .. $#lispsetup) {
  print "Sending:" . $lispsetup[$index] . "\n";
  $fest->server_eval_sync($lispsetup[$index], 
                          {
			    LP => sub {
				     my $lisp = shift;
                                     print "lisp response=" . $lisp;
			    },
			    WV => sub {
				     my $wave = shift;
                                     print "I got wave data.\n";
			    }
                          }) or die "error from Festival server";
}

# Set some sane initial values in both shm mailboxes
$faceinfo[0]{timestamp} = 0;
$faceinfo[0]{stringfinger} = sprintf "%s", "0";
$faceinfo[0]{emotion} = "happy";

# We might end up in a race condition with a cue client if the 
# client trys to write the shm before we do.
$cueinfo{timestamp} = time(); 
$cueinfo{cuestring} = "voiceServer initialized";
$currenttime = $faceinfo[0]{timestamp};
$cueinfo{emotion} = "happy";


#main loop
while (1) {
  if ($cueinfo{timestamp} != $currenttime) {
    # Do we need to do something, or just wait for renderspeech to finish?
    if (!$rendering && ((crc32($cueinfo{cuestring}) != crc32($renderedspeech)) || ($cueinfo{cuestring} eq '0'))) {
      if ($cueinfo{cuestring} eq '0') {
        # Client only requested emotion change
        print "Met first emotion only change condition.\n";
        $match = 1;
      }
      else {
        print "Got a speech request for: " . $cueinfo{cuestring} . 
          ", fingerprint:" . (crc32($cueinfo{cuestring})) . "\n";
        $match = 0;
        for $stringindex (0 .. $#stringlist) {
          if($stringlist[$stringindex] == (crc32($cueinfo{cuestring}))) {
            $match = 1;
          }
        }
      }
      if ($match) {
        if ($cueinfo{cuestring} eq '0') {
          print "Emotion only change.\n";
        }
        else {
          print "Returning cached request.\n";
        }
        $renderedspeech = $cueinfo{cuestring}; 
        $donerendering = 1;
      }
      else {
        $donerendering = 0;
        renderspeech($cueinfo{cuestring});
      }
    }
    if ($donerendering) {
      $currenttime = $cueinfo{timestamp};
      if ($cueinfo{cuestring} eq '0') {
        push @faceinfo, {timestamp => $currenttime, stringfinger => '0', emotion => $cueinfo{emotion}};
      }
      else {
        push @faceinfo, {timestamp => $currenttime, stringfinger => (sprintf "%s", crc32($renderedspeech)), emotion => $cueinfo{emotion}};
      }
      if (($#faceinfo > $cuestacksize) || ($faceinfo[0]{timestamp} eq '0')) {
        shift @faceinfo;
      }
      print "Done handling request for: $renderedspeech\n";
      print "There are now " . $#faceinfo . " elements on the stack.\n";
      print "Timestamp\tStringfinger\tEmotion\n";
      for $stackelement (0 .. $#faceinfo) {
        print $faceinfo[$stackelement]{timestamp} . "\t" . $faceinfo[$stackelement]{stringfinger} . "\t" . $faceinfo[$stackelement]{emotion} . "\n";
      }
    }
  }
  # We need to sleep for a little while, or IPC::Sharable misbehaves
  # sleep for 250ms
  select(undef, undef, undef, 0.25);
}


sub renderspeech {
  ($speechstring) = @_;

  $rendering = 1;
  my $stringfinger = crc32($speechstring); 
  my $outfile = $speech_data_dir . $stringfinger . "\.wav";
  print "Creating $outfile.\n";
  open(AUDIO, "$ppopts>$outfile") || die "Cannot create $outfile!\n";
  # Render and save waveform to local filesystem
  my $mrstring = "(tts_text \"$speechstring\" \'zaza)";
  print "Sending $mrstring\n";
  $fest->server_eval_sync($mrstring,
                          {
                            LP => sub {
                                     my $lisp = shift;
                                     #my $lisp = @_;
                                     print "lisp response=" . $lisp;
                            },
                            WV => sub {
                                     my $wave = shift; 
                                     #print "I got wave data.\n";
                                     print AUDIO $wave;
                            }
                          }) or die "error from Festival server";
  close(AUDIO);
  print "Waveform written.\n";

  # Extract phones from previous request
  my $concatresponse;
  $fest->server_eval_sync("(print_string myphones)",
                          { 
                            LP => sub {
                                     my $lisp = shift;
                                     # We need to remove the escape characters,
                                     # leading and trailing quotes and 
                                     # concatinate before running parse_lisp() 
                                     # on the response.
                                     $lisp =~  s/\\(.)/$1/g;
                                     $lisp =~ s/^\"//;
                                     $lisp =~ s/\"$//;
                                     $concatresponse = $concatresponse . $lisp;
                            }
                          }) or die "error from Festival server";

  #print "Done: $concatresponse";

  my @names = $concatresponse =~ m/name \"(\w+)\"/g;
  #print "There are " . scalar @names . " names in the response.\n";
  
  my @ends = $concatresponse =~ m/end ([0-9]+[.]?[0-9]*|[.][0-9]+)/g;
  #print "There are " . scalar @ends . " ends in the response.\n";
 

  my $phoneout = $speech_data_dir . $stringfinger . "\.script";
  open(PHONES, ">$phoneout") || die "Cannot create $phoneout!\n";

  print "Creating $phoneout\n";
  for $phonename (0 .. $#names) {
    printf PHONES "%.4f %s\n", $ends[$phonename], $names[$phonename];
  }
  close(PHONES);  
  print "Phoneme script written.\n";

  # push newly rendered onto end of @stringlist
  push @stringlist, $stringfinger;

  print "There are now " . $#stringlist . " cues in the cache.\n";
  for $theindex (0 .. $#stringlist) {
    print $stringlist[$theindex] . "\n";
  }
  
  $renderedspeech = $speechstring;
  $rendering = 0;
  $donerendering = 1;
}