#This file creates Slave Listener utilities.  An MPI process with
#  rank different from 0 goes into a:  Recv() - Eval() - Send() loop,
#  receiving commands from master and sending results back.
#A command can be either a GAP object or a string.
#  Since a GAP object as an argument to SendMsg() will usually be
#  evaluated locally before being sent, sending a string is more useful.
#  Strings are evaluated on the remote process.  See examples in comments.
#As with all of GAP/MPI, you must first have a procgroup file
#  in the current directory, and you must then call the gapmpi.sh script.

#The slave listener utilities defined are:

# SendMsg( command[, dest[, tag]] )
# RecvMsg( [dest] )
# SendRecvMsg( command[, dest[, tag]] )
# BroadcastMsg( command )
#   [Executes on slaves only.  SlaveListeners do not return a value]
# ParReset() # Try this to re-sync master and slaves
# FlushAllMsgs()
# PingSlave( slave ) # -1 means all slaves
# ParEval( stringCmd ): Evaluate on all processes
#   [like BroadcastMsg(), but ParEval() also executes on master
#    and also returns a value]
# ParCallFuncList( func, list );
# PrintToString( object [, ...] )
#   [ A useful idiom is:  ParEval( PrintToString( "foo := ", foo ) );
#     Note that PrintToString("abc") => "abc"  ( similar to Print() ) ]
# ParRead( filename ): Read file on all processes
# ParList( fnc, list ): Like List(), but faster due to use of slaves
# ProbeMsg([source]): Probe source and block (default=MPI_ANY_SOURCE)
#			return true when message is available
# ProbeMsgNonBlocking([source]): Probe if msg pending at source (default=MPI_ANY_SOURCE)
#			return true or false immediately
#NOTE that GetLastMsgSource() and GetLastMsgTag(), defined below, are
#  available to interrogate source and tag of pending msg
#Utilities inherited from MPI and UNIX:
# MPI_Comm_rank():  unique integer rank for each process
# MPI_Comm_size():  number of MPI processes
# GetLastMsgSource(): Source of last message received OR probed
# GetLastMsgTag():  Source of last message received OR probed.
#			Tags are an MPI concept.  You can safely ignore them.
# GAPMPI_Chdir(string): UNIX chdir(dir):  change directory
# GAPMPI_DirectoryCurrent(): Returns current directory as string
# GAPMPI_Alarm(seconds): UNIX alarm syscall: kill process in sec seconds;
#                        returns seconds until death from previous call
#			Put this in .gaprc to prevent runaway processes.
# GAPMPI_Nice(priority): UNIX setpriority syscall (nice in shell):
#			 Lower priority to new value between 19 (lowest
#			 priority) and -20 (highest priority, reserved for root)
# GAPMPI_LimitRss(bytes): UNIX setrlimit syscall (limit or ulimit in shell):
#			  Limit Resident Set Size (RSS or RAM usage) to bytes;
#			  Process uses more RAM if remaining RAM is unused.
#                         Returns previous RSS limit.
#			  Some UNIX dialects may always set it to all of RAM.

# Note that command can be a string or GAP object, but a GAP object
#  argument to SendMsg(), etc., will be evaluated locally first.
#  Also, a string allows passing of multiple commands.

# Examples [Assumes two slaves in procgroup file]:
#    myfnc := function() return 42; end;
#    BroadcastMsg( PrintToString( "myfnc := ", myfnc ) );
#    SendMsg( "a:=45; 3+4", 1 );
#    SendMsg( "a", 2 );
#    RecvMsg( 2 );
#    RecvMsg();
#    squares := ParList( [1..100], x->x^2 );
#    ParRead( "/home/gene/.gaprc" );
# Also, for a simple problem, execute:    MSexample();

#============================================================
# Test send and receive between slaves before getting serious
# Also, slave will try to change current directory to match master.

#Does this work in GAP-4.1?  Anyway, we check this in lib/init.g now.
# if not IsBound(MPI_Initialized) then quit;

# The gapmpi.c kernel function, GAPMPI_MakeString can be used instead.
MakeString := function( len )
  local i, x;
  x := [];
  for i in [1..len] do
    x[i] := ' ';
  od;
  ConvertToStringRep(x); # Needed to convert to compact C string.
  return x;
end;

# This is needed to obtain the absolute pathname of the current directory
GAPMPI_DirectoryCurrent := function()
  local paths, pwd, string, stream;
  paths := DirectoriesSystemPrograms();
  pwd := Filename( paths, "pwd" );
  string := "";
  stream := OutputTextString(string,true);
  Process( DirectoryCurrent(), pwd, InputTextNone(), stream, [] );
  CloseStream(stream);
  # string contains string with trailing newline
  if string[Length(string)] = '\n' then
    Unbind(string[Length(string)]);
  fi;
  return string;
end;

# Slave tells master his hostname and his UNIX process ID (pid)
# Master tells slave his home directory here.
# The master can send the slave a SIGINT signal directly.
masterHostname := GAPMPI_Hostname();
hostname := [];
pid := [];
if MPI_Comm_rank() <> 0 then
  Print("\nSlave ", MPI_Comm_rank(), " (", GAPMPI_Hostname(),
        ") reporting.\n\n");
  MPI_Send( GAPMPI_Hostname(), 0 );
  MPI_Send( String(GAPMPI_Getpid()), 0 );
  MPI_Probe();
  msg := MPI_Recv( GAPMPI_MakeString( MPI_Get_count() ), MPI_Get_source() );
  if not GAPMPI_Chdir( msg ) then
    Print( "Slave ", MPI_Comm_rank(), " was not able to change directory.\n" );
    Print( "  Using home directory.\n\n" );
  fi;
else
  Print("\nMaster here.  Will try to reach slaves.\n\n");
  for slave in [1..MPI_Comm_size() - 1] do
     MPI_Probe();
     Print( "\nReceiving from slave ", MPI_Get_source(), "\n\n");
     hostname[ MPI_Get_source() ] :=
       MPI_Recv( GAPMPI_MakeString( MPI_Get_count() ), MPI_Get_source() );
     MPI_Probe( MPI_Get_source() );
     pid[ MPI_Get_source() ] :=
       MPI_Recv( GAPMPI_MakeString( MPI_Get_count() ), MPI_Get_source() );
     MPI_Send( GAPMPI_DirectoryCurrent(), MPI_Get_source() );
  od;
fi;


#==========================================================================
# Utilities for messages, read-eval-print loop issues

NO_RET_VAL := "<no_return_val>";
last := NO_RET_VAL; # Avoid "unbound global variable" error below.

ReadEvalFromString := function(str)
  local i, j;
  if not IsString(str)
    then Error("string argument required"); fi;
  j := Length(str);
  if j = 0 then Error("Reading and evaluating null string"); fi;
  i := 1;
  # '\0' doesn't work for GAP input, but CHAR_INT(0) does.
  while i<j and str[i] <> CHAR_INT(0) do i:=i+1; od;
  if i>1 and str[i] = CHAR_INT(0) then i:=i-1; fi;
  while str[i] in " \n" and i > 1 do i := i-1; od;
  # i now points to last char before '\n' or points to last char of string.
  if str[i] <> ';' then i:=i+1; str[i] := ';'; fi;
  str[i+1] := CHAR_INT(0);
  last := NO_RET_VAL;  # default for last in case Read() doesn't set it.
  Read( InputTextString( str ) ); # Read() does ReadEval in GAP
  # If variable, last, is used, GAP complains about unbound global variable
  #  or  Variable: 'last' must have an assigned value; during execution
  # GAPMPI_Last() is a C routine to do the same.  GAP doesn't see use of last.
  return GAPMPI_Last();
end;

# Converts object to string representation
PrintToString := function( arg )
  local str, output, obj;
  str := "";
  output := OutputTextString( str, true ); # true means do as append
  # Would PrintTo -> AppendTo be necessary if "true"->"false" above?
  for obj in arg do
    PrintTo(output, obj);
  od;
  CloseStream(output);
  return str;
end;

# This isn't needed, but it illustrates what's possible.
ReadEvalPrint := function(command)
  local result;
  result := ReadEvalFromString( command );
  return PrintToString( result );
end;

if not IsBound( DeclareGlobalFunction ) then
  DeclareGlobalFunction := "NOT_DEFINED";
fi;
MyDeclareGlobalFunction := function( strname )
  if DeclareGlobalFunction <> "NOT_DEFINED" then  # if GAP-4.x ...
    # What is the GAP 4.x way to declare functions, use them, and then define
    # ReadEvalFromString( PrintToString( "DeclareGlobalFunction(\"",
    #				       strname, "\");" ) );
    ReadEvalFromString( PrintToString( strname, " := ReturnTrue;" ) );
  else                    # else GAP-4 beta 3
    ReadEvalFromString( PrintToString( strname, " := NewOperationArgs(\"",
				       strname, "\");" ) );
  fi;
end;

#Declare it now for InterruptSlave()
MyDeclareGlobalFunction("RecvStringMsg");

InterruptSlave := function( slave )
  if MPI_Comm_rank() <> 0 then
    Error("InterruptSlave() can only be called from master.");
  fi;
  if not slave in [1..MPI_Comm_size()-1] then
    Error("Slave ", slave, " does not exist.");
  fi;
  if hostname[slave] = masterHostname then
    Exec("kill -2", pid[slave]);
  else
    Exec("rsh", hostname[slave], "kill -2", pid[slave]);
  fi;
  # Throw away additional messages. (Not guaranteed:  possible race condition.)
  while MPI_Iprobe( slave ) do RecvStringMsg( slave ); od;
end;

#========================================================================
# Primitives for talking to slave listener.

# enumeration of special tags starts here;
CURR_SPEC_TAG := 10000;
NextSpecTag := function()
   CURR_SPEC_TAG := CURR_SPEC_TAG+1;
   return CURR_SPEC_TAG;
end;
SENDRECV_TAG := NextSpecTag();
PING_TAG := NextSpecTag();
SLAVEREPLY_TAG := NextSpecTag(); # Order is important; See SlaveListener();
QUIT_TAG := NextSpecTag();
BROADCAST_TAG := NextSpecTag();
MASTER_SLAVE_PING_TAG := NextSpecTag();
MASTER_SLAVE_QUIT_TAG := NextSpecTag();

SendMsg := function( arg )
  local command, dest, tag;
  command := arg[1]; dest := 1; tag := 1;
  if Length(arg) > 1 then dest := arg[2]; fi;
  if Length(arg) > 2 then tag := arg[3]; fi;
  if not IsInt(dest) or
      (not dest in [1..MPI_Comm_size() - 1] and 0 = MPI_Comm_rank() ) then
    Error("SendMsg: Invalid dest: ", dest);
  fi;
  MPI_Send( PrintToString(command), dest, tag );
end;

BroadcastMsg := function( command )
  local dest;
  if MPI_Comm_rank() <> 0 then
    Error("BroadcastMsg() should be called on master, only.");
  fi;
  for dest in [1..MPI_Comm_size() - 1] do
    SendMsg( command, dest, BROADCAST_TAG );
  od;
end;

RecvStringMsg := function( arg )
  local buffer, source;
  source := MPI_ANY_TAG;
  if Length(arg) > 0 then source := arg[1]; fi;
  MPI_Probe( source );
  # MPI_Get_count() assumes type MPI_CHAR
  # We add 2 to allow for ";\0".  GAP's NEW_STRING() actually creates an
  #  extra byte at end with '\0' automatically, but I'm paranoid.
  buffer := GAPMPI_MakeString( MPI_Get_count() + 2 );
  # Note MPI_ANY_SOURCE is bug if second message arrives after MPI_Get_count()
  return MPI_Recv( buffer, MPI_Get_source() );
end;

RecvMsg := function( arg )
  local source, str;
  source := MPI_ANY_SOURCE;
  if Length(arg) > 0 then source := arg[1]; fi;
  if source <> MPI_ANY_SOURCE
     and ( not IsInt(source) or 
      (not source in [1..MPI_Comm_size() - 1] and 0 = MPI_Comm_rank() ) ) then
    Error("RecvMsg: Invalid source: ", source);
  fi;
  str := RecvStringMsg( source );
  if Length(str) > 0 then
    ReadEvalFromString( str );
    return GAPMPI_Last();
  else return fail; # This happens when slave receives interrupt
  fi;
end;

SendRecvMsg := function( arg )
  local command, dest, tag;
  command := arg[1]; dest := 1; tag := SENDRECV_TAG;
  if Length(arg) > 1 then dest := arg[2]; fi;
  if Length(arg) > 2 then tag := arg[3]; fi;
  SendMsg( command, dest, tag );
  return RecvMsg( dest, SENDRECV_TAG );
end;

#======================================================================
# GAP Utilities for parallel programming

#Aliases for MPI functions that the user might prefer:
ProbeMsg := MPI_Probe;
ProbeMsgNonBlocking := MPI_Iprobe;
GetLastMsgTag := MPI_Get_tag; # Functional interface to status.tag
GetLastMsgSource := MPI_Get_source; # Functional interface to status.source

#Declare it now for ParReset()
MyDeclareGlobalFunction("PingSlave");

ParReset := function()
  local count, slave;
  # Do this first, in case a slave is stuck in MasterSlave mode.
  for slave in [1..MPI_Comm_size()-1] do
        SendMsg( false, slave, MASTER_SLAVE_QUIT_TAG );
  od;
  count := 0;
  for slave in [1..MPI_Comm_size()-1] do
    SendMsg( false, slave, PING_TAG );
    count := count - 1;
  od;
  for slave in [1..MPI_Comm_size()-1] do
    while MPI_Iprobe( slave ) do
      RecvStringMsg( slave );  # don't evaluate, just throw away
      count := count + 1;
    od;
    if MPI_Get_tag() <> PING_TAG then
      InterruptSlave( slave );
    fi;
  od;
  while MPI_Iprobe( ) do
    RecvStringMsg();  # don't evaluate, just throw away
    count := count + 1;
  od;
  PingSlave( -1 );  # -1 means all slaves
  return count;
end;

# A more sophisticated version of this would "rsh" a remote process
# and send a SIGINT to the remote slaves to also FlushAllMsgs().
# This also returns how many messages were flushed.
# Anyway, this should be replaced by ParReset() at that stage.
FlushAllMsgs := function ()
  local count, slave;
  # Do this first, in case a slave is stuck in MasterSlave mode.
  for slave in [1..MPI_Comm_size()-1] do
        SendMsg( false, slave, MASTER_SLAVE_QUIT_TAG );
  od;
  count := 0;
  while MPI_Iprobe() do
    RecvStringMsg();  # don't evaluate, just throw away
    count := count + 1;
  od;
  return count;
end;

PingSlave := function( dest )
  if dest = -1 then # then ping all slaves
    while dest in [1..MPI_Comm_size() - 1] do PingSlave( dest ); od;
  else
    SendRecvMsg( false, dest, PING_TAG );
    if MPI_Get_tag() <> PING_TAG then
      Error("Slave ", dest, " not responding.\n");
    fi;
  fi;
  return true;
end;

ParEval := function( command )
  local result;
  BroadcastMsg( command );
  result := ReadEvalFromString( command );
  if result = NO_RET_VAL then return; fi;
  return result;
end;

ParCallFuncList := function( func, list )
  return ParEval( PrintToString( "CallFuncList(", func, ",", list, ")" ) );
end;
ParList := function( list, fnc )
  local dest, nslaves, delta, range, tmp, slave, result;
  nslaves := MPI_Comm_size() - 1;
  delta := Int( Length(list) / nslaves - 1 ) + 1;
  for dest in [1..nslaves] do
    range := [ delta*(dest-1)+1 .. Minimum(delta*dest, Length(list)) ];
    ParEval( PrintToString( "parListFnc := ", fnc ) );
    SendMsg( PrintToString( "List( ", list{ range }, ", parListFnc )" ),
	     dest, dest );  # dest also serves as tag
  od;
  result := [];
  for dest in [1..nslaves] do
    tmp := RecvMsg();
    slave := MPI_Get_tag();
    range := [ delta*(slave-1)+1 .. Minimum(delta*slave, Length(list)) ];
    result{ range } := tmp;
  od;
  return result;
end;

ParRead := function( file )
  if not IsString(file) then Error("string argument required"); fi;
  ParEval( Concatenation( "Read( \"", file, "\" )" ) );
end;


#============================================================================
# Basic slave listener functions

SlaveListener := function()
  local result;
  while true do
    result := RecvMsg();
    if MPI_Get_tag() = QUIT_TAG then break; fi;
    if MPI_Get_tag() = PING_TAG then result := true; fi;
    # Note that BROADCAST_TAG > SLAVEREPLY_TAG, and so there's no reply.
    if MPI_Get_tag() < SLAVEREPLY_TAG then
      # if it will print with '"' then ...
      # The weird test condition mimics the logic of GAP's Print() command.
      if IsString(result)
          and ( Length(result) > 0 or IS_STRING_REP(result) ) then 
        result := Concatenation("\"", result, "\"");
      else
        result := PrintToString(result);
      fi;
      SendMsg( result, 0, MPI_Get_tag() );
    fi;
    #In GAP 4b3:
    # The statements below don't work.  Apparently, CloseStream() (or even the
    #  fflush() in PrintTo() ) inside ReadEvalPrint()
    #  doesn't take effect until after this statement has completed.
    # MPI_Send( PrintToString(result), 0, MPI_Get_tag() );
    # MPI_Send( ReadEvalPrint( buffer), 0 );
  od;
  return true; # Return anything but fail
end;

CloseSlaveListener := function ()
  local dest;
  if not MPI_Initialized() then return; fi;
  if MPI_Comm_rank() <> 0 then MPI_Finalize(); return; fi;
  ParReset(); # slave should still be alive for this.  QUIT_TAG is later.
  for dest in [1..MPI_Comm_size() - 1] do
    # paranoia:  ParReset() should already have taken us out of MasterSlave.
    SendMsg( "false", dest, MASTER_SLAVE_QUIT_TAG ); # in case in MasterSlave
    SendMsg( "false", dest, QUIT_TAG );
  od;
  MPI_Finalize();
end;

InstallAtExit( CloseSlaveListener );

#=============================================================================

if not IsBound( MasterSlave ) then
  ReadLib("masslave.g");
fi;

if MPI_Comm_rank() <> 0 then
  # Call SlaveListener(), and repeat if we catch SIGINT (if we return fail)
  while fail = GAPMPI_Catch( SlaveListener, [] ) do
    # GAP 4b3 wants a non-empty, non-trivial body.
    # This statement should have no effect, and it can go away in GAP 4.x
    for i in [1..100] do i := i; od;
  od;
fi;

# quit;
