# See lib/matrix.gi for source code for SemiEchelonMat()
# The parallelization here is based on that source code.
# Reading the sequential source code first may help you understand
#   the underlying algorithm.
# A more efficient parallelization would partition the matrix into sets
#   of adjacent rows, and send an entire set as a single taskInput.
#   This would minimize communication overhead.


#Environment: vectors (basis vectors), heads, mat (matrix)
#               [ We don't update mat, since it's not used afterwards. ]
#TaskInput:   i (row index of matrix)
#TaskOutput:  List of (1) j and (2) row i of matrix, mat, reduced by vectors
#               j is the position of the first non-zero element of row i
#Task:        Compute reduced row i from mat, vectors, heads
#UpdateEnvironment:  Given i, j, reduced row i, add
#               new basis vector to vectors and update heads[j] to point to it

# MSSemiEchelonMat( mat )
#   Computes a semi-echelonized basis for the rows of <mat>, submitting the
#   reduction of each row to MasterSlave() as a separate task.
#   Returns a record with the components
#     vectors: list of basis vectors, each divided by its first non-zero
#              entry, so that entry becomes one
#     heads:   list with one entry per column; heads[j] is the position in
#              'vectors' of the basis vector whose first non-zero entry is
#              in column j, or 0 if no such vector was found
#   The rows of the argument are copied before being reduced, so the
#   caller's matrix is left untouched.
#   A matrix with no rows, or whose first row has no columns, yields
#   empty 'heads' and 'vectors' lists.
MSSemiEchelonMat := function( mat )
    local zero,      # zero of the field of <mat>
          nrows,     # number of rows in <mat>
          ncols,     # number of columns in <mat>
          vectors,   # list of basis vectors
          heads,     # list of pivot positions in 'vectors'
          i,         # loop over rows
          SetTaskInput, DoTask, GetTaskOutput, UpdateEnvironment;

    # mat[1] and mat[1][1] below would signal an error for a matrix without
    # rows or without columns; such a matrix has no basis vectors at all.
    nrows:= Length( mat );
    if nrows = 0 or Length( mat[1] ) = 0 then
      return rec( heads   := [],
                  vectors := [] );
    fi;

    # DoTask() reduces rows in place, so work on copies of the rows.
    mat:= List( mat, ShallowCopy );
    ncols:= Length( mat[1] );

    zero:= Zero( mat[1][1] );

    heads:= ListWithIdenticalEntries( ncols, 0 );
    vectors := [];

    # Hand out the row indices 1 .. nrows one at a time, then NOTASK.
    i := 0;
    SetTaskInput := function()
      if i < nrows then
        i := i+1;
        return i;
      else return NOTASK;
      fi;
    end;

    # Reduce row i by the basis vectors currently known and report the
    # position j of its first non-zero entry together with the reduced row.
    # ( j = ncols+1 is treated below as "row reduced to zero". )
    DoTask := function( i ) # taskInput = i
      local j;
      # Reduce the row with the known basis vectors.
      for j in [ 1 .. ncols ] do
        if heads[j] <> 0 then
          AddRowVector( mat[i], vectors[ heads[j] ], - mat[i][j] );
        fi;
      od;
      j := PositionNot( mat[i], zero );
      return [j, mat[i]]; # return taskOutput
    end;

    # Decide what to do with a finished task:
    #   row reduced to zero             -> nothing to add
    #   IsUpToDate() reports false      -> redo the task
    #   otherwise                       -> record a new basis vector
    GetTaskOutput := function( taskOutput, i )
      local j;
      j := taskOutput[1];
      if j > ncols then
        return NO_ACTION;
      elif not IsUpToDate() then
        # Maybe it's possible to reduce mat[i] more w/ newer vectors[]
        return REDO_ACTION;
      else return UPDATE_ACTION;  # update vectors[]
      fi;
    end;

    # Append the reduced row, divided by its leading entry, to 'vectors'
    # and let heads[j] point to it.
    UpdateEnvironment := function( taskOutput, i )
      local j, mat_sub_i; # mat_sub_i is local version of reduced mat[i]
      j := taskOutput[1];
      mat_sub_i := taskOutput[2];
      # We found a new basis vector.
      Add( vectors, mat_sub_i / mat_sub_i[j] );
      heads[j]:= Length( vectors );
    end;
    
    MasterSlave( SetTaskInput, DoTask, GetTaskOutput, UpdateEnvironment );

    return rec( heads   := heads,
                vectors := vectors );
end;

# ParSemiEchelonMat( mat )
#   Entry point: passes MSSemiEchelonMat and the argument list [ mat ]
#   to ParCallFuncList() and returns whatever that call returns.
ParSemiEchelonMat := function( mat )
  local arglist;   # argument list handed to MSSemiEchelonMat
  arglist := [ mat ];
  return ParCallFuncList( MSSemiEchelonMat, arglist );
end;
