How process A transfers data to process B through binder_transaction; steps 4, 5, 6 and 7 are executed in a loop, one page at a time:
- a binder_buffer is picked; at this point it only holds virtual addresses (pointing into process B's user space) and no physical pages are mapped to it yet
- kmap maps a kernel-space virtual page onto the newly allocated physical page
- copy_from_user writes process A's user-space data into that freshly mapped kernel virtual page

With traditional IPC, process A makes a system call and traps into the kernel, which copies the data from A's address space into kernel memory; for process B to read it, B makes another system call and the kernel copies the data into B's address space. That adds up to two memory copies and two user/kernel mode switches.

The first memory copy of Binder IPC is the same as in traditional IPC: the data is copied from process A's address space into the kernel. But Binder IPC does not need the second copy, because the virtual memory in process B that points at the data is mapped onto the kernel physical memory that holds it, so what process B reads is exactly the copy that was made into the kernel the first time. That makes it one memory copy and two user/kernel mode switches.

Note that the "one copy" here refers to a single Binder IPC. A Binder method call, i.e. a Binder RPC, needs two IPCs: sending the request and receiving the response. So one Binder RPC actually involves two IPCs, which means two memory copies (and four user/kernel mode switches).
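The effect of that single copy is easier to picture with an ordinary user-space analogy: the same physical page mapped at two different virtual addresses, so a write through one view is immediately readable through the other with no further copying. The sketch below is only an illustration of that idea (using memfd_create inside one process, error handling omitted), not how binder itself is implemented:

// Illustration only: one physical page, two virtual views. Binder gets the same effect by
// mapping the receiver's binder_buffer and the kernel's copy destination onto the same pages.
#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>
#include <cstring>
#include <cstdio>

int main() {
    const size_t len = getpagesize();
    int fd = memfd_create("one_copy_demo", 0);   // anonymous in-memory file
    ftruncate(fd, len);
    // Two independent virtual mappings of the same physical pages.
    char* viewA = (char*)mmap(nullptr, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    char* viewB = (char*)mmap(nullptr, len, PROT_READ, MAP_SHARED, fd, 0);
    memcpy(viewA, "hello", 6);   // the one and only copy of the payload
    printf("%s\n", viewB);       // visible through the second view, no extra copy
    return 0;
}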
// <https://cs.android.com/android/platform/superproject/+/master:system/libhwbinder/include/hwbinder/IPCThreadState.h>
class IPCThreadState
{
private:
    Parcel mIn;  // region the binder driver writes into and the app process reads from; corresponds to binder_thread_read
    Parcel mOut; // region the app process writes into and the binder driver reads from; corresponds to binder_thread_write
};
// <http://www.aospxref.com/android-13.0.0_r3/xref/frameworks/native/libs/binder/IPCThreadState.cpp>
status_t IPCThreadState::transact(int32_t handle,
                                  uint32_t code, const Parcel& data /* the arguments are packed in here */,
                                  Parcel* reply /* receives the response data */, uint32_t flags)
{
    // Write [BC_TRANSACTION][binder_transaction_data] into mOut
    // mOut is a chunk of memory inside the client process
err = writeTransactionData(BC_TRANSACTION, flags, handle, code, data, nullptr);
    // Send the request to the binder driver and wait for the response
err = waitForResponse(reply);
// ...
return err;
}
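For context on how execution reaches transact(): below it sits everything this chapter walks through, and above it sits either AIDL-generated proxy code or a hand-rolled call like the sketch below (the service name and transaction code are placeholders):

// Sketch: driving a remote call by hand through IBinder::transact instead of AIDL stubs.
// "some.service" and FIRST_CALL_TRANSACTION + 0 are made-up placeholders.
#include <binder/IServiceManager.h>
#include <binder/IBinder.h>
#include <binder/Parcel.h>

using namespace android;

status_t callRemote() {
    sp<IBinder> binder = defaultServiceManager()->checkService(String16("some.service"));
    if (binder == nullptr) return NAME_NOT_FOUND;
    Parcel data, reply;
    data.writeInt32(42);                         // arguments are marshalled into `data`
    // BpBinder::transact ends up in the IPCThreadState::transact shown above.
    status_t err = binder->transact(IBinder::FIRST_CALL_TRANSACTION + 0, data, &reply, 0);
    if (err != NO_ERROR) return err;
    int32_t result = reply.readInt32();          // unmarshal whatever the server wrote into the reply
    (void)result;
    return NO_ERROR;
}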
status_t IPCThreadState::writeTransactionData(int32_t cmd /* BC_TRANSACTION */, uint32_t binderFlags,
int32_t handle, uint32_t code, const Parcel& data, status_t* statusBuffer)
{
binder_transaction_data tr;
tr.target.ptr = 0; /* Don't pass uninitialized stack data to a remote process */
tr.target.handle = handle;
tr.code = code;
tr.flags = binderFlags;
tr.cookie = 0;
tr.sender_pid = 0;
tr.sender_euid = 0;
const status_t err = data.errorCheck();
if (err == NO_ERROR) {
        tr.data_size = data.ipcDataSize();   // size of the memory region that holds the arguments
        tr.data.ptr.buffer = data.ipcData(); // i.e. the start address of the memory block where the Parcel stores its data
tr.offsets_size = data.ipcObjectsCount()*sizeof(binder_size_t);
tr.data.ptr.offsets = data.ipcObjects();
} else if (statusBuffer) {
tr.flags |= TF_STATUS_CODE;
*statusBuffer = err;
tr.data_size = sizeof(status_t);
tr.data.ptr.buffer = reinterpret_cast<uintptr_t>(statusBuffer);
tr.offsets_size = 0;
tr.data.ptr.offsets = 0;
} else {
return (mLastError = err);
}
    // Write [BC_TRANSACTION][binder_transaction_data] into mOut
    // Corresponds to step 1 in the 【Guiding Principles】 section
mOut.writeInt32(cmd);
mOut.write(&tr, sizeof(tr));
return NO_ERROR;
}
status_t IPCThreadState::waitForResponse(Parcel *reply, status_t *acquireResult)
{
uint32_t cmd;
int32_t err;
while (1) {
if ((err=talkWithDriver()) < NO_ERROR) break;
err = mIn.errorCheck();
if (err < NO_ERROR) break;
if (mIn.dataAvail() == 0) continue;
cmd = (uint32_t)mIn.readInt32();
switch (cmd) {
case BR_FAILED_REPLY:
case BR_FROZEN_REPLY:
case BR_ACQUIRE_RESULT:
case BR_REPLY:
            // ...
        }
    }
    // ...
    return err;
}
// Send the request to the binder driver and receive the response
// ioctl(mProcess->mDriverFD, BINDER_WRITE_READ, binder_write_read)
// [write_buffer, write_size] is the request
// [read_buffer, read_size] is the response
status_t IPCThreadState::talkWithDriver(bool doReceive)
{
if (mProcess->mDriverFD < 0) {
return -EBADF;
}
binder_write_read bwr;
// Is the read buffer empty?
const bool needRead = mIn.dataPosition() >= mIn.dataSize();
// We don't want to write anything if we are still reading
// from data left in the input buffer and the caller
// has requested to read the next data.
const size_t outAvail = (!doReceive || needRead) ? mOut.dataSize() : 0;
    bwr.write_size = outAvail;                 // size of the mOut region, i.e. the size of the packed request
    bwr.write_buffer = (uintptr_t)mOut.data(); // address of mOut, which holds the packed request arguments
// This is what we'll read.
if (doReceive && needRead) {
bwr.read_size = mIn.dataCapacity();
bwr.read_buffer = (uintptr_t)mIn.data();
} else {
bwr.read_size = 0;
bwr.read_buffer = 0;
}
// Return immediately if there is nothing to do.
if ((bwr.write_size == 0) && (bwr.read_size == 0)) return NO_ERROR;
bwr.write_consumed = 0;
bwr.read_consumed = 0;
status_t err;
do {
        if (ioctl(mProcess->mDriverFD, BINDER_WRITE_READ, &bwr) >= 0) // corresponds to step 2 in the 【Guiding Principles】 section
err = NO_ERROR;
else
err = -errno;
if (mProcess->mDriverFD < 0) {
err = -EBADF;
}
} while (err == -EINTR);
// ...
}
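Stripped of IPCThreadState, talkWithDriver boils down to a single BINDER_WRITE_READ ioctl whose argument carries both directions at once. Roughly, as a sketch with error handling omitted and buffer management simplified:

// Sketch: the bare system-call sequence underneath talkWithDriver.
// write_buffer/write_size carry the outgoing command stream (what mOut holds),
// read_buffer/read_size receive the driver's BR_* reply stream (what ends up in mIn).
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/android/binder.h>
#include <cerrno>
#include <cstdint>

long binderRoundTrip(uint8_t* outBuf, size_t outLen, uint8_t* inBuf, size_t inCap) {
    int fd = open("/dev/binder", O_RDWR | O_CLOEXEC);
    // The per-process receive mapping (ProcessState sets this up once); buffers referenced
    // by incoming BR_TRANSACTION commands live inside this region.
    mmap(nullptr, 1024 * 1024, PROT_READ, MAP_PRIVATE | MAP_NORESERVE, fd, 0);

    binder_write_read bwr = {};
    bwr.write_buffer = (uintptr_t)outBuf;  // e.g. [BC_TRANSACTION][binder_transaction_data]
    bwr.write_size   = outLen;
    bwr.read_buffer  = (uintptr_t)inBuf;   // the driver writes BR_* commands here
    bwr.read_size    = inCap;
    if (ioctl(fd, BINDER_WRITE_READ, &bwr) < 0)
        return -errno;
    // write_consumed/read_consumed report how much of each buffer was used;
    // the elided tail of talkWithDriver uses exactly these to advance mOut and resize mIn.
    return (long)bwr.read_consumed;
}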
// <https://cs.android.com/android/kernel/superproject/+/common-android-mainline:common/drivers/android/binder.c>
static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) {...}
static int binder_ioctl_write_read(struct file *filp,
unsigned int cmd, unsigned long arg,
struct binder_thread *thread)
{
int ret = 0;
struct binder_proc *proc = filp->private_data;
unsigned int size = _IOC_SIZE(cmd);
void __user *ubuf = (void __user *)arg;
struct binder_write_read bwr;
if (size != sizeof(struct binder_write_read)) {
ret = -EINVAL;
goto out;
}
    // Copy the binder_write_read struct from user space into kernel space
if (copy_from_user(&bwr, ubuf, sizeof(bwr))) {
ret = -EFAULT;
goto out;
}
    // Handle the application process's request (write_buffer) first
if (bwr.write_size > 0) {
ret = binder_thread_write(proc, thread,
bwr.write_buffer,
bwr.write_size,
&bwr.write_consumed);
trace_binder_write_done(ret);
if (ret < 0) {
bwr.read_consumed = 0;
if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
ret = -EFAULT;
goto out;
}
}
if (bwr.read_size > 0) {
ret = binder_thread_read(proc, thread, bwr.read_buffer,
bwr.read_size,
&bwr.read_consumed,
filp->f_flags & O_NONBLOCK);
trace_binder_read_done(ret);
binder_inner_proc_lock(proc);
if (!binder_worklist_empty_ilocked(&proc->todo))
binder_wakeup_proc_ilocked(proc);
binder_inner_proc_unlock(proc);
if (ret < 0) {
if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
ret = -EFAULT;
goto out;
}
}
binder_debug(BINDER_DEBUG_READ_WRITE,
"%d:%d wrote %lld of %lld, read return %lld of %lld\\\\n",
proc->pid, thread->pid,
(u64)bwr.write_consumed, (u64)bwr.write_size,
(u64)bwr.read_consumed, (u64)bwr.read_size);
if (copy_to_user(ubuf, &bwr, sizeof(bwr))) {
ret = -EFAULT;
goto out;
}
out:
return ret;
}
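The size != sizeof(struct binder_write_read) check works because an ioctl command number encodes the expected argument size, and _IOC_SIZE extracts it again. A tiny illustration (the macro expansion shown in the comment is how the UAPI header defines the command, but treat the exact numbers as an implementation detail):

// BINDER_WRITE_READ is defined in the UAPI header roughly as
//   #define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read)
// so the direction, the 'b' type, the number and sizeof(struct binder_write_read)
// are all packed into the 32-bit command, and the driver can recover the size.
#include <linux/android/binder.h>
#include <cstdio>

int main() {
    printf("expected arg size: %u\n", (unsigned)_IOC_SIZE(BINDER_WRITE_READ));
    printf("sizeof(binder_write_read): %zu\n", sizeof(binder_write_read));
    return 0;
}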
static int binder_thread_write(struct binder_proc *proc,
struct binder_thread *thread,
binder_uintptr_t binder_buffer, size_t size,
binder_size_t *consumed)
{
uint32_t cmd;
struct binder_context *context = proc->context;
void __user *buffer = (void __user *)(uintptr_t)binder_buffer;
void __user *ptr = buffer + *consumed;
void __user *end = buffer + size;
while (ptr < end && thread->return_error.cmd == BR_OK) {
int ret;
if (get_user(cmd, (uint32_t __user *)ptr))
return -EFAULT;
ptr += sizeof(uint32_t);
trace_binder_command(cmd);
if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) {
atomic_inc(&binder_stats.bc[_IOC_NR(cmd)]);
atomic_inc(&proc->stats.bc[_IOC_NR(cmd)]);
atomic_inc(&thread->stats.bc[_IOC_NR(cmd)]);
}
switch (cmd) {
case BC_TRANSACTION:
case BC_REPLY: {
struct binder_transaction_data tr;
            // Copy binder_transaction_data from user space into kernel space
if (copy_from_user(&tr, ptr, sizeof(tr)))
return -EFAULT;
ptr += sizeof(tr);
binder_transaction(proc, thread, &tr,
cmd == BC_REPLY, 0);
break;
}
        // ...
        }
        // ...
    }
    return 0;
}
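binder_thread_write is essentially a cursor walking the client's write_buffer: read a BC_* word, then the payload that belongs to it, and advance *consumed so a partially processed buffer can be resumed later. The same loop, rendered in user-space form as a sketch (only BC_TRANSACTION/BC_REPLY handled):

// Sketch of the cursor logic in binder_thread_write: the write buffer is a sequence of
// [cmd][payload] records and *consumed tracks how far we got.
#include <linux/android/binder.h>
#include <cstdint>
#include <cstring>

void parseWriteBuffer(const uint8_t* buf, size_t size, size_t* consumed) {
    const uint8_t* ptr = buf + *consumed;
    const uint8_t* end = buf + size;
    while (ptr + sizeof(uint32_t) <= end) {
        uint32_t cmd;
        memcpy(&cmd, ptr, sizeof(cmd));          // get_user(cmd, ptr) in the driver
        ptr += sizeof(cmd);
        if (cmd == BC_TRANSACTION || cmd == BC_REPLY) {
            if (ptr + sizeof(binder_transaction_data) > end) break;
            binder_transaction_data tr;
            memcpy(&tr, ptr, sizeof(tr));        // copy_from_user(&tr, ptr, sizeof(tr))
            ptr += sizeof(tr);
            // ... hand tr to the transaction machinery ...
        } else {
            break;                               // other BC_* payloads omitted in this sketch
        }
        *consumed = (size_t)(ptr - buf);         // resumable progress marker
    }
}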
static void binder_transaction(struct binder_proc *proc,
struct binder_thread *thread,
struct binder_transaction_data *tr, int reply,
binder_size_t extra_buffers_size)
{
struct binder_transaction *t;
const void __user *user_buffer = (const void __user *)
        (uintptr_t)tr->data.ptr.buffer; // address of the data block in user space
// ...
    // From process B's virtual memory reserved for binder transfers, find a block big enough
    // to hold the request (binder_buffer.user_data).
    // This is where physical pages get allocated: binder allocates them in the kernel and maps
    // process B's virtual memory onto those kernel physical pages, so process B can read the
    // request directly without another kernel-to-user memory copy.
    // See the section 【Mapping Process Virtual Memory and Kernel Memory】.
    // This corresponds to steps 3, 4, 5 and 6 in the 【Guiding Principles】 section.
t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size,
tr->offsets_size, extra_buffers_size,
!reply && (t->flags & TF_ONE_WAY), current->tgid);
// ...
    // Copy the request from process A (here the offsets array at tr->data.ptr.offsets) into
    // process B (t->buffer->user_data).
    // This is the one and only data copy that happens in a binder IPC:
    // because process B's virtual addresses are mapped onto kernel physical memory, the request is
    // really copied from process A into the kernel, and what process B reads is that kernel physical memory.
    // Corresponds to step 7 in the 【Guiding Principles】 section.
if (binder_alloc_copy_user_to_buffer(
&target_proc->alloc,
t->buffer,
ALIGN(tr->data_size, sizeof(void *)),
(const void __user *)
(uintptr_t)tr->data.ptr.offsets,
tr->offsets_size)) {
binder_user_error("%d:%d got transaction with invalid offsets ptr\\\\n",
proc->pid, thread->pid);
return_error = BR_FAILED_REPLY;
return_error_param = -EFAULT;
return_error_line = __LINE__;
goto err_copy_data_failed;
}
// ...
off_start_offset = ALIGN(tr->data_size, sizeof(void *));
buffer_offset = off_start_offset;
off_end_offset = off_start_offset + tr->offsets_size;
sg_buf_offset = ALIGN(off_end_offset, sizeof(void *));
sg_buf_end_offset = sg_buf_offset + extra_buffers_size -
ALIGN(secctx_sz, sizeof(u64));
off_min = 0;
for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
buffer_offset += sizeof(binder_size_t)) {
struct binder_object_header *hdr;
size_t object_size;
struct binder_object object;
binder_size_t object_offset;
binder_size_t copy_size;
if (binder_alloc_copy_from_buffer(&target_proc->alloc,
&object_offset,
t->buffer,
buffer_offset,
sizeof(object_offset))) {
binder_txn_error("%d:%d copy offset from buffer failed\\\\n",
thread->pid, proc->pid);
return_error = BR_FAILED_REPLY;
return_error_param = -EINVAL;
return_error_line = __LINE__;
goto err_bad_offset;
}
/*
* Copy the source user buffer up to the next object
* that will be processed.
*/
copy_size = object_offset - user_offset;
if (copy_size && (user_offset > object_offset ||
binder_alloc_copy_user_to_buffer(
&target_proc->alloc,
t->buffer, user_offset,
user_buffer + user_offset,
copy_size))) {
binder_user_error("%d:%d got transaction with invalid data ptr\\\\n",
proc->pid, thread->pid);
return_error = BR_FAILED_REPLY;
return_error_param = -EFAULT;
return_error_line = __LINE__;
goto err_copy_data_failed;
}
object_size = binder_get_object(target_proc, user_buffer,
t->buffer, object_offset, &object);
if (object_size == 0 || object_offset < off_min) {
binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\\\\n",
proc->pid, thread->pid,
(u64)object_offset,
(u64)off_min,
(u64)t->buffer->data_size);
return_error = BR_FAILED_REPLY;
return_error_param = -EINVAL;
return_error_line = __LINE__;
goto err_bad_offset;
}
/*
* Set offset to the next buffer fragment to be
* copied
*/
user_offset = object_offset + object_size;
hdr = &object.hdr;
off_min = object_offset + object_size;
switch (hdr->type) {
case BINDER_TYPE_BINDER:
case BINDER_TYPE_HANDLE:
case BINDER_TYPE_FD:
case BINDER_TYPE_PTR:
// ...
}
}
/* Done processing objects, copy the rest of the buffer */
if (binder_alloc_copy_user_to_buffer(
&target_proc->alloc,
t->buffer, user_offset,
user_buffer + user_offset,
tr->data_size - user_offset)) {
binder_user_error("%d:%d got transaction with invalid data ptr\\\\n",
proc->pid, thread->pid);
return_error = BR_FAILED_REPLY;
return_error_param = -EFAULT;
return_error_line = __LINE__;
goto err_copy_data_failed;
}
// ...
}
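Inside the receiver's binder_buffer the transaction ends up as three aligned regions: the marshalled data, then the offsets array, then any scatter-gather buffers. A worked example of the offset arithmetic above, with made-up sizes and assuming a 64-bit target:

// Worked example of the offset arithmetic in binder_transaction (sizeof(void*) == 8 assumed).
#include <cstddef>
#include <cstdio>

constexpr size_t ALIGN(size_t x, size_t a) { return (x + a - 1) & ~(a - 1); }

int main() {
    size_t data_size = 100;          // bytes of marshalled arguments
    size_t offsets_size = 2 * 8;     // two binder objects -> two binder_size_t offsets
    size_t extra_buffers_size = 0;   // no scatter-gather payload in this example

    size_t off_start = ALIGN(data_size, sizeof(void*));   // 104: offsets array begins here
    size_t off_end   = off_start + offsets_size;          // 120: offsets array ends here
    size_t sg_start  = ALIGN(off_end, sizeof(void*));     // 120: sg buffers would start here
    printf("data [0,%zu) offsets [%zu,%zu) sg [%zu,%zu)\n",
           data_size, off_start, off_end, sg_start, sg_start + extra_buffers_size);
    return 0;
}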
In the previous section, client write request, binder already copied the request from the client process into the kernel and mapped a chunk of the server process's virtual memory onto that request. Now let's look at what the server receives and how it handles the request data.
// As described in 【Binder IPC Thread Scheduling Model】:
// a server binder thread first writes data to the driver (binder_thread_write), if it has any,
// then reads the data the driver hands over (binder_thread_read); if there is nothing to process,
// the binder thread blocks in binder_wait_for_work (yielding the CPU).
// So once binder posts the request (BINDER_WORK_TRANSACTION) onto the server process's todo list,
// a server binder thread is woken up.
static int binder_thread_read(struct binder_proc *proc,
struct binder_thread *thread,
                              binder_uintptr_t binder_buffer, /* address in the app process; really the IPCThreadState->mIn memory, see talkWithDriver */
                              size_t size, /* size of IPCThreadState->mIn */
                              binder_size_t *consumed /* output: how much was written into binder_buffer */,
int non_block)
{
    void __user *buffer = (void __user *)(uintptr_t)binder_buffer; // the app process's IPCThreadState->mIn memory
    void __user *ptr = buffer + *consumed; // write cursor into that same mIn region
void __user *end = buffer + size;
int ret = 0;
int wait_for_proc_work;
if (*consumed == 0) {
if (put_user(BR_NOOP, (uint32_t __user *)ptr))
return -EFAULT;
ptr += sizeof(uint32_t);
}
retry:
binder_inner_proc_lock(proc);
wait_for_proc_work = binder_available_for_proc_work_ilocked(thread);
binder_inner_proc_unlock(proc);
thread->looper |= BINDER_LOOPER_STATE_WAITING;
trace_binder_wait_for_work(wait_for_proc_work,
!!thread->transaction_stack,
!binder_worklist_empty(proc, &thread->todo));
if (wait_for_proc_work) {
if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
BINDER_LOOPER_STATE_ENTERED))) {
binder_user_error("%d:%d ERROR: Thread waiting for process work before calling BC_REGISTER_LOOPER or BC_ENTER_LOOPER (state %x)\\\\n",
proc->pid, thread->pid, thread->looper);
wait_event_interruptible(binder_user_error_wait,
binder_stop_on_user_error < 2);
}
trace_android_vh_binder_restore_priority(NULL, current);
binder_restore_priority(thread, &proc->default_priority);
}
if (non_block) {
if (!binder_has_work(thread, wait_for_proc_work))
ret = -EAGAIN;
} else {
        ret = binder_wait_for_work(thread, wait_for_proc_work); // the binder thread is woken up here and then continues
}
thread->looper &= ~BINDER_LOOPER_STATE_WAITING;
if (ret)
return ret;
while (1) {
// ...
struct binder_transaction_data_secctx tr;
struct binder_transaction_data *trd = &tr.transaction_data;
struct binder_transaction *t = NULL;
struct binder_work *w = NULL;
size_t trsize = sizeof(*trd);
w = binder_dequeue_work_head_ilocked(list);
switch (w->type) {
        case BINDER_WORK_TRANSACTION: { // a request handed over by the binder driver
binder_inner_proc_unlock(proc);
t = container_of(w, struct binder_transaction, work);
} break;
// ...
trd->data_size = t->buffer->data_size;
trd->offsets_size = t->buffer->offsets_size;
        // As explained for binder_transaction in the 【client write request】 section,
        // binder_transaction->buffer is a block of free virtual memory, large enough for the transaction,
        // picked from process B's binder_alloc->free_buffers,
        // i.e. binder_transaction->buffer->user_data points into the server process's virtual memory,
        // and that virtual memory is mapped onto physical memory in the kernel (the request was copied into the kernel exactly once).
trd->data.ptr.buffer = (uintptr_t)t->buffer->user_data;
trd->data.ptr.offsets = trd->data.ptr.buffer +
ALIGN(t->buffer->data_size,
sizeof(void *));
tr.secctx = t->security_ctx;
if (t->security_ctx) {
cmd = BR_TRANSACTION_SEC_CTX;
trsize = sizeof(tr);
}
        // Write BR_TRANSACTION into the app process's buffer, which is really the app's IPCThreadState->mIn memory
if (put_user(cmd, (uint32_t __user *)ptr)) {
if (t_from)
binder_thread_dec_tmpref(t_from);
binder_cleanup_transaction(t, "put_user failed",
BR_FAILED_REPLY);
return -EFAULT;
}
ptr += sizeof(uint32_t);
        // Write the binder_transaction_data into the app process's buffer (mIn)
if (copy_to_user(ptr, &tr, trsize)) {
if (t_from)
binder_thread_dec_tmpref(t_from);
binder_cleanup_transaction(t, "copy_to_user failed",
BR_FAILED_REPLY);
return -EFAULT;
}
// ...
}
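The wait/wake behaviour binder_thread_read relies on (sleep in binder_wait_for_work while the todo list is empty, get woken when binder_transaction queues a BINDER_WORK_TRANSACTION) is the classic blocking work-queue pattern. A user-space rendering of the same idea, illustration only:

// Illustration of the todo-list / wait_for_work pattern: a server thread sleeps while the
// queue is empty and is woken when work arrives.
#include <condition_variable>
#include <deque>
#include <mutex>

struct Work { /* e.g. a queued transaction */ };

class TodoList {
public:
    void push(Work w) {                      // binder_transaction: enqueue + wake the target
        { std::lock_guard<std::mutex> lk(m_); q_.push_back(std::move(w)); }
        cv_.notify_one();
    }
    Work pop() {                             // binder_thread_read: sleep until there is work
        std::unique_lock<std::mutex> lk(m_);
        cv_.wait(lk, [this] { return !q_.empty(); });   // binder_wait_for_work
        Work w = std::move(q_.front());
        q_.pop_front();
        return w;
    }
private:
    std::mutex m_;
    std::condition_variable cv_;
    std::deque<Work> q_;
};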
status_t IPCThreadState::waitForResponse(Parcel *reply, status_t *acquireResult)
{
uint32_t cmd;
int32_t err;
while (1) {
        if ((err=talkWithDriver()) < NO_ERROR) break; // the data handed over by the binder driver is written into IPCThreadState->mIn
err = mIn.errorCheck();
if (err < NO_ERROR) break;
if (mIn.dataAvail() == 0) continue;
cmd = (uint32_t)mIn.readInt32();
switch (cmd) {
// ...
default:
err = executeCommand(cmd);
if (err != NO_ERROR) goto finish;
break;
}
}
finish:
if (err != NO_ERROR) {
if (acquireResult) *acquireResult = err;
if (reply) reply->setError(err);
mLastError = err;
logExtendedError();
}
return err;
}
status_t IPCThreadState::executeCommand(int32_t cmd)
{
BBinder* obj;
RefBase::weakref_type* refs;
status_t result = NO_ERROR;
switch ((uint32_t)cmd) {
// ...
case BR_TRANSACTION_SEC_CTX:
case BR_TRANSACTION:
{
binder_transaction_data_secctx tr_secctx;
binder_transaction_data& tr = tr_secctx.transaction_data;
if (cmd == (int) BR_TRANSACTION_SEC_CTX) {
result = mIn.read(&tr_secctx, sizeof(tr_secctx));
} else {
result = mIn.read(&tr, sizeof(tr));
tr_secctx.secctx = 0;
}
ALOG_ASSERT(result == NO_ERROR,
"Not enough command data for brTRANSACTION");
if (result != NO_ERROR) break;
            // tr.data.ptr.buffer is a virtual address in this app process, actually mapped onto the
            // kernel physical memory that holds the copied request.
            // Wrap tr.data.ptr.buffer in a Parcel and hand it to the binder server implementation,
            // which reads the arguments one by one according to the AIDL protocol.
Parcel buffer;
buffer.ipcSetDataReference(
reinterpret_cast<const uint8_t*>(tr.data.ptr.buffer),
tr.data_size,
reinterpret_cast<const binder_size_t*>(tr.data.ptr.offsets),
tr.offsets_size/sizeof(binder_size_t), freeBuffer);
// ...
            // Remember BBinder? If not, revisit 【Objects and Classes Commonly Used in Binder IPC】.
Parcel reply;
if (tr.target.ptr) {
// We only have a weak reference on the target object, so we must first try to
// safely acquire a strong reference before doing anything else with it.
if (reinterpret_cast<RefBase::weakref_type*>(
tr.target.ptr)->attemptIncStrong(this)) {
error = reinterpret_cast<BBinder*>(tr.cookie)->transact(tr.code, buffer,
&reply, tr.flags);
reinterpret_cast<BBinder*>(tr.cookie)->decStrong(this);
} else {
error = UNKNOWN_TRANSACTION;
}
} else {
error = the_context_object->transact(tr.code, buffer, &reply, tr.flags);
}
            // If the call is not one-way, the response/reply still has to be sent back to the client
if ((tr.flags & TF_ONE_WAY) == 0) {
LOG_ONEWAY("Sending reply to %d!", mCallingPid);
if (error < NO_ERROR) reply.setError(error);
// b/238777741: clear buffer before we send the reply.
// Otherwise, there is a race where the client may
// receive the reply and send another transaction
// here and the space used by this transaction won't
// be freed for the client.
buffer.setDataSize(0);
constexpr uint32_t kForwardReplyFlags = TF_CLEAR_BUF;
sendReply(reply, (tr.flags & kForwardReplyFlags));
            } else {
                // ... one-way: nothing is sent back
            }
        }
        break;
        // ...
    }
    return result;
}
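On the server side, BBinder::transact dispatches to onTransact, which is what AIDL-generated Bn* classes override. Hand-written, the receiving end of the Parcel wrapped above looks roughly like this (a sketch; the transaction code and parameters are placeholders):

// Sketch of a hand-written service: onTransact reads arguments straight out of the Parcel that
// wraps the one-copy buffer and writes results into `reply`.
#include <binder/Binder.h>
#include <binder/Parcel.h>

using namespace android;

class MyService : public BBinder {
protected:
    status_t onTransact(uint32_t code, const Parcel& data,
                        Parcel* reply, uint32_t flags) override {
        switch (code) {
        case FIRST_CALL_TRANSACTION + 0: {       // placeholder transaction code
            int32_t arg = data.readInt32();      // reads from the kernel-backed buffer, no copy in
            if (reply) reply->writeInt32(arg + 1);
            return NO_ERROR;
        }
        default:
            return BBinder::onTransact(code, data, reply, flags);
        }
    }
};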
status_t IPCThreadState::talkWithDriver(bool doReceive)
{
if (mProcess->mDriverFD < 0) {
return -EBADF;
}
binder_write_read bwr;
// Is the read buffer empty?
const bool needRead = mIn.dataPosition() >= mIn.dataSize();
// We don't want to write anything if we are still reading
// from data left in the input buffer and the caller
// has requested to read the next data.
const size_t outAvail = (!doReceive || needRead) ? mOut.dataSize() : 0;
bwr.write_size = outAvail;
bwr.write_buffer = (uintptr_t)mOut.data();
    // binder_write_read.read_buffer actually points at mIn
if (doReceive && needRead) {
bwr.read_size = mIn.dataCapacity();
bwr.read_buffer = (uintptr_t)mIn.data();
} else {
bwr.read_size = 0;
bwr.read_buffer = 0;
}
// Return immediately if there is nothing to do.
if ((bwr.write_size == 0) && (bwr.read_size == 0)) return NO_ERROR;
bwr.write_consumed = 0;
bwr.read_consumed = 0;
status_t err;
do {
if (ioctl(mProcess->mDriverFD, BINDER_WRITE_READ, &bwr) >= 0)
err = NO_ERROR;
else
err = -errno;
if (mProcess->mDriverFD < 0) {
err = -EBADF;
}
} while (err == -EINTR);
// ...
}