分类 技术分享 下的文章

ssh client代码阅读 (unfinished)

继续来读代码。今天读ssh client。在这之前,让我们先了解一下ssh协议。

SSH协议介绍如下:
https://www.ssh.com/academy/ssh/protocol

SSH协议是一个经典的CS模型,SSH的客户端主导连接设置过程,并使用公钥加密来验证SSH服务器的身份。
在设置阶段之后,SSH协议使用强对称加密和散列算法来确保客户端和服务器之间交换的数据的私密性和完整性。

1. 客户端连接服务器;(C->S)
2. 服务端发送服务端的公钥;(S->C)
3. 服务端和客户端互相沟通,产生一个安全通道;(C<->S)
4. 用户登录到服务端的操作系统里

SSH常用的协议包括:

RFC 4251 - Secure Shell (SSH) 协议架构
RFC 4253 - Secure Shell (SSH) 传输层协议
RFC 4252 - Secure Shell (SSH) 身份验证协议
RFC 4254 - Secure Shell (SSH) 连接协议

以及一个基于SSH的SFTP文件传输协议。看RFC不如直接看代码,开始。在packet.c中有一个大型结构体session_state。connection_in和connection_out两个变量用于保存通信时的文件描述符:connection_in用于读取,connection_out用于写入;如果是socket的话,它们两个可以是同一个描述符。对应的,receive_context/send_context是两个方向上加解密用的上下文。input/output是从socket读入、向socket写出的原始数据缓冲区。incoming_packet/outgoing_packet是当前正在处理的接收/发送包。compression_buffer是packet压缩/解压缩时使用的临时缓冲区。

/*
 * Packet-layer state of one connection: descriptors, cipher contexts,
 * raw and per-packet buffers, compression state, rekeying bookkeeping
 * and assorted one-shot flags.
 */
struct session_state {
    /*
     * Descriptors used for reading from / writing to the peer.
     * ssh_alloc_session_state() initialises both to -1.
     */
    int connection_in;
    int connection_out;

    /* Protocol flags for the remote side. */
    u_int remote_protocol_flags;

    /* Encryption context for receiving data.  Only used for decryption. */
    struct sshcipher_ctx *receive_context;

    /* Encryption context for sending data.  Only used for encryption. */
    struct sshcipher_ctx *send_context;

    /* Buffer for raw input data from the socket. */
    struct sshbuf *input;

    /* Buffer for raw output data going to the socket. */
    struct sshbuf *output;

    /* Buffer for the partial outgoing packet being constructed. */
    struct sshbuf *outgoing_packet;

    /* Buffer for the incoming packet currently being processed. */
    struct sshbuf *incoming_packet;

    /* Scratch buffer for packet compression/decompression. */
    struct sshbuf *compression_buffer;

#ifdef WITH_ZLIB
    /* Incoming/outgoing compression dictionaries */
    z_stream compression_in_stream;
    z_stream compression_out_stream;
#endif
    /*
     * NOTE(review): presumably per-direction "stream started" flags and
     * failure counters for the compression streams - confirm against
     * the code that uses them.
     */
    int compression_in_started;
    int compression_out_started;
    int compression_in_failures;
    int compression_out_failures;

    /* default maximum packet size (32768 after allocation) */
    u_int max_packet_size;

    /* Flag indicating whether this module has been initialized. */
    int initialized;

    /* Set to true if the connection is interactive. */
    int interactive_mode;

    /* Set to true if we are the server side. */
    int server_side;

    /* Set to true if we are authenticated. */
    int after_authentication;

    /*
     * Reset to 0 by ssh_packet_read_poll_seqnr() whenever a packet with
     * a non-zero type is received.
     */
    int keep_alive_timeouts;

    /*
     * The maximum time that we will wait to send or receive a packet
     * (-1 after allocation).
     */
    int packet_timeout_ms;

    /* Session key information for Encryption and MAC */
    struct newkeys *newkeys[MODE_MAX];
    struct packet_state p_read, p_send;

    /* Volume-based rekeying */
    u_int64_t max_blocks_in, max_blocks_out, rekey_limit;

    /* Time-based rekeying */
    u_int32_t rekey_interval;   /* how often in seconds */
    time_t rekey_time;  /* time of last rekeying */

    /* roundup current message to extra_pad bytes */
    u_char extra_pad;

    /* XXX discard incoming data after MAC error */
    u_int packet_discard;
    size_t packet_discard_mac_already;
    struct sshmac *packet_discard_mac;

    /*
     * Used in packet_read_poll2(); also caches the length field of a
     * partially received packet in ssh_packet_read_poll2_mux().
     */
    u_int packlen;

    /*
     * Used in packet_send2.  Starts out as 1 so that packets are queued
     * until the initial key exchange is done.
     */
    int rekeying;

    /* Used in ssh_packet_send_mux() */
    int mux;

    /* Used in packet_set_interactive */
    int set_interactive_called;

    /* Used in packet_set_maxsize */
    int set_maxsize_called;

    /* One-off warning about weak ciphers */
    int cipher_warning_done;

    /* Hook for fuzzing inbound packets */
    ssh_packet_hook_fn *hook_in;
    void *hook_in_ctx;

    /* Queue head, initialised with TAILQ_INIT() at allocation time. */
    TAILQ_HEAD(, packet) outgoing;
};

以incoming_packet为例,可以在main等 --> ssh_packet_set_connection --> ssh_alloc_session_state 中看到它的踪迹。ssh_alloc_session_state()初始化一个ssh结构体。

/*
 * Allocates a struct ssh together with its packet-layer session_state,
 * the key-exchange object and the four input/output/packet buffers,
 * then fills in the default field values.
 *
 * Returns the new object, or NULL if any allocation fails; in that case
 * everything allocated so far has been released.
 */
struct ssh *
ssh_alloc_session_state(void)
{
    struct session_state *state = NULL;
    struct ssh *ssh;

    /* Allocate step by step; the first failure jumps to the cleanup. */
    if ((ssh = calloc(1, sizeof(*ssh))) == NULL)
        goto fail;
    if ((state = calloc(1, sizeof(*state))) == NULL)
        goto fail;
    if ((ssh->kex = kex_new()) == NULL)
        goto fail;
    if ((state->input = sshbuf_new()) == NULL)
        goto fail;
    if ((state->output = sshbuf_new()) == NULL)
        goto fail;
    if ((state->outgoing_packet = sshbuf_new()) == NULL)
        goto fail;
    if ((state->incoming_packet = sshbuf_new()) == NULL)
        goto fail;

    TAILQ_INIT(&state->outgoing);
    TAILQ_INIT(&ssh->private_keys);
    TAILQ_INIT(&ssh->public_keys);

    /* No descriptors attached yet. */
    state->connection_in = -1;
    state->connection_out = -1;
    state->max_packet_size = 32768;
    state->packet_timeout_ms = -1;
    state->p_read.packets = 0;
    state->p_send.packets = 0;
    state->initialized = 1;
    /*
     * Start in "rekeying" mode: ssh_packet_send2() has to queue packets
     * until the initial key exchange has completed.
     */
    state->rekeying = 1;

    ssh->state = state;
    return ssh;

 fail:
    if (state != NULL) {
        sshbuf_free(state->input);
        sshbuf_free(state->output);
        sshbuf_free(state->incoming_packet);
        sshbuf_free(state->outgoing_packet);
        free(state);
    }
    if (ssh != NULL) {
        kex_free(ssh->kex);
        free(ssh);
    }
    return NULL;
}

在上述函数中出现的sshbuf_new定义如下。它新建一个sshbuf结构体,并申请256(SSHBUF_SIZE_INIT)字节的初始内存,最大内存是0x800000(SSHBUF_SIZE_MAX)。同时可以看到它具有一个refcount。

/*
 * Creates an empty, writable buffer with SSHBUF_SIZE_INIT bytes of
 * zeroed storage, a size limit of SSHBUF_SIZE_MAX, no parent and a
 * reference count of 1.
 *
 * Returns the new buffer, or NULL if either allocation fails.
 */
struct sshbuf *
sshbuf_new(void)
{
    struct sshbuf *buf;

    buf = calloc(1, sizeof(*buf));
    if (buf == NULL)
        return NULL;

    buf->alloc = SSHBUF_SIZE_INIT;
    buf->max_size = SSHBUF_SIZE_MAX;
    buf->readonly = 0;
    buf->refcount = 1;
    buf->parent = NULL;

    /* d and cd refer to the same storage; cd is the read-only view. */
    buf->d = calloc(1, buf->alloc);
    buf->cd = buf->d;
    if (buf->cd == NULL) {
        free(buf);
        return NULL;
    }
    return buf;
}

继续以incoming_packet为例,可以看到它被引用的位置全部都在packet.c中。因此这个文件一定是与收发包相关的重要文件。从密度上看,ssh_packet_read_poll2是密度最高的,它有如下的调用关系:ssh_packet_read (ssh_packet_read_expect) --> ssh_packet_read_seqnr --> ssh_packet_read_poll_seqnr --> ssh_packet_read_poll2。其中,ssh_packet_read(_expect)没有任何引用,因此放弃这条路线;另一条路线是main (skip_connect label) --> ssh_session2 --> client_loop --> client_process_buffered_input_packets(clientloop.c,serverloop.c中也有类似路径) --> ssh_dispatch_run_fatal --> ssh_dispatch_run --> ssh_packet_read_seqnr --> ssh_packet_read_poll_seqnr --> ssh_packet_read_poll2。显然下面这条是client的路径。我们先不看main,而是直接从client_loop这里开始。

client_loop用于实现与服务器的交互会话。它在用户通过身份验证,并且在远程主机上启动了一个命令后被调用。当escape_char不是SSH_ESCAPECHAR_NONE时,它会被用作终止或暂停会话的控制字符。

client_loop的第一阶段,初始化变量,如下所示。

/* Initialize variables. */
last_was_cr = 1;
exit_status = -1;
connection_in = ssh_packet_get_connection_in(ssh);
connection_out = ssh_packet_get_connection_out(ssh);

/* Returns the socket used for reading. */

int
ssh_packet_get_connection_in(struct ssh *ssh)
{
    return ssh->state->connection_in;
}

/* Returns the descriptor used for writing. */

int
ssh_packet_get_connection_out(struct ssh *ssh)
{
    return ssh->state->connection_out;
}

然后,设置不同的信号由signal_handler处理。

/*
 * Set signal handlers, (e.g. to restore non-blocking mode)
 * but don't overwrite SIG_IGN, matches behaviour from rsh(1)
 */
if (ssh_signal(SIGHUP, SIG_IGN) != SIG_IGN)
    ssh_signal(SIGHUP, signal_handler);
if (ssh_signal(SIGINT, SIG_IGN) != SIG_IGN)
    ssh_signal(SIGINT, signal_handler);
if (ssh_signal(SIGQUIT, SIG_IGN) != SIG_IGN)
    ssh_signal(SIGQUIT, signal_handler);
if (ssh_signal(SIGTERM, SIG_IGN) != SIG_IGN)
    ssh_signal(SIGTERM, signal_handler);
ssh_signal(SIGWINCH, window_change_handler);

然后检查是否有pty,如果有则调用enter_raw_mode让终端进入raw模式。接着在会话channel上注册escape字符过滤器(仅当escape_char_arg不是SSH_ESCAPECHAR_NONE时)和channel关闭时的清理回调,最后设置定期的服务器存活检查。

if (have_pty)
    enter_raw_mode(options.request_tty == REQUEST_TTY_FORCE);

session_ident = ssh2_chan_id;
if (session_ident != -1) {
    if (escape_char_arg != SSH_ESCAPECHAR_NONE) {
        channel_register_filter(ssh, session_ident,
            client_simple_escape_filter, NULL,
            client_filter_cleanup,
            client_new_escape_filter_ctx(
            escape_char_arg));
    }
    channel_register_cleanup(ssh, session_ident,
        client_channel_closed, 0);
}

schedule_server_alive_check();

接下来进入大循环。只要还正常工作,就在循环的开头处理服务器传来的packet。

/* Main loop of the client for the interactive session mode. */
while (!quit_pending) {

    /* Process buffered packets sent by the server. */
    client_process_buffered_input_packets(ssh);

处理代码如下,核心在于ssh_dispatch_run。这也正是链条上的:main --> ssh_session2 --> client_loop --> client_process_buffered_input_packets --> ssh_dispatch_run_fatal --> [[ssh_dispatch_run]] --> ssh_packet_read_seqnr --> ssh_packet_read_poll_seqnr --> ssh_packet_read_poll2 这个位置。让我们仔细阅读。

/*
 * Reads packets and hands each one to the handler registered in
 * ssh->dispatch[] for its type.
 *
 * With mode == DISPATCH_BLOCK packets are read with
 * ssh_packet_read_seqnr(); otherwise they are polled with
 * ssh_packet_read_poll_seqnr() and the function returns 0 as soon as
 * the poll yields SSH_MSG_NONE.
 *
 * Returns 0 when polling runs dry or *done becomes non-zero, the first
 * non-zero result of a read or of a handler, or SSH_ERR_DISCONNECTED
 * after a packet without a usable handler has triggered a disconnect.
 */
int
ssh_dispatch_run(struct ssh *ssh, int mode, volatile sig_atomic_t *done)
{
    u_int32_t seqnr;
    u_char type;
    int r;

    for (;;) {
        if (mode == DISPATCH_BLOCK)
            r = ssh_packet_read_seqnr(ssh, &type, &seqnr);
        else
            r = ssh_packet_read_poll_seqnr(ssh, &type, &seqnr);
        if (r != 0)
            return r;
        /* Only the polling path can come back without a packet. */
        if (mode != DISPATCH_BLOCK && type == SSH_MSG_NONE)
            return 0;

        /* Type 0, out-of-range and unregistered types end the session. */
        if (type == 0 || type >= DISPATCH_MAX ||
            ssh->dispatch[type] == NULL) {
            r = sshpkt_disconnect(ssh,
                "protocol error: rcvd type %d", type);
            return r != 0 ? r : SSH_ERR_DISCONNECTED;
        }

        if (ssh->dispatch_skip_packets) {
            debug2("skipped packet (type %u)", type);
            ssh->dispatch_skip_packets--;
            continue;
        }

        if ((r = (*ssh->dispatch[type])(type, seqnr, ssh)) != 0)
            return r;
        if (done != NULL && *done)
            return 0;
    }
}

/*
 * Wrapper around ssh_dispatch_run() for callers that cannot handle
 * errors: any non-zero result is passed to sshpkt_fatal() together
 * with this function's name.
 */
void
ssh_dispatch_run_fatal(struct ssh *ssh, int mode, volatile sig_atomic_t *done)
{
    int r = ssh_dispatch_run(ssh, mode, done);

    if (r != 0)
        sshpkt_fatal(ssh, r, "%s", __func__);
}
/*
 * Dispatches the packets that are currently available, in non-blocking
 * mode, via ssh_dispatch_run_fatal(); quit_pending is passed as the
 * "done" flag that stops the dispatch loop.
 */
static void
client_process_buffered_input_packets(struct ssh *ssh)
{
    ssh_dispatch_run_fatal(ssh, DISPATCH_NONBLOCK, &quit_pending);
}

首先,基于是否阻塞,选择调用ssh_packet_read_seqnr还是ssh_packet_read_poll_seqnr。获取一个type,并用于不同的回调函数上(*ssh->dispatch[type])(type, seqnr, ssh);。这里也是一个典型的状态机没跑了。

    if (mode == DISPATCH_BLOCK) {
        r = ssh_packet_read_seqnr(ssh, &type, &seqnr);
        if (r != 0)
            return r;
    } else {
        r = ssh_packet_read_poll_seqnr(ssh, &type, &seqnr);
        if (r != 0)
            return r;
        if (type == SSH_MSG_NONE)
            return 0;
    }

其余的并没有太多有实际影响的操作,让我们跟入ssh_packet_read_seqnr。它有一个8192字节的buf,等待ssh server的输入,如果有的话,则调用ssh_packet_read_poll_seqnr,这很好,因为上面我们也准备看这个函数。刚好可以一起看掉。

int
ssh_packet_read_seqnr(struct ssh *ssh, u_char *typep, u_int32_t *seqnr_p)
{
    struct session_state *state = ssh->state;
    int len, r, ms_remain;
    struct pollfd pfd;
    char buf[8192];
    struct timeval start;
    struct timespec timespec, *timespecp = NULL;

    DBG(debug("packet_read()"));

    /*
     * Since we are blocking, ensure that all written packets have
     * been sent.
     */
    if ((r = ssh_packet_write_wait(ssh)) != 0)
        goto out;

    /* Stay in the loop until we have received a complete packet. */
    for (;;) {
        /* Try to read a packet from the buffer. */
        r = ssh_packet_read_poll_seqnr(ssh, typep, seqnr_p);
        if (r != 0)
            break;
        /* If we got a packet, return it. */
        if (*typep != SSH_MSG_NONE)
            break;

目光转向ssh_packet_read_poll_seqnr。它调用ssh_packet_read_poll2,并根据消息类型来加以处理。

int
ssh_packet_read_poll_seqnr(struct ssh *ssh, u_char *typep, u_int32_t *seqnr_p)
{
    struct session_state *state = ssh->state;
    u_int reason, seqnr;
    int r;
    u_char *msg;

    for (;;) {
        msg = NULL;
        r = ssh_packet_read_poll2(ssh, typep, seqnr_p);
        if (r != 0)
            return r;
        if (*typep) {
            state->keep_alive_timeouts = 0;
            DBG(debug("received packet type %d", *typep));
        }
        switch (*typep) {

对于ssh_packet_read_poll2,如果是mux状态(multiplex多路),则转交给ssh_packet_read_poll2_mux来处理,否则进入自己的流程。

int
ssh_packet_read_poll2(struct ssh *ssh, u_char *typep, u_int32_t *seqnr_p)
{
    struct session_state *state = ssh->state;
    u_int padlen, need;
    u_char *cp;
    u_int maclen, aadlen = 0, authlen = 0, block_size;
    struct sshenc *enc   = NULL;
    struct sshmac *mac   = NULL;
    struct sshcomp *comp = NULL;
    int r;

    if (state->mux)
        return ssh_packet_read_poll2_mux(ssh, typep, seqnr_p);

    *typep = SSH_MSG_NONE;

老规矩,先看看ssh_packet_read_poll2_mux。代码不长。这里会遇到几个工具函数,第一个是sshbuf_ptr。它先检查buf是否合法。这个检查非常严格,包括buf是否为空、引用计数是否正常、大小是否超出上限、长度是否正常、off是否已经超过size。任何一项不满足都会导致其抛出SIGSEGV。如果ok的话,返回cd + off。cd是buffer的const data。它与另一个成员“d”指向同一块内存,只不过加了修饰符const使其不能被修改。当然在编译后就没有区别了。

/*
 * Verifies the internal invariants of a buffer.
 *
 * Returns 0 when buf is non-NULL and consistent.  Otherwise the default
 * SIGSEGV disposition is restored and SIGSEGV is raised;
 * SSH_ERR_INTERNAL_ERROR is returned only if execution continues past
 * raise().
 */
static inline int
sshbuf_check_sanity(const struct sshbuf *buf)
{
    SSHBUF_TELL("sanity");
    /*
     * Invariants: a writable buffer has d == cd; refcount lies in
     * [1, SSHBUF_REFS_MAX]; cd is set; and
     * off <= size <= alloc <= max_size <= SSHBUF_SIZE_MAX.
     */
    if (__predict_false(buf == NULL ||
        (!buf->readonly && buf->d != buf->cd) ||
        buf->refcount < 1 || buf->refcount > SSHBUF_REFS_MAX ||
        buf->cd == NULL ||
        buf->max_size > SSHBUF_SIZE_MAX ||
        buf->alloc > buf->max_size ||
        buf->size > buf->alloc ||
        buf->off > buf->size)) {
        /* Do not try to recover from corrupted buffer internals */
        SSHBUF_DBG(("SSH_ERR_INTERNAL_ERROR"));
        ssh_signal(SIGSEGV, SIG_DFL);
        raise(SIGSEGV);
        return SSH_ERR_INTERNAL_ERROR;
    }
    return 0;
}

/*
 * Returns a read-only pointer to the data at the buffer's current
 * offset (cd + off), or NULL if the buffer fails the sanity check.
 */
const u_char *
sshbuf_ptr(const struct sshbuf *buf)
{
    return sshbuf_check_sanity(buf) == 0 ? buf->cd + buf->off : NULL;
}

/*
 * Mux-mode counterpart of ssh_packet_read_poll2(): tries to move one
 * complete packet from state->input into state->incoming_packet.
 *
 * On success returns 0 with *typep set to the packet type, or to
 * SSH_MSG_NONE when the input does not yet hold a whole packet.
 * Returns SSH_ERR_INTERNAL_ERROR if ssh->kex is set,
 * SSH_ERR_MESSAGE_INCOMPLETE if the length field lies outside
 * [5, PACKET_MAX_SIZE], or the error reported by an sshbuf helper.
 */
static int
ssh_packet_read_poll2_mux(struct ssh *ssh, u_char *typep, u_int32_t *seqnr_p)
{
    struct session_state *state = ssh->state;
    const u_char *cp;
    size_t need;
    int r;

    if (ssh->kex)
        return SSH_ERR_INTERNAL_ERROR;
    *typep = SSH_MSG_NONE;
    /* cp is only dereferenced after the length checks below. */
    cp = sshbuf_ptr(state->input);
    /*
     * packlen == 0 means no length field has been parsed yet; once
     * parsed it stays cached in state until the packet is consumed.
     */
    if (state->packlen == 0) {
        if (sshbuf_len(state->input) < 4 + 1)
            return 0; /* packet is incomplete */
        state->packlen = PEEK_U32(cp);
        if (state->packlen < 4 + 1 ||
            state->packlen > PACKET_MAX_SIZE)
            return SSH_ERR_MESSAGE_INCOMPLETE;
    }
    /* Whole frame = 4-byte length field + packlen bytes that follow. */
    need = state->packlen + 4;
    if (sshbuf_len(state->input) < need)
        return 0; /* packet is incomplete */
    sshbuf_reset(state->incoming_packet);
    /*
     * Copy the bytes after the length field, drop the frame from the
     * input, then read two bytes from the copy: the first is discarded,
     * the second is stored as the packet type.
     */
    if ((r = sshbuf_put(state->incoming_packet, cp + 4,
        state->packlen)) != 0 ||
        (r = sshbuf_consume(state->input, need)) != 0 ||
        (r = sshbuf_get_u8(state->incoming_packet, NULL)) != 0 ||
        (r = sshbuf_get_u8(state->incoming_packet, typep)) != 0)
        return r;
    if (ssh_packet_log_type(*typep))
        debug3_f("type %u", *typep);
    /* sshbuf_dump(state->incoming_packet, stderr); */
    /* reset for next packet */
    state->packlen = 0;
    /* r is 0 here: every helper in the chain above succeeded. */
    return r;
}

上面这段太长了,我们单独粘出来看。如果当前packlen是0,先检查input中的数据是否不少于5字节(4 + 1),不足则说明包还不完整,直接返回;否则取出开头的32位(4字节)作为packlen。它是一个unsigned int类型,随后检查大小是否合法(在5~PACKET_MAX_SIZE之间)。PACKET_MAX_SIZE是(256 * 1024)。如果不合法,返回错误。

cp = sshbuf_ptr(state->input);
if (state->packlen == 0) {
    if (sshbuf_len(state->input) < 4 + 1)
        return 0; /* packet is incomplete */
    state->packlen = PEEK_U32(cp);
    if (state->packlen < 4 + 1 ||
        state->packlen > PACKET_MAX_SIZE)
        return SSH_ERR_MESSAGE_INCOMPLETE;
}

随后,计算出总共需要packlen+4字节的数据,如果input中的数据还不够则直接返回(包不完整);如果足够则开始接收数据:从cp+4处拷贝packlen字节的数据到incoming_packet里。这里涉及两个函数,一个是sshbuf_put,一个是sshbuf_reserve。reserve函数用于在buffer末尾预留指定长度的空间(如果已经超过max length,则有一个奇特的pack操作,即将一半长度的buffer移动到前面去)。它返回预留区域的起始地址,并修正buf的真实大小。然后返回到上一层sshbuf_put,它调用memcpy将长度为len的数据从v拷贝到p中。

/*
 * Appends len bytes of space to the end of buf.
 *
 * If dpp is not NULL, *dpp receives a pointer to the start of the new
 * region on success and is set to NULL on failure.
 * Returns 0 on success or the error from sshbuf_allocate().
 */
int
sshbuf_reserve(struct sshbuf *buf, size_t len, u_char **dpp)
{
    int r;

    if (dpp != NULL)
        *dpp = NULL;

    SSHBUF_DBG(("reserve buf = %p len = %zu", buf, len));
    r = sshbuf_allocate(buf, len);
    if (r != 0)
        return r;

    /* The new region starts where the used data currently ends. */
    if (dpp != NULL)
        *dpp = buf->d + buf->size;
    buf->size += len;
    return 0;
}

/*
 * Appends len bytes starting at v to the end of buf.
 *
 * Returns 0 on success or the negative error from sshbuf_reserve().
 */
int
sshbuf_put(struct sshbuf *buf, const void *v, size_t len)
{
    u_char *dst;
    int r = sshbuf_reserve(buf, len, &dst);

    if (r < 0)
        return r;
    /* Nothing to copy for an empty append. */
    if (len == 0)
        return 0;
    memcpy(dst, v, len);
    return 0;
}


need = state->packlen + 4;
if (sshbuf_len(state->input) < need)
    return 0; /* packet is incomplete */
sshbuf_reset(state->incoming_packet);
if ((r = sshbuf_put(state->incoming_packet, cp + 4,
    state->packlen)) != 0 ||
    (r = sshbuf_consume(state->input, need)) != 0 ||
    (r = sshbuf_get_u8(state->incoming_packet, NULL)) != 0 ||
    (r = sshbuf_get_u8(state->incoming_packet, typep)) != 0)
    return r;

随后,调用sshbuf_consume,将刚刚读取完的need个字节从input里去除。再调用sshbuf_get_u8连续获取两个字节,第二个作为type。获取的时候也有校验。读取完以后返回。注意这个typep是传进来的指针,这里会直接把类型给修改掉。

回到ssh_packet_read_poll2的后半段。其实差不多,只不过多了一个cipher_crypt的环节。

xz 源码阅读 - 2

上篇文章说到coder->sequence被设置成了“SEQ_BLOCK”。

// Fall through

case SEQ_BLOCK: {
    const lzma_ret ret = coder->block_decoder.code(
            coder->block_decoder.coder, allocator,
            in, in_pos, in_size, out, out_pos, out_size,
            action);

    if (ret != LZMA_STREAM_END)
        return ret;

    // Block decoded successfully. Add the new size pair to
    // the Index hash.
    return_if_error(lzma_index_hash_append(coder->index_hash,
            lzma_block_unpadded_size(
                &coder->block_options),
            coder->block_options.uncompressed_size));

    coder->sequence = SEQ_BLOCK_HEADER;
    break;
}

随后,这里就开始调用block_decoder.code。前面他被设置成了block_decode:

next->coder = coder;
next->code = &block_decode;
next->end = &block_decoder_end;
coder->next = LZMA_NEXT_CODER_INIT;

// Basic initializations
coder->sequence = SEQ_CODE;
coder->block = block;
coder->compressed_size = 0;
coder->uncompressed_size = 0;

因此,我们查看block_decode的代码。由于代码比较长,这里继续加序号阅读。

static lzma_ret
block_decode(void *coder_ptr, const lzma_allocator *allocator,
        const uint8_t *restrict in, size_t *restrict in_pos,
        size_t in_size, uint8_t *restrict out,
        size_t *restrict out_pos, size_t out_size, lzma_action action)
{
    lzma_block_coder *coder = coder_ptr;

block_decode:1. 注意这里的coder->sequence是另一个“sequence”,它最开始是被初始化成SEQ_CODE的。因此第一步从调用coder->next.code开始。

coder = coder_ptr(参数1)是上一步的coder->block_decoder.coder,因此这个调用实际调用的是:coder->block_decoder.coder->next.code,这些decoder由filter决定。

    switch (coder->sequence) {
    case SEQ_CODE: {
        const size_t in_start = *in_pos;
        const size_t out_start = *out_pos;

        const lzma_ret ret = coder->next.code(coder->next.coder,
                allocator, in, in_pos, in_size,
                out, out_pos, out_size, action);

        const size_t in_used = *in_pos - in_start;
        const size_t out_used = *out_pos - out_start;

        // NOTE: We compare to compressed_limit here, which prevents
        // the total size of the Block growing past LZMA_VLI_MAX.
        if (update_size(&coder->compressed_size, in_used,
                    coder->compressed_limit)
                || update_size(&coder->uncompressed_size,
                    out_used,
                    coder->block->uncompressed_size))
            return LZMA_DATA_ERROR;

block_decode:2. 调用完成后,调用lzma_check_update。确定CRC32/64并校验。完成后,设置seq为SEQ_PADDING。

        if (!coder->ignore_check)
            lzma_check_update(&coder->check, coder->block->check,
                    out + out_start, out_used);

        if (ret != LZMA_STREAM_END)
            return ret;

        // Compressed and Uncompressed Sizes are now at their final
        // values. Verify that they match the values given to us.
        if (!is_size_valid(coder->compressed_size,
                    coder->block->compressed_size)
                || !is_size_valid(coder->uncompressed_size,
                    coder->block->uncompressed_size))
            return LZMA_DATA_ERROR;

        // Copy the values into coder->block. The caller
        // may use this information to construct Index.
        coder->block->compressed_size = coder->compressed_size;
        coder->block->uncompressed_size = coder->uncompressed_size;

        coder->sequence = SEQ_PADDING;
    }

block_decode:3. 进入SEQ_PADDING状态,逐个读取并校验填充字节(必须为0x00),直到压缩数据的长度对齐到4字节。

    // Fall through

    case SEQ_PADDING:
        // Compressed Data is padded to a multiple of four bytes.
        while (coder->compressed_size & 3) {
            if (*in_pos >= in_size)
                return LZMA_OK;

            // We use compressed_size here just get the Padding
            // right. The actual Compressed Size was stored to
            // coder->block already, and won't be modified by
            // us anymore.
            ++coder->compressed_size;

            if (in[(*in_pos)++] != 0x00)
                return LZMA_DATA_ERROR;
        }

        if (coder->block->check == LZMA_CHECK_NONE)
            return LZMA_STREAM_END;

        if (!coder->ignore_check)
            lzma_check_finish(&coder->check, coder->block->check);

        coder->sequence = SEQ_CHECK;

block_decode:4. 进入SEQ_CHECK状态,首先获取checksize,拷贝原始数据并进行比较。随后结束。

    // Fall through

    case SEQ_CHECK: {
        const size_t check_size = lzma_check_size(coder->block->check);
        lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check,
                &coder->check_pos, check_size);
        if (coder->check_pos < check_size)
            return LZMA_OK;

        // Validate the Check only if we support it.
        // coder->check.buffer may be uninitialized
        // when the Check ID is not supported.
        if (!coder->ignore_check
                && lzma_check_is_supported(coder->block->check)
                && memcmp(coder->block->raw_check,
                    coder->check.buffer.u8,
                    check_size) != 0)
            return LZMA_DATA_ERROR;

        return LZMA_STREAM_END;
    }
    }

    return LZMA_PROG_ERROR;
}

回到最开始的地方,进入 coder->sequence = SEQ_BLOCK_HEADER; 状态。这是解压的上一个状态,我们跳过看下一个SEQ_INDEX。

case SEQ_INDEX: {
    // If we don't have any input, don't call
    // lzma_index_hash_decode() since it would return
    // LZMA_BUF_ERROR, which we must not do here.
    if (*in_pos >= in_size)
        return LZMA_OK;

    // Decode the Index and compare it to the hash calculated
    // from the sizes of the Blocks (if any).
    const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
            in, in_pos, in_size);
    if (ret != LZMA_STREAM_END)
        return ret;

    coder->sequence = SEQ_STREAM_FOOTER;
}

// Fall through

主代码为lzma_index_hash_decode。整体代码非常简单,这里不多做介绍了。

/// Decodes the Index field incrementally and checks it against the
/// information gathered in index_hash->blocks.
///
/// The decoder is a resumable state machine driven by
/// index_hash->sequence: SEQ_BLOCK (Index Indicator) -> SEQ_COUNT ->
/// SEQ_UNPADDED / SEQ_UNCOMPRESSED (once per Record) ->
/// SEQ_PADDING_INIT -> SEQ_PADDING -> SEQ_CRC32.
///
/// \param      index_hash  Decoder state; updated in place
/// \param      in          Input buffer
/// \param      in_pos      Read position in in[]; advanced as bytes
///                         are consumed
/// \param      in_size     Number of bytes available in in[]
///
/// \return     LZMA_STREAM_END once all four CRC32 bytes have matched,
///             LZMA_OK when more input is needed, LZMA_BUF_ERROR when
///             called without any input, LZMA_DATA_ERROR on any
///             mismatch, or an error code passed through from
///             lzma_vli_decode() or hash_append().
extern LZMA_API(lzma_ret)
lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
        size_t *in_pos, size_t in_size)
{
    // Catch zero input buffer here, because in contrast to Index encoder
    // and decoder functions, applications call this function directly
    // instead of via lzma_code(), which does the buffer checking.
    if (*in_pos >= in_size)
        return LZMA_BUF_ERROR;

    // NOTE: This function has many similarities to index_encode() and
    // index_decode() functions found from index_encoder.c and
    // index_decoder.c. See the comments especially in index_encoder.c.
    const size_t in_start = *in_pos;
    lzma_ret ret = LZMA_OK;

    // Each iteration handles one state; the loop ends when the input
    // is exhausted or a state returns or jumps to "out".
    while (*in_pos < in_size)
    switch (index_hash->sequence) {
    case SEQ_BLOCK:
        // Check the Index Indicator is present.
        if (in[(*in_pos)++] != 0x00)
            return LZMA_DATA_ERROR;

        index_hash->sequence = SEQ_COUNT;
        break;

    case SEQ_COUNT: {
        // index_hash->remaining receives the Record count.
        ret = lzma_vli_decode(&index_hash->remaining,
                &index_hash->pos, in, in_pos, in_size);
        if (ret != LZMA_STREAM_END)
            goto out;

        // The count must match the count of the Blocks decoded.
        if (index_hash->remaining != index_hash->blocks.count)
            return LZMA_DATA_ERROR;

        ret = LZMA_OK;
        index_hash->pos = 0;

        // Handle the special case when there are no Blocks.
        index_hash->sequence = index_hash->remaining == 0
                ? SEQ_PADDING_INIT : SEQ_UNPADDED;
        break;
    }

    case SEQ_UNPADDED:
    case SEQ_UNCOMPRESSED: {
        // Both fields are decoded by the same code; only the
        // destination differs.
        lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
                ? &index_hash->unpadded_size
                : &index_hash->uncompressed_size;

        ret = lzma_vli_decode(size, &index_hash->pos,
                in, in_pos, in_size);
        if (ret != LZMA_STREAM_END)
            goto out;

        ret = LZMA_OK;
        index_hash->pos = 0;

        if (index_hash->sequence == SEQ_UNPADDED) {
            if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
                    || index_hash->unpadded_size
                        > UNPADDED_SIZE_MAX)
                return LZMA_DATA_ERROR;

            index_hash->sequence = SEQ_UNCOMPRESSED;
        } else {
            // Update the hash.
            return_if_error(hash_append(&index_hash->records,
                    index_hash->unpadded_size,
                    index_hash->uncompressed_size));

            // Verify that we don't go over the known sizes. Note
            // that this validation is simpler than the one used
            // in lzma_index_hash_append(), because here we know
            // that values in index_hash->blocks are already
            // validated and we are fine as long as we don't
            // exceed them in index_hash->records.
            if (index_hash->blocks.blocks_size
                    < index_hash->records.blocks_size
                    || index_hash->blocks.uncompressed_size
                    < index_hash->records.uncompressed_size
                    || index_hash->blocks.index_list_size
                    < index_hash->records.index_list_size)
                return LZMA_DATA_ERROR;

            // Check if this was the last Record.
            index_hash->sequence = --index_hash->remaining == 0
                    ? SEQ_PADDING_INIT : SEQ_UNPADDED;
        }

        break;
    }

    case SEQ_PADDING_INIT:
        // From here on index_hash->pos counts the padding bytes
        // still expected (0 to 3).
        index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
                index_hash->records.count,
                index_hash->records.index_list_size)) & 3;
        index_hash->sequence = SEQ_PADDING;

    // Fall through

    case SEQ_PADDING:
        // One padding byte is consumed per loop iteration.
        if (index_hash->pos > 0) {
            --index_hash->pos;
            if (in[(*in_pos)++] != 0x00)
                return LZMA_DATA_ERROR;

            break;
        }

        // Compare the sizes.
        if (index_hash->blocks.blocks_size
                != index_hash->records.blocks_size
                || index_hash->blocks.uncompressed_size
                != index_hash->records.uncompressed_size
                || index_hash->blocks.index_list_size
                != index_hash->records.index_list_size)
            return LZMA_DATA_ERROR;

        // Finish the hashes and compare them.
        lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
        lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
        if (memcmp(index_hash->blocks.check.buffer.u8,
                index_hash->records.check.buffer.u8,
                lzma_check_size(LZMA_CHECK_BEST)) != 0)
            return LZMA_DATA_ERROR;

        // Finish the CRC32 calculation.
        index_hash->crc32 = lzma_crc32(in + in_start,
                *in_pos - in_start, index_hash->crc32);

        index_hash->sequence = SEQ_CRC32;

    // Fall through

    case SEQ_CRC32:
        // Compare the stored CRC32 one byte at a time, least
        // significant byte first; index_hash->pos is the byte
        // index (it is 0 when this state is entered).
        do {
            if (*in_pos == in_size)
                return LZMA_OK;

            if (((index_hash->crc32 >> (index_hash->pos * 8))
                    & 0xFF) != in[(*in_pos)++])
                return LZMA_DATA_ERROR;

        } while (++index_hash->pos < 4);

        return LZMA_STREAM_END;

    default:
        assert(0);
        return LZMA_PROG_ERROR;
    }

out:
    // Reached when the input ran out or lzma_vli_decode() did not
    // finish: fold the bytes consumed by this call into the running
    // CRC32 before returning.
    // Update the CRC32,
    index_hash->crc32 = lzma_crc32(in + in_start,
            *in_pos - in_start, index_hash->crc32);

    return ret;
}

回到上一层,状态SEQ_STREAM_FOOTER,代码如下。

case SEQ_STREAM_FOOTER: {
    // Copy the Stream Footer to the internal buffer.
    lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
            LZMA_STREAM_HEADER_SIZE);

    // Return if we didn't get the whole Stream Footer yet.
    if (coder->pos < LZMA_STREAM_HEADER_SIZE)
        return LZMA_OK;

    coder->pos = 0;

    // Decode the Stream Footer. The decoder gives
    // LZMA_FORMAT_ERROR if the magic bytes don't match,
    // so convert that return code to LZMA_DATA_ERROR.
    lzma_stream_flags footer_flags;

lzma_stream_footer_decode用于解码footer,解码flags并设置backward_size。校验footer size,并返回结果。

    const lzma_ret ret = lzma_stream_footer_decode(
            &footer_flags, coder->buffer);
    if (ret != LZMA_OK)
        return ret == LZMA_FORMAT_ERROR
                ? LZMA_DATA_ERROR : ret;

    // Check that Index Size stored in the Stream Footer matches
    // the real size of the Index field.
    if (lzma_index_hash_size(coder->index_hash)
            != footer_flags.backward_size)
        return LZMA_DATA_ERROR;

    // Compare that the Stream Flags fields are identical in
    // both Stream Header and Stream Footer.
    return_if_error(lzma_stream_flags_compare(
            &coder->stream_flags, &footer_flags));

    if (!coder->concatenated)
        return LZMA_STREAM_END;

    coder->sequence = SEQ_STREAM_PADDING;
}

// Fall through

然后是SEQ_STREAM_PADDING状态。这个状态也没有什么新奇的东西,就是检查要多少个字节的padding。

    case SEQ_STREAM_PADDING:
        assert(coder->concatenated);

        // Skip over possible Stream Padding.
        while (true) {
            if (*in_pos >= in_size) {
                // Unless LZMA_FINISH was used, we cannot
                // know if there's more input coming later.
                if (action != LZMA_FINISH)
                    return LZMA_OK;

                // Stream Padding must be a multiple of
                // four bytes.
                return coder->pos == 0
                        ? LZMA_STREAM_END
                        : LZMA_DATA_ERROR;
            }

            // If the byte is not zero, it probably indicates
            // beginning of a new Stream (or the file is corrupt).
            if (in[*in_pos] != 0x00)
                break;

            ++*in_pos;
            coder->pos = (coder->pos + 1) & 3;
        }

        // Stream Padding must be a multiple of four bytes (empty
        // Stream Padding is OK).
        if (coder->pos != 0) {
            ++*in_pos;
            return LZMA_DATA_ERROR;
        }

        // Prepare to decode the next Stream.
        return_if_error(stream_decoder_reset(coder, allocator));
        break;

    default:
        assert(0);
        return LZMA_PROG_ERROR;
    }

    // Never reached
}

xz 源码阅读 - 1

继续开坑xz。这次看的是https://sourceforge.net/projects/lzmautils/下的文件(不是go语言写的那个)。根据其示例代码,要解压xz程序,入口是“lzma_code”,让我们从这里开始。

先介绍一下背景。要解压xz文件,需要设置一个lzma_stream对象,并将其next_in设置为数据,avail_in设置为数据长度,然后调用lzma_code开始解析。

截屏2022-01-13 下午7.23.49.png

为了举例,先从I:\xz-5.2.5\doc\examples_old\xz_pipe_decomp.c:xz_decompress这个函数看起。

/* note: in_file and out_file must be open already */
int xz_decompress (FILE *in_file, FILE *out_file)
{
    lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */
        ……

    /* initialize xz decoder */
    ret_xz = lzma_stream_decoder (&strm, memory_limit, flags);
    if (ret_xz != LZMA_OK) {
        fprintf (stderr, "lzma_stream_decoder error: %d\n", (int) ret_xz);
        return RET_ERROR_INIT;
    }

lzma_stream_decoder 调用lzma_next_strm_init,

/*
 * Public entry point that sets up strm as a Stream decoder.
 *
 * lzma_next_strm_init() is a macro containing return statements: if
 * lzma_strm_init() or lzma_stream_decoder_init() fails, this function
 * returns that error code from inside the macro.  Otherwise LZMA_RUN
 * and LZMA_FINISH are marked as supported actions and LZMA_OK is
 * returned.
 */
extern LZMA_API(lzma_ret)
lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
{
    lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);

    strm->internal->supported_actions[LZMA_RUN] = true;
    strm->internal->supported_actions[LZMA_FINISH] = true;

    return LZMA_OK;
}

对应的,宏定义如下,所以相当于:1、lzma_strm_init(strm);2、lzma_stream_decoder_init(&strm->internal->next, strm->allocator, ...args...);

/// Initializes lzma_strm and calls func() to initialize strm->internal->next.
/// (The function being called will use lzma_next_coder_init()). If
/// initialization fails, memory that wasn't freed by func() is freed
/// along strm->internal.
///
/// NOTE: This macro contains return statements. If lzma_strm_init() or
/// func() fails, the *calling* function returns that error code; after a
/// func() failure lzma_end(strm) is called first. The extra arguments
/// are passed to func() after &strm->internal->next and strm->allocator.
#define lzma_next_strm_init(func, strm, ...) \
do { \
    return_if_error(lzma_strm_init(strm)); \
    const lzma_ret ret_ = func(&(strm)->internal->next, \
            (strm)->allocator, __VA_ARGS__); \
    if (ret_ != LZMA_OK) { \
        lzma_end(strm); \
        return ret_; \
    } \
} while (0)

#endif

调用的lzma_stream_decoder_init如下。

extern lzma_ret
lzma_stream_decoder_init(
        lzma_next_coder *next, const lzma_allocator *allocator,
        uint64_t memlimit, uint32_t flags)
{
    lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
    if (flags & ~LZMA_SUPPORTED_FLAGS)
        return LZMA_OPTIONS_ERROR;

lzma_next_coder_init也是一个宏,它设置next->init,也就是这里的strm->internal->next.init = lzma_stream_decoder_init

然后下面开始初始化strm->internal->next.coder,并且设置strm->internal->next.code为“stream_decode”等,如代码所示。

    lzma_stream_coder *coder = next->coder;
    if (coder == NULL) {
        coder = lzma_alloc(sizeof(lzma_stream_coder), allocator);
        if (coder == NULL)
            return LZMA_MEM_ERROR;

        next->coder = coder;
        next->code = &stream_decode;
        next->end = &stream_decoder_end;
        next->get_check = &stream_decoder_get_check;
        next->memconfig = &stream_decoder_memconfig;

        coder->block_decoder = LZMA_NEXT_CODER_INIT;
        coder->index_hash = NULL;
    }

    coder->memlimit = my_max(1, memlimit);
    coder->memusage = LZMA_MEMUSAGE_BASE;
    coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
    coder->tell_unsupported_check
            = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
    coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
    coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
    coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
    coder->first_stream = true;

    return stream_decoder_reset(coder, allocator);
}

回到xz_decompress中。准备好输入数据后,调用lzma_code。

while ((! in_finished) && (! out_finished)) {
    /* read incoming data */
    in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);

    if (feof (in_file)) {
        in_finished = true;
    }
    if (ferror (in_file)) {
        in_finished = true;
        ret = RET_ERROR_INPUT;
    }

    strm.next_in = in_buf;
    strm.avail_in = in_len;

    /* if no more data from in_buf, flushes the
       internal xz buffers and closes the decompressed data
       with LZMA_FINISH */
    action = in_finished ? LZMA_FINISH : LZMA_RUN;

    /* loop until there's no pending decompressed output */
    do {
        /* out_buf is clean at this point */
        strm.next_out = out_buf;
        strm.avail_out = OUT_BUF_MAX;

        /* decompress data */
        ret_xz = lzma_code (&strm, action);

lzma_code的代码截取如下。第一部分是有效性检查,检查lzma_stream是否正确设置(例如internal->next.code不能为NULL,它需要先由lzma_stream_decoder_init这类初始化函数设置好),以及action是否合法、是否在supported_actions中被启用。

extern LZMA_API(lzma_ret)
lzma_code(lzma_stream *strm, lzma_action action)
{
    // Sanity checks
    if ((strm->next_in == NULL && strm->avail_in != 0)
            || (strm->next_out == NULL && strm->avail_out != 0)
            || strm->internal == NULL
            || strm->internal->next.code == NULL
            || (unsigned int)(action) > LZMA_ACTION_MAX
            || !strm->internal->supported_actions[action])
        return LZMA_PROG_ERROR;

    // Check if unsupported members have been set to non-zero or non-NULL,
    // which would indicate that some new feature is wanted.
    if (strm->reserved_ptr1 != NULL
            || strm->reserved_ptr2 != NULL
            || strm->reserved_ptr3 != NULL
            || strm->reserved_ptr4 != NULL
            || strm->reserved_int1 != 0
            || strm->reserved_int2 != 0
            || strm->reserved_int3 != 0
            || strm->reserved_int4 != 0
            || strm->reserved_enum1 != LZMA_RESERVED_ENUM
            || strm->reserved_enum2 != LZMA_RESERVED_ENUM)
        return LZMA_OPTIONS_ERROR;

检查通过后,是一个stream中sequence的检查。可以看出来xz由各种状态机组成。这个sequence代表其动作目前的推进状态。

switch (strm->internal->sequence) {
case ISEQ_RUN:
    switch (action) {
    case LZMA_RUN:
        break;

    case LZMA_SYNC_FLUSH:
        strm->internal->sequence = ISEQ_SYNC_FLUSH;
        break;

    case LZMA_FULL_FLUSH:
        strm->internal->sequence = ISEQ_FULL_FLUSH;
        break;

    case LZMA_FINISH:
        strm->internal->sequence = ISEQ_FINISH;
        break;

    case LZMA_FULL_BARRIER:
        strm->internal->sequence = ISEQ_FULL_BARRIER;
        break;
    }

    break;

case ISEQ_SYNC_FLUSH:
    // The same action must be used until we return
    // LZMA_STREAM_END, and the amount of input must not change.
    if (action != LZMA_SYNC_FLUSH
            || strm->internal->avail_in != strm->avail_in)
        return LZMA_PROG_ERROR;

    break;

case ISEQ_FULL_FLUSH:
    if (action != LZMA_FULL_FLUSH
            || strm->internal->avail_in != strm->avail_in)
        return LZMA_PROG_ERROR;

    break;

case ISEQ_FINISH:
    if (action != LZMA_FINISH
            || strm->internal->avail_in != strm->avail_in)
        return LZMA_PROG_ERROR;

    break;

case ISEQ_FULL_BARRIER:
    if (action != LZMA_FULL_BARRIER
            || strm->internal->avail_in != strm->avail_in)
        return LZMA_PROG_ERROR;

    break;

case ISEQ_END:
    return LZMA_STREAM_END;

case ISEQ_ERROR:
default:
    return LZMA_PROG_ERROR;
}

状态操作结束后,调用其next.code()函数来处理。code是一个“lzma_code_function”类型,在每种不同的解码器初始化时,都会初始化自己的结构。

size_t in_pos = 0;
size_t out_pos = 0;
lzma_ret ret = strm->internal->next.code(
        strm->internal->next.coder, strm->allocator,
        strm->next_in, &in_pos, strm->avail_in,
        strm->next_out, &out_pos, strm->avail_out, action);

例如block decoder的:

    next->coder = coder;
    next->code = &block_decode;
    next->end = &block_decoder_end;
    coder->next = LZMA_NEXT_CODER_INIT;

index decoder的:

    next->coder = coder;
    next->code = &index_decode;
    next->end = &index_decoder_end;
    next->memconfig = &index_decoder_memconfig;
    coder->index = NULL;

xz支持的一共有:alone decoder、auto decoder、block decoder、index decoder、stream decoder、delta decoder、lz decoder和它们对应的encoder。xz也支持一个simple decoder,位于simple_coder.c。

以stream_decode为例。进入后,检查coder->sequence。

static lzma_ret
stream_decode(void *coder_ptr, const lzma_allocator *allocator,
        const uint8_t *restrict in, size_t *restrict in_pos,
        size_t in_size, uint8_t *restrict out,
        size_t *restrict out_pos, size_t out_size, lzma_action action)
{
    lzma_stream_coder *coder = coder_ptr;

    // When decoding the actual Block, it may be able to produce more
    // output even if we don't give it any new input.
    while (true)
    switch (coder->sequence) {

第一步通常是SEQ_STREAM_HEADER。它分别:

case SEQ_STREAM_HEADER: {
    // Copy the Stream Header to the internal buffer.
    lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
            LZMA_STREAM_HEADER_SIZE);
  1. 调用lzma_bufcpy拷贝LZMA_STREAM_HEADER_SIZE(12)字节的内容到coder->buffer中。lzma_bufcpy的参数含义是(in, in_pos, in_size, out, out_pos, out_size)。但是在拷贝前会检查源和目标剩余空间(in_avail, out_avail)是否够用。如果长度不够则退出。

    // Return if we didn't get the whole Stream Header yet.
    if (coder->pos < LZMA_STREAM_HEADER_SIZE)
        return LZMA_OK;
    
    coder->pos = 0;
    
  2. 解码头部信息。

    // Decode the Stream Header.
    const lzma_ret ret = lzma_stream_header_decode(
            &coder->stream_flags, coder->buffer);
    if (ret != LZMA_OK)
        return ret == LZMA_FORMAT_ERROR && !coder->first_stream
                ? LZMA_DATA_ERROR : ret;
    

lzma_stream_header_decode的代码如下:

extern LZMA_API(lzma_ret)
lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in)
{

-- 2.1 比较magic

    // Magic
    if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0)
        return LZMA_FORMAT_ERROR;

-- 2.2 计算in + 6处2字节(Stream Flags)的CRC32,并与in + 6 + 2处保存的4字节CRC32值比较是否一致。

    // Verify the CRC32 so we can distinguish between corrupt
    // and unsupported files.
    const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic),
            LZMA_STREAM_FLAGS_SIZE, 0);
    if (crc != read32le(in + sizeof(lzma_header_magic)
            + LZMA_STREAM_FLAGS_SIZE))
        return LZMA_DATA_ERROR;

-- 2.3 解码flags。其实只用来确定options->check = in[1] & 0x0f。

    // Stream Flags
    if (stream_flags_decode(options, in + sizeof(lzma_header_magic)))
        return LZMA_OPTIONS_ERROR;

    // Set Backward Size to indicate unknown value. That way
    // lzma_stream_flags_compare() can be used to compare Stream Header
    // and Stream Footer while keeping it useful also for comparing
    // two Stream Footers.
    options->backward_size = LZMA_VLI_UNKNOWN;

    return LZMA_OK;
}
  3. 拷贝刚才获取到的options->check,并进入下一个sequence。

    // If we are decoding concatenated Streams, and the later
    // Streams have invalid Header Magic Bytes, we give
    // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
    coder->first_stream = false;
    
    // Copy the type of the Check so that Block Header and Block
    // decoders see it.
    coder->block_options.check = coder->stream_flags.check;
    
    // Even if we return LZMA_*_CHECK below, we want
    // to continue from Block Header decoding.
    coder->sequence = SEQ_BLOCK_HEADER;
    
    // Detect if there's no integrity check or if it is
    // unsupported if those were requested by the application.
    if (coder->tell_no_check && coder->stream_flags.check
            == LZMA_CHECK_NONE)
        return LZMA_NO_CHECK;
    
    if (coder->tell_unsupported_check
            && !lzma_check_is_supported(
                coder->stream_flags.check))
        return LZMA_UNSUPPORTED_CHECK;
    
    if (coder->tell_any_check)
        return LZMA_GET_CHECK;
    

    }

两个sequence中间没有break,所以直接进入。这里我们也重新计数。

  1. 如果in[*in_pos]为0,则退出当前处理,设置sequence为SEQ_INDEX。如果不是则调用lzma_block_header_size_decode宏进行处理。值为 (x + 1) * 4 。

    // Fall through

    case SEQ_BLOCK_HEADER: {
    if (*in_pos >= in_size)
    return LZMA_OK;

    if (coder->pos == 0) {
        // Detect if it's Index.
        if (in[*in_pos] == 0x00) {
            coder->sequence = SEQ_INDEX;
            break;
        }
    
        // Calculate the size of the Block Header. Note that
        // Block Header decoder wants to see this byte too
        // so don't advance *in_pos.
        coder->block_options.header_size
                = lzma_block_header_size_decode(
                    in[*in_pos]);
    }
    
  2. 拷贝header_size字节的Block Header到coder->buffer中。

    // Copy the Block Header to the internal buffer.
    lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
            coder->block_options.header_size);
    
    // Return if we didn't get the whole Block Header yet.
    if (coder->pos < coder->block_options.header_size)
        return LZMA_OK;
    
    coder->pos = 0;
    
    // Version 1 is needed to support the .ignore_check option.
    coder->block_options.version = 1;
    
  3. 设置一个长度为LZMA_FILTERS_MAX + 1(4 + 1)的filters buffer。调用lzma_block_header_decode解析头信息。

    // Set up a buffer to hold the filter chain. Block Header
    // decoder will initialize all members of this array so
    // we don't need to do it here.
    lzma_filter filters[LZMA_FILTERS_MAX + 1];
    coder->block_options.filters = filters;
    
    // Decode the Block Header.
    return_if_error(lzma_block_header_decode(&coder->block_options,
            allocator, coder->buffer));
    

3.1 lzma_block_header_decode的定义如下。初始化所有的filters。

extern LZMA_API(lzma_ret)
lzma_block_header_decode(lzma_block *block,
        const lzma_allocator *allocator, const uint8_t *in)
{
    // NOTE: We consider the header to be corrupt not only when the
    // CRC32 doesn't match, but also when variable-length integers
    // are invalid or over 63 bits, or if the header is too small
    // to contain the claimed information.

    // Initialize the filter options array. This way the caller can
    // safely free() the options even if an error occurs in this function.
    for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) {
        block->filters[i].id = LZMA_VLI_UNKNOWN;
        block->filters[i].options = NULL;
    }

    // Versions 0 and 1 are supported. If a newer version was specified,
    // we need to downgrade it.
    if (block->version > 1)
        block->version = 1;

    // This isn't a Block Header option, but since the decompressor will
    // read it if version >= 1, it's better to initialize it here than
    // to expect the caller to do it since in almost all cases this
    // should be false.
    block->ignore_check = false;

3.2 调用lzma_block_header_size_decode宏(复习一下, (x+1) * 4)来对比数据。并校验节的crc32。

    // Validate Block Header Size and Check type. The caller must have
    // already set these, so it is a programming error if this test fails.
    if (lzma_block_header_size_decode(in[0]) != block->header_size
            || (unsigned int)(block->check) > LZMA_CHECK_ID_MAX)
        return LZMA_PROG_ERROR;

    // Exclude the CRC32 field.
    const size_t in_size = block->header_size - 4;

    // Verify CRC32
    if (lzma_crc32(in, in_size, 0) != read32le(in + in_size))
        return LZMA_DATA_ERROR;

    // Check for unsupported flags.
    if (in[1] & 0x3C)
        return LZMA_OPTIONS_ERROR;

    // Start after the Block Header Size and Block Flags fields.
    size_t in_pos = 2;

3.3 这里出现了一个lzma_vli_decode函数。vli代表“variable length integer”。变长整数的范围是0~ 0x7fffffff`ffffffff,最长耗费9字节。lzma_vli_decode(vli, vli_pos, in, in_pos, in_size)会做一些校验,并正确实现转换。

因此这里获取“compressed_size”,是压缩后的大小,并校验是否对齐以及是否过长。当然如代码所示,也可以是未知大小。

    // Compressed Size
    if (in[1] & 0x40) {
        return_if_error(lzma_vli_decode(&block->compressed_size,
                NULL, in, &in_pos, in_size));

        // Validate Compressed Size. This checks that it isn't zero
        // and that the total size of the Block is a valid VLI.
        if (lzma_block_unpadded_size(block) == 0)
            return LZMA_DATA_ERROR;
    } else {
        block->compressed_size = LZMA_VLI_UNKNOWN;
    }

3.4 同样的,获取解压后的大小。

    // Uncompressed Size
    if (in[1] & 0x80)
        return_if_error(lzma_vli_decode(&block->uncompressed_size,
                NULL, in, &in_pos, in_size));
    else
        block->uncompressed_size = LZMA_VLI_UNKNOWN;

3.5 接下来处理各种filter。最多可以有4个。

    // Filter Flags
    const size_t filter_count = (in[1] & 3U) + 1;
    for (size_t i = 0; i < filter_count; ++i) {
        const lzma_ret ret = lzma_filter_flags_decode(
                &block->filters[i], allocator,
                in, &in_pos, in_size);
        if (ret != LZMA_OK) {
            free_properties(block, allocator);
            return ret;
        }
    }

3.5.1 lzma_filter_flags_decode稍微麻烦点,贴一下代码看看。首先,解码filter id。id最大序号是(1 << 62) - 1(大于等于LZMA_FILTER_RESERVED_START的id会被拒绝)。然后,解码property size。property size最大不能超过剩余长度。然后调用lzma_properties_decode进一步解析属性。

/// Decodes one Filter Flags field: a VLI filter ID, a VLI properties size,
/// and then that many bytes of filter properties.
///
/// \param filter     Receives the decoded ID; filter->options is set to
///                   NULL first and is then filled in by
///                   lzma_properties_decode().
/// \param allocator  Passed through to lzma_properties_decode().
/// \param in         Input buffer.
/// \param in_pos     Current read position in `in`; advanced past
///                   everything that was consumed.
/// \param in_size    Size of `in`.
///
/// \return LZMA_DATA_ERROR if the ID is in the reserved range or the
///         declared properties size exceeds the remaining input;
///         otherwise the result of lzma_properties_decode(). Errors from
///         lzma_vli_decode() are propagated by return_if_error().
extern LZMA_API(lzma_ret)
lzma_filter_flags_decode(
        lzma_filter *filter, const lzma_allocator *allocator,
        const uint8_t *in, size_t *in_pos, size_t in_size)
{
    // Set the pointer to NULL so the caller can always safely free it.
    filter->options = NULL;

    // Filter ID
    return_if_error(lzma_vli_decode(&filter->id, NULL,
            in, in_pos, in_size));

    // IDs at or above LZMA_FILTER_RESERVED_START are rejected as
    // corrupt data.
    if (filter->id >= LZMA_FILTER_RESERVED_START)
        return LZMA_DATA_ERROR;

    // Size of Properties
    lzma_vli props_size;
    return_if_error(lzma_vli_decode(&props_size, NULL,
            in, in_pos, in_size));

    // Filter Properties
    // The properties must fit entirely in what is left of the input.
    if (in_size - *in_pos < props_size)
        return LZMA_DATA_ERROR;

    const lzma_ret ret = lzma_properties_decode(
            filter, allocator, in + *in_pos, props_size);

    // The position is advanced past the properties whether or not
    // decoding them succeeded.
    *in_pos += props_size;

    return ret;
}

3.5.2 lzma_properties_decode代码如下。对filter->id搜索合适的decoder。根据xz的配置可以有不同的decoder,Linux中的liblzma.so.5.2.4支持全部9种decoder,逆向结果如下(代码被高度优化,但结果就是会遍历9项,而decoder最多也支持9种)。

每个decoder由四个段组成,分别是{.id = xx, .init = 初始化函数, .memusage = null 或者对应函数, .props_decode = props_decode函数(通常都是lzma_simple_props_decode,少数不同)}。

__int64 __fastcall lzma_properties_decode(_QWORD *a1, __int64 a2, __int64 a3, __int64 a4)
{
  ……
  a1[1] = 0LL;
  v4 = 0LL;
  for ( i = 0x4000000000000001LL; *a1 != i; i = *((_QWORD *)&unk_26CC0 + 4 * v4) )
  {
    if ( ++v4 == 9 )
      return 8LL;
  }
  ……
}

extern LZMA_API(lzma_ret)
lzma_properties_decode(lzma_filter *filter, const lzma_allocator *allocator,
        const uint8_t *props, size_t props_size)
{
    // Make it always NULL so that the caller can always safely free() it.
    filter->options = NULL;

    const lzma_filter_decoder *const fd = decoder_find(filter->id);
    if (fd == NULL)
        return LZMA_OPTIONS_ERROR;

    if (fd->props_decode == NULL)
        return props_size == 0 ? LZMA_OK : LZMA_OPTIONS_ERROR;

最后调用对应的props_decode函数。

    return fd->props_decode(
            &filter->options, allocator, props, props_size);
}

3.5.2.1 为了方便后续理解,这里把三种都读一遍。第一个是lzma_lzma_props_decode。要求props_size为5。

extern lzma_ret
lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
        const uint8_t *props, size_t props_size)
{
    if (props_size != 5)
        return LZMA_OPTIONS_ERROR;

    lzma_options_lzma *opt
            = lzma_alloc(sizeof(lzma_options_lzma), allocator);
    if (opt == NULL)
        return LZMA_MEM_ERROR;

3.5.2.2 调用lzma_lzma_lclppb_decode。字节最多不超过24*9+8=224。然后设置pb/lp/lc,说是要看规范,这里先不管了。

    if (lzma_lzma_lclppb_decode(opt, props[0]))
        goto error;

/// Unpacks the lc, lp and pb values from a single properties byte.
///
/// The arithmetic below inverts the packing
/// byte = (pb * 5 + lp) * 9 + lc.
///
/// \return true on error (byte out of range, or lc + lp greater than
///         LZMA_LCLP_MAX), false on success. Note that the options
///         fields have already been written when the second kind of
///         error is reported.
extern bool
lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
{
    // (4 * 5 + 4) * 9 + 8 == 224 is the largest accepted value.
    if (byte > (4 * 5 + 4) * 9 + 8)
        return true;

    // See the file format specification to understand this.
    options->pb = byte / (9 * 5);
    // What remains is lp * 9 + lc.
    byte -= options->pb * 9 * 5;
    options->lp = byte / 9;
    options->lc = byte - options->lp * 9;

    return options->lc + options->lp > LZMA_LCLP_MAX;
}

3.5.2.3 接下来看另一个,lzma_lzma2_props_decode。要求prop_size为1,该属性决定其字典大小。

/// Decodes the one-byte properties field shown here for LZMA2 into a newly
/// allocated lzma_options_lzma.
///
/// Only dict_size, preset_dict and preset_dict_size are assigned in this
/// function; the other members of the structure are left as lzma_alloc()
/// returned them.
///
/// \param options     On success, *options is set to the allocated
///                    structure. It is not written on failure.
/// \param allocator   Allocator passed to lzma_alloc().
/// \param props       Properties bytes; only props[0] is read.
/// \param props_size  Must be exactly 1.
///
/// \return LZMA_OK on success, LZMA_OPTIONS_ERROR for a wrong size,
///         set reserved bits or a value above 40, LZMA_MEM_ERROR if the
///         allocation fails.
extern lzma_ret
lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator,
        const uint8_t *props, size_t props_size)
{
    if (props_size != 1)
        return LZMA_OPTIONS_ERROR;

    // Check that reserved bits are unset.
    if (props[0] & 0xC0)
        return LZMA_OPTIONS_ERROR;

    // Decode the dictionary size.
    if (props[0] > 40)
        return LZMA_OPTIONS_ERROR;

    lzma_options_lzma *opt = lzma_alloc(
            sizeof(lzma_options_lzma), allocator);
    if (opt == NULL)
        return LZMA_MEM_ERROR;

    if (props[0] == 40) {
        // 40 is a special case that maps to the largest 32-bit value.
        opt->dict_size = UINT32_MAX;
    } else {
        // Mantissa is 2 or 3 (low bit of props[0]); the exponent is
        // props[0] / 2 + 11. E.g. 0 -> 2 << 11 = 4096 and
        // 39 -> 3 << 30.
        opt->dict_size = 2 | (props[0] & 1U);
        opt->dict_size <<= props[0] / 2U + 11;
    }

    opt->preset_dict = NULL;
    opt->preset_dict_size = 0;

    *options = opt;

    return LZMA_OK;
}

3.5.2.4 最后是lzma_simple_props_decode。props_size只能是0或4字节(其他大小返回LZMA_OPTIONS_ERROR)。4字节时用来设置start_offset。

/// Decodes the optional four-byte properties field of a "simple" filter
/// into a newly allocated lzma_options_bcj.
///
/// \param options     Set to the allocated structure only when a non-zero
///                    start_offset was decoded; otherwise left untouched.
/// \param allocator   Allocator passed to lzma_alloc() / lzma_free().
/// \param props       Properties bytes.
/// \param props_size  Must be 0 (no properties) or 4.
///
/// \return LZMA_OK on success, LZMA_OPTIONS_ERROR for any other size,
///         LZMA_MEM_ERROR if the allocation fails.
extern lzma_ret
lzma_simple_props_decode(void **options, const lzma_allocator *allocator,
        const uint8_t *props, size_t props_size)
{
    // No properties at all is valid; nothing is allocated.
    if (props_size == 0)
        return LZMA_OK;

    if (props_size != 4)
        return LZMA_OPTIONS_ERROR;

    lzma_options_bcj *opt = lzma_alloc(
            sizeof(lzma_options_bcj), allocator);
    if (opt == NULL)
        return LZMA_MEM_ERROR;

    // read32le() presumably reads a 32-bit little-endian value --
    // confirm against its definition.
    opt->start_offset = read32le(props);

    // Don't leave an options structure allocated if start_offset is zero.
    if (opt->start_offset == 0)
        lzma_free(opt, allocator);
    else
        *options = opt;

    return LZMA_OK;
}

3.5.2.5 回到上层lzma_lzma_props_decode中,设置dict_size,函数退出。

    // All dictionary sizes are accepted, including zero. LZ decoder
    // will automatically use a dictionary at least a few KiB even if
    // a smaller dictionary is requested.
    opt->dict_size = read32le(props + 1);

    opt->preset_dict = NULL;
    opt->preset_dict_size = 0;

    *options = opt;

    return LZMA_OK;

error:
    lzma_free(opt, allocator);
    return LZMA_OPTIONS_ERROR;
}

3.5.3 处理剩余padding部分

    // Padding
    while (in_pos < in_size) {
        if (in[in_pos++] != 0x00) {
            free_properties(block, allocator);

            // Possibly some new field present so use
            // LZMA_OPTIONS_ERROR instead of LZMA_DATA_ERROR.
            return LZMA_OPTIONS_ERROR;
        }
    }

    return LZMA_OK;
}

3.6 终于返回最外层,stream_decode里面。循环遍历,如果有内存消耗计算器则调用并添加,如果没有则用1024近似替代。其实memusage函数也很简单,就是统计结构体+字典的内存占用。

    // If LZMA_IGNORE_CHECK was used, this flag needs to be set.
    // It has to be set after lzma_block_header_decode() because
    // it always resets this to false.
    coder->block_options.ignore_check = coder->ignore_check;

    // Check the memory usage limit.
    const uint64_t memusage = lzma_raw_decoder_memusage(filters);
    lzma_ret ret;


/// Returns the memory usage estimate for an LZ decoder: the size of the
/// lzma_coder structure plus the dictionary size, computed as uint64_t.
extern uint64_t
lzma_lz_decoder_memusage(size_t dictionary_size)
{
    return sizeof(lzma_coder) + (uint64_t)(dictionary_size);
}

3.7 这个主要为了避免字典过大(超过coder->memlimit)。如果一切ok,则调用lzma_block_decoder_init。

    if (memusage == UINT64_MAX) {
        // One or more unknown Filter IDs.
        ret = LZMA_OPTIONS_ERROR;
    } else {
        // Now we can set coder->memusage since we know that
        // the filter chain is valid. We don't want
        // lzma_memusage() to return UINT64_MAX in case of
        // invalid filter chain.
        coder->memusage = memusage;

        if (memusage > coder->memlimit) {
            // The chain would need too much memory.
            ret = LZMA_MEMLIMIT_ERROR;
        } else {
            // Memory usage is OK.
            // Initialize the Block decoder.
            ret = lzma_block_decoder_init(
                    &coder->block_decoder,
                    allocator,
                    &coder->block_options);
        }
    }

3.7.1 lzma_block_decoder_init调用lzma_next_coder_init来设置next->init为lzma_block_decoder_init。然后对数据进行校验。这个函数和最上面介绍的lzma_stream_decoder_init其实很像。

extern lzma_ret
lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
        lzma_block *block)
{
    lzma_next_coder_init(&lzma_block_decoder_init, next, allocator);

    // Validate the options. lzma_block_unpadded_size() does that for us
    // except for Uncompressed Size and filters. Filters are validated
    // by the raw decoder.
    if (lzma_block_unpadded_size(block) == 0
            || !lzma_vli_is_valid(block->uncompressed_size))
        return LZMA_PROG_ERROR;

3.7.2 如果没有next->coder则初始化它。code设置为block_decode。然后进行其他的初始化。

// Allocate *next->coder if needed.
lzma_block_coder *coder = next->coder;
if (coder == NULL) {
    coder = lzma_alloc(sizeof(lzma_block_coder), allocator);
    if (coder == NULL)
        return LZMA_MEM_ERROR;

    next->coder = coder;
    next->code = &block_decode;
    next->end = &block_decoder_end;
    coder->next = LZMA_NEXT_CODER_INIT;
}

// Basic initializations
coder->sequence = SEQ_CODE;
coder->block = block;
coder->compressed_size = 0;
coder->uncompressed_size = 0;

// If Compressed Size is not known, we calculate the maximum allowed
// value so that encoded size of the Block (including Block Padding)
// is still a valid VLI and a multiple of four.
coder->compressed_limit
        = block->compressed_size == LZMA_VLI_UNKNOWN
            ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
                - block->header_size
                - lzma_check_size(block->check)
            : block->compressed_size;

3.7.3 最后一部分是对lzma_check_init的调用,其实就是初始化crc32/64对应的字段为0。然后调用一次lzma_raw_decoder_init,把block->filters整条filter链交给它去初始化。

    // Initialize the check. It's caller's problem if the Check ID is not
    // supported, and the Block decoder cannot verify the Check field.
    // Caller can test lzma_check_is_supported(block->check).
    coder->check_pos = 0;
    lzma_check_init(&coder->check, block->check);

    coder->ignore_check = block->version >= 1
            ? block->ignore_check : false;

    // Initialize the filter chain.
    return lzma_raw_decoder_init(&coder->next, allocator,
            block->filters);
}

3.8 回到外层。清理之前的临时filters对象。并设置序列状态为SEQ_BLOCK。

    // Free the allocated filter options since they are needed
    // only to initialize the Block decoder.
    for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
        lzma_free(filters[i].options, allocator);

    coder->block_options.filters = NULL;

    // Check if memory usage calculation and Block enocoder
    // initialization succeeded.
    if (ret != LZMA_OK)
        return ret;

    coder->sequence = SEQ_BLOCK;
}

这一节太长了,开一个新文章继续。

psproc 源码阅读 - 5

剩余的代码中,最重要的是show_one_proc,它读取结构体并解析其中的一项,调用其print函数打印到屏幕上。

/********** show one process (NULL proc prints header) **********/

//#define SPACE_AMOUNT page_size
#define SPACE_AMOUNT 144

static char *saved_outbuf;

void show_one_proc(const proc_t *restrict const p, const format_node *restrict fmt) {
    /* unknown: maybe set correct & actual to 1, remove +/- 1 below */
    int correct  = 0;  /* screen position we should be at */
    int actual   = 0;  /* screen position we are at */
    int amount   = 0;  /* amount of text that this data is */
    int leftpad  = 0;  /* amount of space this column _could_ need */
    int space    = 0;  /* amount of space we actually need to print */
    int dospace  = 0;  /* previous column determined that we need a space */
    int legit    = 0;  /* legitimately stolen extra space */
    int sz       = 0;  /* real size of data in outbuffer */
    int tmpspace = 0;
    char *restrict const outbuf = saved_outbuf;
    static int did_stuff = 0;  /* have we ever printed anything? */

p为-1时是其最后一次调用。如果之前已经打印过内容(did_stuff为真)则直接返回;如果从来没有打印过任何东西,则按需补打一次表头,然后exit(1)退出。

    if(-1==(long)p) {   /* true only once, at the end */
        if(did_stuff) return;
        /* have _never_ printed anything, but might need a header */
        if(!--lines_to_next_header) {
            lines_to_next_header = header_gap;
            show_one_proc(NULL,fmt);
        }
        /* fprintf(stderr, "No processes available.\n"); */  /* legal? */
        exit(1);
    }

如果是其他情况(p非空,即要打印一个真实进程),当lines_to_next_header递减到0时,先递归调用show_one_proc(NULL,fmt)输出一次表头。fmt(format node)就是那一组预定义的标头和格式化的数组。

    if(p) { /* not header, maybe we should call ourselves for it */
        if(!--lines_to_next_header) {
            lines_to_next_header = header_gap;
            show_one_proc(NULL,fmt);
        }
    }
    did_stuff = 1;
    if(active_cols>(int)OUTBUF_SIZE) fprintf(stderr,_("fix bigness error\n"));

    /* print row start sequence */
    for(;;) {
        legit = 0; 

        if(fmt->next) {
            max_rightward = fmt->width;
            tmpspace = 0;
        } else {
            tmpspace = correct-actual;
            if (tmpspace<1) {
                tmpspace = dospace;
                max_rightward = active_cols-actual-tmpspace;
            } else {
                max_rightward = active_cols - ( (correct>actual) ? correct : actual );
            }
        }
        if(max_rightward <= 0) max_rightward = 0;
        else if(max_rightward >= OUTBUF_SIZE) max_rightward = OUTBUF_SIZE-1;

        max_leftward  = fmt->width + actual - correct; /* TODO check this */
        if(max_leftward <= 0) max_leftward = 0;
        else if(max_leftward >= OUTBUF_SIZE) max_leftward = OUTBUF_SIZE-1;

计算完位置后,调用fmt的print函数来处理。举例阅读pr_wchan。

static int pr_wchan(char *restrict const outbuf, const proc_t *restrict const pp) {
    const char *w;
    size_t len;
    setREL1(WCHAN_NAME)  //<-- 如果没有设置outbuf,设置rel_WCHAN_NAME,设置完会退出。如果outbuf有值则不管。 这里outbuf是由saved_outbuf(= outbuf + SPACE_AMOUNT == 144,还记得最早的时候初始化的那个带保护页的区域吗……)传来的,因此有值。
    w = rSv(WCHAN_NAME, str, pp); //<-- rSv复习一下,就是pp->head[rel_WCHAN_NAME].result.str。

这个属性是由setDECL设置的。

setDECL(WCHAN_NAME)     {
    freNAME(str)(R);
    if (!(R->result.str = strdup(lookup_wchan(P->tid)))) I->seterr = 1;;
}

回到原函数中,剩余的就是将数据拷贝到outbuf中。

    len = strlen(w);
    if(len>max_rightward) len=max_rightward;
    memcpy(outbuf, w, len);
    outbuf[len] = '\0';
    return len;
}

回到上一层的函数中,如果p为空(打印表头)或者没有fmt->pr,则用snprintf将fmt->name写入outbuf里。

        /* prepare data and calculate leftpad */
        if(p && fmt->pr) amount = (*fmt->pr)(outbuf,p);
        else amount = snprintf(outbuf, OUTBUF_SIZE, "%s", fmt->name); /* AIX or headers */

并补足末尾0。

        if(amount < 0) outbuf[amount = 0] = '\0';
        else if(amount >= OUTBUF_SIZE) outbuf[amount = OUTBUF_SIZE-1] = '\0';

        switch((fmt->flags) & CF_JUST_MASK) {
        case 0:  /* for AIX, assigned outside this file */
            leftpad = 0;
            break;
        case CF_LEFT:          /* bad */
            leftpad = 0;
            break;
        case CF_RIGHT:     /* OK */
            leftpad = fmt->width - amount;
            if(leftpad < 0) leftpad = 0;
            break;
        case CF_SIGNAL:
            /* if the screen is wide enough, use full 16-character output */
            if(wide_signals) {
                leftpad = 16 - amount;
                legit = 7;
            } else {
                leftpad =  9 - amount;
            }
            if(leftpad < 0) leftpad = 0;
            break;
        case CF_USER:       /* bad */
            leftpad = fmt->width - amount;
            if(leftpad < 0) leftpad = 0;
            if(!user_is_number) leftpad = 0;
            break;
        case CF_WCHAN:       /* bad */
            if(wchan_is_number) {
                leftpad = fmt->width - amount;
                if(leftpad < 0) leftpad = 0;
                break;
            } else {
                if ((active_cols-actual-tmpspace)<1)
                    outbuf[1] = '\0';  /* oops, we (mostly) lose this column... */
                leftpad = 0;
                break;
            }
        case CF_UNLIMITED:
        {
            if(active_cols-actual-tmpspace < 1)
                outbuf[1] = '\0';    /* oops, we (mostly) lose this column... */
            leftpad = 0;
            break;
        }
        default:
            fprintf(stderr, _("bad alignment code\n"));
            break;
        }
        /* At this point:
         *
         * correct   from previous column
         * actual    from previous column
         * amount    not needed (garbage due to chopping)
         * leftpad   left padding for this column alone (not make-up or gap)
         * space     not needed (will recalculate now)
         * dospace   if we require space between this and the prior column
         * legit     space we were allowed to steal, and thus did steal
         */
        space = correct - actual + leftpad;
        if(space<1) space=dospace;
        if(space>SPACE_AMOUNT) space=SPACE_AMOUNT;  // only so much available

        /* real size -- don't forget in 'amount' is number of cells */
        outbuf[OUTBUF_SIZE-1] = '\0';
        sz = strlen(outbuf);

        /* print data, set x position stuff */
        if(!fmt->next) {
            /* Last column. Write padding + data + newline all together. */
            outbuf[sz] = '\n';
            fwrite(outbuf-space, space+sz+1, 1, stdout);
            break;
        }
        /* Not the last column. Write padding + data together. */
        fwrite(outbuf-space, space+sz, 1, stdout);
        actual  += space+amount;
        correct += fmt->width;
        correct += legit;        /* adjust for SIGNAL expansion */
        if(fmt->pr && fmt->next->pr) { /* neither is AIX filler */
            correct++;
            dospace = 1;
        } else {
            dospace = 0;
        }
        fmt = fmt->next;
        /* At this point:
         *
         * correct   screen position we should be at
         * actual    screen position we are at
         * amount    not needed
         * leftpad   not needed
         * space     not needed
         * dospace   if have determined that we need a space next time
         * legit     not needed
         */
    }
}

psproc 源码阅读 - 4

回到main中,还剩最后一点点代码:

    lists_and_needs(); 
    finalize_stacks(); //<===

    if(forest_type || sort_list) fancy_spew(); 
    else simple_spew(); /* no sort, no forest */
    show_one_proc((proc_t *)-1,format_list); /* no output yet? */

    procps_pids_unref(&Pids_info);
    return 0;
}

finalize_stacks是一个……基本由宏组成的函数。中间重复的宏太多了我就删掉了。

/*
 * Builds the final Pids_items list and hands it to the library.
 *
 * Steps, as visible below:
 *   1. reset Pids_index to 0 and run chkREL() for a fixed set of items;
 *   2. call every format node's pr() and every sort node's xe() (when
 *      non-NULL) with (NULL, NULL);
 *   3. pass Pids_items / Pids_index to procps_pids_reset().
 */
static void finalize_stacks (void)
{
    format_node *f_node;
    sort_node *s_node;

#if (PIDSITEMS < 60)
# error PIDSITEMS (common.h) should be at least 60!
#endif

    /* first, ensure minimum result structures for items
       which may or may not actually be displayable ... */
    Pids_index = 0;

    // needed by for selections
    chkREL(CMD)
    chkREL(ID_EGID)
    ………………
    chkREL(extra)
    chkREL(noop)

    // now accommodate any results not yet satisfied
    // NOTE(review): pr is called without a NULL check here, while xe
    // below is checked and show_one_proc() tests fmt->pr before calling
    // it -- confirm that no format node on this list has a NULL pr.
    f_node = format_list;
    while (f_node) {
        (*f_node->pr)(NULL, NULL);
        f_node = f_node->next;
    }
    s_node = sort_list;
    while (s_node) {
        if (s_node->xe) (*s_node->xe)(NULL, NULL);
        s_node = s_node->next;
    }

    procps_pids_reset(Pids_info, Pids_items, Pids_index);
}

其中,chkREL的定义如下:

// namREL(e): spell the name of the per-item index variable, rel_<e>
#define namREL(e) rel_ ## e
// makEXT(e): extern declaration of rel_<e>
#define makEXT(e) extern int namREL(e);
// makREL(e): definition of rel_<e>, starting at -1 (chkREL treats < 0 as "not yet assigned")
#define makREL(e) int namREL(e) = -1;
// chkREL(e): when rel_<e> is still negative, store PIDS_<e> at
// Pids_items[Pids_index] and set rel_<e> to that index (post-incrementing
// Pids_index), or to rel_noop once Pids_index has reached PIDSITEMS.
// The expansion is a complete if-block, so call sites use no trailing ';'.
// NOTE(review): the store into Pids_items[] happens before the bounds
// test -- confirm the array is sized to tolerate Pids_index == PIDSITEMS.
#define chkREL(e) if (namREL(e) < 0) { \
      Pids_items[Pids_index] = PIDS_ ## e; \
      namREL(e) = (Pids_index < PIDSITEMS) ? Pids_index++ : rel_noop; }

展开一下就是:

   if(rel_XX < 0) {
     Pids_items[Pids_index] = PIDS_XX;
     rel_XX = (Pids_index < PIDSITEMS) ? Pids_index++ : rel_noop;
   }

chkREL实际做的事情就是初始化Pids_items中各不同的rel_XXX项。然后,对format_list中的每一项,都调用其pr()来处理。pr其实就是print函数,调用snprintf向其outbuf来输出内容。然后,对sort_list中的每一项,调用其xe()来处理。最后,调用procps_pids_reset。之前看过一次就不再重复了。

回到main中,下一个函数是fancy_spew,当然仅当开启forest_type / sort_list后才调用。

    if(forest_type || sort_list) fancy_spew();  //<---
    else simple_spew(); /* no sort, no forest */
    show_one_proc((proc_t *)-1,format_list); /* no output yet? */

    procps_pids_unref(&Pids_info);
    return 0;
}

fancy_spew定义如下:

/***** sorted or forest */
static void fancy_spew(void) {
    struct pids_fetch *pidread;
    enum pids_fetch_type which;
    proc_t *buf;
    int i, n = 0;

    which = (thread_flags & TF_loose_tasks)
            ? PIDS_FETCH_THREADS_TOO : PIDS_FETCH_TASKS_ONLY;

    pidread = procps_pids_reap(Pids_info, which);
    if (!pidread || !pidread->counts->total) {
        fprintf(stderr, _("fatal library error, reap\n"));
        exit(EXIT_FAILURE);
    }
    processes = xcalloc(pidread->counts->total, sizeof(void*));
    for (i = 0; i < pidread->counts->total; i++) {
        buf = pidread->stacks[i];
        value_this_proc_pcpu(buf);
        if (want_this_proc(buf))
            processes[n++] = buf;
    }
    if (n) {
        if(forest_type) prep_forest_sort();
        while(sort_list) {
            procps_pids_sort(Pids_info, processes, n, sort_list->sr, sort_list->reverse);
            sort_list = sort_list->next;
        }
        if(forest_type) show_forest(n);
        else show_proc_array(n);
    }
    free(processes);
}

调用的第一个函数是procps_pids_reap。这个函数是一个重要的信息处理函数。它调用pids_oldproc_open。

/* procps_pids_reap():
 *
 * Gather every available task (and, when asked, every thread) in one
 * sweep and hand back the result stacks together with their summary.
 *
 * Bad arguments leave errno at EINVAL; otherwise errno is cleared
 * before the sweep begins.
 *
 * Returns: pointer to a pids_fetch struct on success, NULL on error.
 */
PROCPS_EXPORT struct pids_fetch *procps_pids_reap (
    struct pids_info *info,
    enum pids_fetch_type which)
{
    int found;

    errno = EINVAL;
    /* with items & numitems technically optional at 'new' time, it's
       expected 'reset' will have been called -- but just in case ... */
    if (info == NULL
    || (which != PIDS_FETCH_TASKS_ONLY && which != PIDS_FETCH_THREADS_TOO)
    || !info->curitems)
        return NULL;
    errno = 0;

    if (!pids_oldproc_open(&info->fetch_PT, info->oldflags))
        return NULL;

    // a non-zero fetch type selects the reader that also walks tasks
    if (which)
        info->read_something = readeither;
    else
        info->read_something = readproc;

    found = pids_stacks_fetch(info);

    pids_oldproc_close(&info->fetch_PT);

    // we better have found at least 1 pid
    if (found > 0)
        return &info->fetch.results;
    return NULL;
} // end: procps_pids_reap

pids_oldproc_open定义如下,重要的一眼就可以看出来,openproc函数。

/*
 * Open the process table behind *this unless it is already open.
 * The variadic tail carries an id list and, when PROC_UID is set in
 * flags, its element count; both are forwarded to openproc().
 *
 * Returns 1 when *this is usable afterwards, 0 when openproc() failed.
 */
static inline int pids_oldproc_open (
    PROCTAB **this,
    unsigned flags,
    ...)
{
    va_list args;
    int *id_list;
    int id_count = 0;

    // an already-open table is reused untouched
    if (*this != NULL)
        return 1;

    va_start(args, flags);
    id_list = va_arg(args, int*);
    if (flags & PROC_UID)
        id_count = va_arg(args, int);
    va_end(args);

    *this = openproc(flags, id_list, id_count);
    return (*this != NULL) ? 1 : 0;
} // end: pids_oldproc_open

openproc的定义如下,函数比较长,重要的内容我们分段切入阅读。

// initiate a process table scan
PROCTAB *openproc(unsigned flags, ...) {
    va_list ap;
    struct stat sbuf;
    static __thread int did_stat;
    PROCTAB *PT = calloc(1, sizeof(PROCTAB));

    if (!PT)
        return NULL;
    if (!did_stat) {
        task_dir_missing = stat("/proc/self/task", &sbuf);
        did_stat = 1;
    }

这里注册一些处理函数。

    PT->taskdir = NULL;
    PT->taskdir_user = -1;
    PT->taskfinder = simple_nexttid;
    PT->taskreader = simple_readtask;

    PT->reader = simple_readproc;
    if (flags & PROC_PID) {
        PT->procfs = NULL;
        PT->finder = listed_nextpid;
    } else {
        PT->procfs = opendir("/proc");
        if (!PT->procfs) {
            free(PT);
            return NULL;
        }
        PT->finder = simple_nextpid;
    }
    PT->flags = flags;

如果传入的内容包含一组pid/uid则这里读取它们。我们现在还没遇到这个情况,暂且不理。

    va_start(ap, flags);
    if (flags & PROC_PID)
        PT->pids = va_arg(ap, pid_t*);
    else if (flags & PROC_UID) {
        PT->uids = va_arg(ap, uid_t*);
        PT->nuid = va_arg(ap, int);
    }
    va_end(ap);

MAX_BUFSZ为1024 * 64 * 2字节。这里初始化src_buffer和dst_buffer(都是全局变量)。

    if (!src_buffer
            && !(src_buffer = malloc(MAX_BUFSZ))) {
        closedir(PT->procfs);
        free(PT);
        return NULL;
    }
    if (!dst_buffer
            && !(dst_buffer = malloc(MAX_BUFSZ))) {
        closedir(PT->procfs);
        free(src_buffer);
        free(PT);
        return NULL;
    }

    return PT;
}

两个buffer已经申请完成,回到上上一层的procps_pids_reap中。下一个调用的函数是pids_stacks_fetch。其代码如下:

static int pids_stacks_fetch (
    struct pids_info *info)
{
#define n_alloc  info->fetch.n_alloc
#define n_inuse  info->fetch.n_inuse
#define n_saved  info->fetch.n_alloc_save
    struct stacks_extent *ext;

最前方是一堆初始化的,这里先懒得看了,有需要后面再返回来阅读。STACKS_INIT的值是1024。

    // initialize stuff -----------------------------------
    if (!info->fetch.anchor) {
        if (!(info->fetch.anchor = calloc(STACKS_INIT, sizeof(void *))))
            return -1;
        if (!(ext = pids_stacks_alloc(info, STACKS_INIT)))
            return -1;       // here, errno was set to ENOMEM
        memcpy(info->fetch.anchor, ext->stacks, sizeof(void *) * STACKS_INIT);
        n_alloc = STACKS_INIT;
    }
    pids_toggle_history(info);
    memset(&info->fetch.counts, 0, sizeof(struct pids_counts));

之后就到具体的处理函数了。info->read_something由procps_pids_reap设置(which ? readeither : readproc):如果fetch的是PIDS_FETCH_THREADS_TOO(which非零),则read_something是readeither;如果是PIDS_FETCH_TASKS_ONLY,则是readproc。which的取值由thread_flags决定(在fancy_spew中设置)。

    // iterate stuff --------------------------------------
    n_inuse = 0;
    while (info->read_something(info->fetch_PT, &info->fetch_proc)) {

readeither,其代码如下。调用的函数也如注释中描述的那样,是一堆simple_函数。

//////////////////////////////////////////////////////////////////////////////////
// readeither: return a pointer to a proc_t filled with requested info about
// the next unique process or task available.  If no more are available,
// return a null pointer (boolean false).
//
// The iteration position survives between calls in thread-local statics:
// skel_p holds the tid/tgid produced by the finder, leader is that tid,
// and new_p/canary remember the first result handed out for the current
// process together with its tid.
proc_t *readeither (PROCTAB *restrict const PT, proc_t *restrict x) {
    static __thread proc_t skel_p;    // skeleton proc_t, only uses tid + tgid
    static __thread proc_t *new_p;    // for process/task transitions
    static __thread int canary, leader;
    char path[PROCPATHLEN];
    proc_t *ret;

    free_acquired(x);

    // a previous call left us inside a process: carry on with its next
    // task; if the remembered proc_t no longer carries the recorded tid
    // it is forgotten first (and recorded afresh further down)
    if (new_p) {
        if (new_p->tid != canary) new_p = NULL;
        goto next_task;
    }

next_proc:
    new_p = NULL;
    for (;;) {
        // an ENOMEM left in errno ends the whole scan
        if (errno == ENOMEM) goto end_procs;
        // fills in the PT->path, plus skel_p.tid and skel_p.tgid
        if (!PT->finder(PT,&skel_p)) goto end_procs;       // simple_nextpid
        leader = skel_p.tid;
        // zero means task directories are usable: go iterate the tasks
        if (!task_dir_missing) break;
        // otherwise read the process itself; a failed read moves on to
        // the next process
        if ((ret = PT->reader(PT,x))) return ret;          // simple_readproc
    }

next_task:
    // fills in our path, plus x->tid and x->tgid
    if (!(PT->taskfinder(PT,&skel_p,x,path)))              // simple_nexttid
        goto next_proc;
    /* to avoid loss of some thread group leader data,
       we must check its base dir, not its 'task' dir! */
    if (x->tid == leader) ret = PT->reader(PT,x);          // simple_readproc
    else ret = PT->taskreader(PT,x,path);                  // simple_readtask
    if (!ret) goto next_proc;
    // first result for this process: remember it so that the next call
    // resumes at next_task
    if (!new_p) {
        new_p = ret;
        canary = new_p->tid;
    }
    return ret;

end_procs:
    return NULL;
}

我们随便抽几个例子看一下。首先是simple_nextpid。它在/proc下读取下一个文件夹。

//////////////////////////////////////////////////////////////////////////////////
// Walk the open /proc directory stream until the next entry whose name
// begins with a digit in '1'..'9'.  On success the numeric name is stored
// in both p->tgid and p->tid and "/proc/<name>" is written to PT->path.
// Returns 1 on success, 0 when the directory is exhausted or an entry
// with an empty name is met.
static int simple_nextpid(PROCTAB *restrict const PT, proc_t *restrict const p) {
    static __thread struct dirent *entry;   /* dirent handle */
    char *restrict const path = PT->path;

    do {
        entry = readdir(PT->procfs);
        if (entry == NULL || entry->d_name[0] == '\0')
            return 0;
    } while (!(entry->d_name[0] > '0' && entry->d_name[0] <= '9'));

    p->tgid = strtoul(entry->d_name, NULL, 10);
    p->tid = p->tgid;
    snprintf(path, PROCPATHLEN, "/proc/%s", entry->d_name);
    return 1;
}

而具体处理/proc信息的是simple_readproc,我们依旧分解着来读。

//////////////////////////////////////////////////////////////////////////////////
// This reads process info from /proc in the traditional way, for one process.
// The pid (tgid? tid?) is already in p, and a path to it in path, with some
// room to spare.
static proc_t *simple_readproc(PROCTAB *restrict const PT, proc_t *restrict const p) {
    static __thread struct utlbuf_s ub = { NULL, 0 };    // buf for stat,statm,status
    static __thread struct stat sb;     // stat() buffer
    char *restrict const path = PT->path;
    unsigned flags = PT->flags;
    int rc = 0;

    if (stat(path, &sb) == -1)                  /* no such dirent (anymore) */
        goto next_proc;

    if ((flags & PROC_UID) && !XinLN(uid_t, sb.st_uid, PT->uids, PT->nuid))
        goto next_proc;                      /* not one of the requested uids */

这里遇到一个XinLN宏,用于测试列表的前N个项目中有没有某个类型的值。这里列表是PT->uids。

/* Yield non-zero when value X (of type T) occurs in list L, which ends
   at its first zero element; searching for zero itself also succeeds,
   since the scan stops on the terminator */
#   define XinL(T, X, L) ( {                    \
            T  wanted = (X), *cur = (L);        \
            for (; *cur; cur++)                 \
                if (*cur == wanted) break;      \
            *cur == wanted;                     \
        } )

/* Yield non-zero when value X (of type T) occurs among the first N
   elements of list L; an N of zero or less never matches */
#   define XinLN(T, X, L, N) ( {                \
            T wanted = (X), *list = (L);        \
            int pos, count = (N);               \
            for (pos = 0; pos < count; pos++)   \
                if (list[pos] == wanted) break; \
            pos < count;                        \
        } )

回到原来的代码里。很快可以看到一个名为file2str的函数,用于从文件中读取数据,放到ub->buf里。

/*
 * file2str: read the file "<directory>/<what>" into the growable buffer
 * held by ub, NUL terminating the result.
 *
 * ub->buf is allocated on first use (buffGRW bytes) and enlarged in
 * buffGRW steps whenever a read fills it completely.
 *
 * Returns the number of bytes read (always >= 1), or -1 when the buffer
 * cannot be allocated or grown, the path does not fit, the file cannot
 * be opened, or nothing could be read.  When growing fails the previous
 * buffer and its recorded size are left intact for later calls.
 */
static int file2str(const char *directory, const char *what, struct utlbuf_s *ub) {
#define buffGRW 1024
    char path[PROCPATHLEN];
    int fd, num, tot_read = 0, len;

    /* on first use we preallocate a buffer of minimum size to emulate
       former 'local static' behavior -- even if this read fails, that
       buffer will likely soon be used for another subdirectory anyway
       ( besides, with the calloc call we will never need use memcpy ) */
    if (ub->buf) ub->buf[0] = '\0';
    else {
        ub->buf = calloc(1, (ub->siz = buffGRW));
        if (!ub->buf) return -1;
    }
    len = snprintf(path, sizeof path, "%s/%s", directory, what);
    if (len <= 0 || (size_t)len >= sizeof path) return -1;
    if (-1 == (fd = open(path, O_RDONLY, 0))) return -1;
    while (0 < (num = read(fd, ub->buf + tot_read, ub->siz - tot_read))) {
        void *grown;

        tot_read += num;
        // a read that did not fill the buffer is taken as the end of data
        if (tot_read < ub->siz) break;
        // refuse to grow past INT_MAX; give up the last byte for the NUL
        if (ub->siz >= INT_MAX - buffGRW) {
            tot_read--;
            break;
        }
        // grow through a temporary so that a failed realloc neither leaks
        // the old buffer nor leaves ub->siz ahead of the real allocation
        grown = realloc(ub->buf, ub->siz + buffGRW);
        if (!grown) {
            close(fd);
            return -1;
        }
        ub->buf = grown;
        ub->siz += buffGRW;
    };
    ub->buf[tot_read] = '\0';
    close(fd);
    if (tot_read < 1) return -1;
    return tot_read;
#undef buffGRW
}

回到外层函数,可以看到它读取stat,然后调用stat2proc(其他同)对读取到的数据进行处理。这里的处理都是psproc的核心功能,因此我们会挨个跟踪进去。

    p->euid = sb.st_uid;                        /* need a way to get real uid */
    p->egid = sb.st_gid;                        /* need a way to get real gid */

首先是stat2proc。stat文件形式类似:

$ cat /proc/13/stat
13 (bash) S 12 13 12 1025 0 0 0 0 0 0 21 59 120 489 20 0 1 0 34 213183188992 1011 18446744073709551615 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

stat2proc(是的我没贴代码……不然太多了)会去查找( ) 括号中的内容,这个是来自task_struct结构的进程名,最长只有15字节。为了处理进程名中的特殊字符,它还会对其进行简单替换,然后对剩余内容进行扫描(sscanf)并保存到结构体中。

    if (flags & PROC_FILLSTAT) {                // read /proc/#/stat
        if (file2str(path, "stat", &ub) == -1)
            goto next_proc;
        rc += stat2proc(ub.buf, p);
    }

接下来是io的内容,一个sscanf解决。

    if (flags & PROC_FILLIO) {                  // read /proc/#/io
        if (file2str(path, "io", &ub) != -1)
            io2proc(ub.buf, p);
    }

然后是读取smaps,不过是从smaps_rollup读取。这个文件在哪个系统里有我目前还不清楚。它解析该文件,然后把每一项都创建一个object存起来。

    if (flags & PROC_FILLSMAPS) {               // read /proc/#/smaps_rollup
        if (file2str(path, "smaps_rollup", &ub) != -1)
            smaps2proc(ub.buf, p);
    }

statm,一样是sscanf读取。

    if (flags & PROC_FILLMEM) {                 // read /proc/#/statm
        if (file2str(path, "statm", &ub) != -1)
            statm2proc(ub.buf, p);
    }

status,一样。pwcache_get_user调用getpwuid来获取信息。如果用户名过长,则只复制uid部分。

    if (flags & PROC_FILLSTATUS) {              // read /proc/#/status
        if (file2str(path, "status", &ub) != -1) {
            rc += status2proc(ub.buf, p, 1);
            if (flags & (PROC_FILL_SUPGRP & ~PROC_FILLSTATUS))
                rc += supgrps_from_supgids(p);
            if (flags & (PROC_FILL_OUSERS & ~PROC_FILLSTATUS)) {
                p->ruser = pwcache_get_user(p->ruid);
                p->suser = pwcache_get_user(p->suid);
                p->fuser = pwcache_get_user(p->fuid);
            }
            if (flags & (PROC_FILL_OGROUPS & ~PROC_FILLSTATUS)) {
                p->rgroup = pwcache_get_group(p->rgid);
                p->sgroup = pwcache_get_group(p->sgid);
                p->fgroup = pwcache_get_group(p->fgid);
            }
        }
    }

    // if multithreaded, some values are crap
    if(p->nlwp > 1)
        p->wchan = ~0ul;

    /* some number->text resolving which is time consuming */
    /* ( names are cached, so memcpy to arrays was silly ) */
    if (flags & PROC_FILLUSR)
        p->euser = pwcache_get_user(p->euid);
    if (flags & PROC_FILLGRP)
        p->egroup = pwcache_get_group(p->egid);

继续,environ部分。首先仍然是读取environ文件(file2strvec);如果读取失败,则调用vectorize_dash_rc,用一个只含"-"的vector来填充p->environ_v。

    if (flags & PROC_FILLENV)                   // read /proc/#/environ
        if (!(p->environ_v = file2strvec(path, "environ")))
            rc += vectorize_dash_rc(&p->environ_v);
    if (flags & PROC_EDITENVRCVT)
        rc += fill_environ_cvt(path, p);

这里出现了一个新函数,vectorize_dash_rc,把"-"转为一个“vector元素”保存在*vec中。

// Fallback for true vectorized fields whose /proc source was missing:
// point *vec at a freshly built one-element vector holding "-".
// Returns 0 on success, 1 when that vector could not be created.
static int vectorize_dash_rc (char ***vec) {
    *vec = vectorize_this_str("-");
    return (*vec == NULL) ? 1 : 0;
}
// Build a one-element, NULL terminated string vector around a copy of
// src.  A single allocation holds the string copy, padding up to pointer
// alignment, and then the two pointer slots; the address of the first
// slot is returned.  Release with free(*vec).  Returns NULL when the
// allocation fails.
char **vectorize_this_str (const char *src) {
#define pSZ  (sizeof(char*))
    size_t len, pad;
    char *block, **slots;

    len = strlen(src) + 1;                        // string plus its NUL
    if (len < 1 || len >= INT_MAX)                // integer overflow?
        len = INT_MAX-1;
    pad = (pSZ-1) - ((len + pSZ-1) & (pSZ-1));    // bytes up to pointer alignment
    block = calloc(1, len + pad + (2 * pSZ));     // string + pad + two slots
    if (block == NULL)
        return NULL;
    snprintf(block, len, "%s", src);              // copy the caller's string
    slots = (char**)(block + len + pad);          // slots sit after the padding
    slots[0] = block;                             // the one and only element
    slots[1] = NULL;                              // list terminator
    return slots;
#undef pSZ
}

最后回来调用fill_environ_cvt。

// Read the 'environ' file of the designated process as a single space
// separated string, escape it, and store a heap copy in p->environ
// ("-" when nothing was obtained).
// Returns 0 on success, 1 when the copy could not be allocated.
static int fill_environ_cvt (const char *directory, proc_t *restrict p) {
    dst_buffer[0] = '\0';
    if (read_unvectored(src_buffer, MAX_BUFSZ, directory, "environ", ' ') != 0)
        escape_str(dst_buffer, src_buffer, MAX_BUFSZ);

    if (dst_buffer[0])
        p->environ = strdup(dst_buffer);
    else
        p->environ = strdup("-");
    return p->environ ? 0 : 1;
}

它调用read_unvectored来解析数据。它的前半部分从文件中读取数据(这个单独封装成一个函数不好吗……重复看到好多次了)。如果出现“\n”或者“\0”,则改为“sep”(最后一个参数)。

// this is the former under utilized 'read_cmdline', which has been
// generalized in support of these new libproc flags:
//     PROC_EDITCGRPCVT, PROC_EDITCMDLCVT and PROC_EDITENVRCVT
//
// Reads "<whom>/<what>" into dst (capacity sz), then turns every newline
// and every interior NUL into sep, leaving trailing NULs alone.  A space
// ending up in the last position is replaced by a NUL.
// Returns the number of bytes kept in dst, 0 when nothing was read.
static int read_unvectored(char *restrict const dst, unsigned sz, const char *whom, const char *what, char sep) {
    char path[PROCPATHLEN];
    unsigned used = 0;
    int fd, plen;

    if (sz == 0)
        return 0;
    if (sz >= INT_MAX)
        sz = INT_MAX-1;
    dst[0] = '\0';

    plen = snprintf(path, sizeof(path), "%s/%s", whom, what);
    if (plen <= 0 || (size_t)plen >= sizeof(path))
        return 0;
    fd = open(path, O_RDONLY);
    if (fd == -1)
        return 0;

    while (1) {
        ssize_t got = read(fd, dst+used, sz-used);
        if (got == -1 && errno == EINTR)
            continue;            // interrupted, simply try again
        if (got <= 0)
            break;               // hard error or EOF
        used += got;
        if (used == sz) {        // filled the buffer
            --used;              // make room for '\0'
            break;
        }
    }
    close(fd);

    if (used) {
        unsigned tail = used, k;

        // trailing zeroes are not separators, leave them out of the scan
        while (tail > 0 && dst[tail-1] == '\0')
            tail--;
        for (k = 0; k < tail; k++)
            if (dst[k] == '\n' || dst[k] == '\0')
                dst[k] = sep;
        if (dst[used-1] == ' ')
            dst[used-1] = '\0';
    }
    dst[used] = '\0';
    return used;
}

处理结束后调用escape_str。MAX_BUFSZ是1024 * 64 * 2。escape_str定义如下,用于标准化字符串。

// Rewrite a NUL terminated string in place through ESC_tab: each byte is
// replaced by its table entry unless that entry is '|', which leaves the
// byte as it is.
static inline void esc_all (unsigned char *str) {
    // if bad locale/corrupt str, replace non-printing stuff
    for (; *str; ++str) {
        unsigned char repl = ESC_tab[*str];
        if (repl != '|')
            *str = repl;
    }
}

// Step through the first len bytes of str one sequence at a time, with
// UTF_tab giving each sequence's length from its lead byte.  Lead bytes
// below 0x20 or equal to 0x7f become '?'.  A negative table entry, or a
// sequence that would run past len, hands the rest of the string to
// esc_all() and stops.
static inline void esc_ctl (unsigned char *str, int len) {
    int pos = 0;

    while (pos < len) {
        int width = UTF_tab[*str];

        // even with a proper locale, strings might be corrupt
        if (width < 0 || pos + width > len) {
            esc_all(str);
            return;
        }
        // and eliminate those non-printing control characters
        if (*str < 0x20 || *str == 0x7f)
            *str = '?';
        str += width;
        pos += width;
    }
}

// Copy src into dst (at most bufsize bytes including the NUL) and then
// sanitise the copy in place: with a UTF-8 codeset through esc_ctl(),
// otherwise through esc_all().  The codeset is looked up once per thread.
// Returns the number of bytes stored in dst, not counting the NUL.
int escape_str (unsigned char *dst, const unsigned char *src, int bufsize) {
    static __thread int utf_sw = 0;   // 0: undecided, 1: UTF-8, -1: anything else
    int written;

    if (utf_sw == 0) {
        char *codeset = nl_langinfo(CODESET);

        if (codeset && strcasecmp(codeset, "UTF-8") == 0)
            utf_sw = 1;
        else
            utf_sw = -1;
    }
    SECURE_ESCAPE_ARGS(dst, bufsize);
    written = snprintf(dst, bufsize, "%s", src);
    if (written < 0) {
        *dst = '\0';
        return 0;
    }
    // snprintf reports the untruncated length, clamp to what was stored
    if (written >= bufsize)
        written = bufsize-1;

    if (utf_sw < 0)
        esc_all(dst);
    else
        esc_ctl(dst, written);
    return written;
}

下一行,类似的做法,只不过处理cmdline。

    if (flags & PROC_FILLARG)                   // read /proc/#/cmdline
        if (!(p->cmdline_v = file2strvec(path, "cmdline")))
            rc += vectorize_dash_rc(&p->cmdline_v);
    if (flags & PROC_EDITCMDLCVT)
        rc += fill_cmdline_cvt(path, p);

fill_cmdline_cvt稍有不同。

// Read the 'cmdline' file of the designated process as one space
// separated string and escape it; when nothing could be read, build the
// text with escape_command() instead.  A heap copy of the result is
// stored in p->cmdline ("?" when the result is empty).
// Returns 0 on success, 1 when the copy could not be allocated.
static int fill_cmdline_cvt (const char *directory, proc_t *restrict p) {
#define uFLG ( ESC_BRACKETS | ESC_DEFUNCT )
    int got = read_unvectored(src_buffer, MAX_BUFSZ, directory, "cmdline", ' ');

    if (got != 0)
        escape_str(dst_buffer, src_buffer, MAX_BUFSZ);
    else
        escape_command(dst_buffer, p, MAX_BUFSZ, uFLG);

    if (dst_buffer[0])
        p->cmdline = strdup(dst_buffer);
    else
        p->cmdline = strdup("?");
    return p->cmdline ? 0 : 1;
#undef uFLG
}

其中escape_command定义如下。它读取pp->cmd,转义后写入outbuf(按flags加上方括号和" <defunct>"后缀)。pp是上一层的“p”,其cmd字段最早在stat2proc中被设置。

// Reads /proc/*/stat files, being careful not to trip over processes with
// names like ":-) 1 2 3 4 5 6".
static int stat2proc (const char *S, proc_t *restrict P) {
    char buf[64], raw[64];
    //...............
        if (!P->cmd) {
            num = tmp - S;
            memcpy(raw, S, num);
            raw[num] = '\0';
            escape_str(buf, raw, sizeof(buf));
            if (!(P->cmd = strdup(buf))) return 1;    //<------------
        }



// Write the escaped command name pp->cmd into outbuf (capacity: bytes),
// wrapped in "[...]" when ESC_BRACKETS is set and followed by
// " <defunct>" when ESC_DEFUNCT is set and pp->state is 'Z'.
// Returns the number of bytes written, not counting the NUL; 0 when the
// decorations alone would leave no room for the name.
int escape_command (unsigned char *outbuf, const proc_t *pp, int bytes, unsigned flags) {
    const int bracketed = (flags & ESC_BRACKETS) != 0;
    const int defunct = (flags & ESC_DEFUNCT) && pp->state == 'Z';
    int reserved = 0;
    int pos = 0;

    if (bracketed)
        reserved += 2;
    if (defunct)
        reserved += 10;                      // chars in " <defunct>"

    if (reserved + 1 >= bytes) {
        // if no room for even one byte of the command name
        outbuf[0] = '\0';
        return 0;
    }

    if (bracketed)
        outbuf[pos++] = '[';
    // copy the escaped pp->cmd to outbuf+pos
    pos += escape_str(outbuf+pos, pp->cmd, bytes-reserved);
    // we want "[foo] <defunct>", not "[foo <defunct>]"
    if (bracketed)
        outbuf[pos++] = ']';
    if (defunct) {
        memcpy(outbuf+pos, " <defunct>", 10);
        pos += 10;
    }
    outbuf[pos] = '\0';
    return pos;  // bytes, not including the NUL
}

接下来是读取cgroup、oom_score/oom_score_adj/ns/sdlogin

    if ((flags & PROC_FILLCGROUP))              // read /proc/#/cgroup
        if (!(p->cgroup_v = file2strvec(path, "cgroup")))
            rc += vectorize_dash_rc(&p->cgroup_v);
    if (flags & PROC_EDITCGRPCVT)
        rc += fill_cgroup_cvt(path, p);

    if (flags & PROC_FILLOOM) {
        if (file2str(path, "oom_score", &ub) != -1)
            oomscore2proc(ub.buf, p);
        if (file2str(path, "oom_score_adj", &ub) != -1)
            oomadj2proc(ub.buf, p);
    }

    if (flags & PROC_FILLNS)                    // read /proc/#/ns/*
        procps_ns_read_pid(p->tid, &(p->ns));


    if (flags & PROC_FILLSYSTEMD)               // get sd-login.h stuff
        rc += sd2proc(p);

这里读取lxc container相关的内容。

    if (flags & PROC_FILL_LXC)                  // value the lxc name
        p->lxcname = lxc_containers(path);

    if (flags & PROC_FILL_LUID)                 // value the login user id
        p->luid = login_uid(path);

解析exe指向的路径。

    if (flags & PROC_FILL_EXE) {
        if (!(p->exe = readlink_exe(path)))
            rc += 1;
    }

以及最后一小部分内容。

    if (flags & PROC_FILLAUTOGRP)               // value the 2 autogroup fields
        autogroup_fill(path, p);

    if (rc == 0) return p;
    errno = ENOMEM;
next_proc:
    return NULL;
}

读取完成后回到readeither里,另一个重要的是simple_readtask。但它读取的基本一样,不同的是它从/proc/#/task/#下面读。最后重复这一步骤直到所有的被读完。

另一个函数是readproc,它是简化版的readeither,定义如下:

// Return p filled with the data of the next process that can be read,
// skipping over any process whose read fails.  Returns NULL once the
// finder has nothing more to offer or errno is found set to ENOMEM.
proc_t *readproc(PROCTAB *restrict const PT, proc_t *restrict p) {
    proc_t *filled;

    free_acquired(p);

    // the finder fills in the path, plus p->tid and p->tgid
    while (errno != ENOMEM && PT->finder(PT,p)) {
        // go read the process data
        filled = PT->reader(PT,p);
        if (filled)
            return filled;
    }
    return NULL;
}

回到外侧pids_stacks_fetch中,剩余的基本就是在整理数据,并做展示前的准备工作。

        if (!(n_inuse < n_alloc)) {
            n_alloc += STACKS_GROW;
            if (!(info->fetch.anchor = realloc(info->fetch.anchor, sizeof(void *) * n_alloc))
                    || (!(ext = pids_stacks_alloc(info, STACKS_GROW))))
                return -1;   // here, errno was set to ENOMEM
            memcpy(info->fetch.anchor + n_inuse, ext->stacks, sizeof(void *) * STACKS_GROW);
        }
        if (!pids_proc_tally(info, &info->fetch.counts, &info->fetch_proc))
            return -1;       // here, errno was set to ENOMEM
        if (!pids_assign_results(info, info->fetch.anchor[n_inuse++], &info->fetch_proc))
            return -1;       // here, errno was set to ENOMEM
    }
    /* while the possibility is extremely remote, the readproc.c (read_something) |
       simple_readproc and simple_readtask guys could have encountered this error |
       in which case they would have returned a NULL, thus ending our while loop. | */
    if (errno == ENOMEM)
        return -1;

    // finalize stuff -------------------------------------
    /* note: we go to this trouble of maintaining a duplicate of the consolidated |
             extent stacks addresses represented as our 'anchor' since these ptrs |
             are exposed to a user (um, not that we don't trust 'em or anything). |
             plus, we can NULL delimit these ptrs which we couldn't do otherwise. | */
    if (n_saved < n_inuse + 1) {
        n_saved = n_inuse + 1;
        if (!(info->fetch.results.stacks = realloc(info->fetch.results.stacks, sizeof(void *) * n_saved)))
            return -1;
    }
    memcpy(info->fetch.results.stacks, info->fetch.anchor, sizeof(void *) * n_inuse);
    info->fetch.results.stacks[n_inuse] = NULL;

    return n_inuse;     // callers beware, this might be zero !
#undef n_alloc
#undef n_inuse
#undef n_saved
} // end: pids_stacks_fetch