ssh client代码阅读 (unfinished)

继续来读代码。今天读ssh client。在这之前,让我们先了解一下ssh协议。



1. 客户端连接服务器;(C->S)
2. 服务端发送服务端的公钥;(S->C)
3. 服务端和客户端互相沟通,产生一个安全通道;(C<->S)
4. 用户登录到服务端的操作系统里


RFC 4251 - Secure Shell (SSH) 协议架构
RFC 4253 - Secure Shell (SSH) 传输层协议
RFC 4252 - Secure Shell (SSH) 身份验证协议
RFC 4254 - Secure Shell (SSH) 连接协议


struct session_state {
    int connection_in;
    int connection_out;

    /* Protocol flags for the remote side. */
    u_int remote_protocol_flags;

    /* Encryption context for receiving data.  Only used for decryption. */
    struct sshcipher_ctx *receive_context;

    /* Encryption context for sending data.  Only used for encryption. */
    struct sshcipher_ctx *send_context;

    /* Buffer for raw input data from the socket. */
    struct sshbuf *input;

    /* Buffer for raw output data going to the socket. */
    struct sshbuf *output;

    /* Buffer for the partial outgoing packet being constructed. */
    struct sshbuf *outgoing_packet;

    /* Buffer for the incoming packet currently being processed. */
    struct sshbuf *incoming_packet;

    /* Scratch buffer for packet compression/decompression. */
    struct sshbuf *compression_buffer;

#ifdef WITH_ZLIB
    /* Incoming/outgoing compression dictionaries */
    z_stream compression_in_stream;
    z_stream compression_out_stream;
    int compression_in_started;
    int compression_out_started;
    int compression_in_failures;
    int compression_out_failures;

    /* default maximum packet size */
    u_int max_packet_size;

    /* Flag indicating whether this module has been initialized. */
    int initialized;

    /* Set to true if the connection is interactive. */
    int interactive_mode;

    /* Set to true if we are the server side. */
    int server_side;

    /* Set to true if we are authenticated. */
    int after_authentication;

    int keep_alive_timeouts;

    /* The maximum time that we will wait to send or receive a packet */
    int packet_timeout_ms;

    /* Session key information for Encryption and MAC */
    struct newkeys *newkeys[MODE_MAX];
    struct packet_state p_read, p_send;

    /* Volume-based rekeying */
    u_int64_t max_blocks_in, max_blocks_out, rekey_limit;

    /* Time-based rekeying */
    u_int32_t rekey_interval;   /* how often in seconds */
    time_t rekey_time;  /* time of last rekeying */

    /* roundup current message to extra_pad bytes */
    u_char extra_pad;

    /* XXX discard incoming data after MAC error */
    u_int packet_discard;
    size_t packet_discard_mac_already;
    struct sshmac *packet_discard_mac;

    /* Used in packet_read_poll2() */
    u_int packlen;

    /* Used in packet_send2 */
    int rekeying;

    /* Used in ssh_packet_send_mux() */
    int mux;

    /* Used in packet_set_interactive */
    int set_interactive_called;

    /* Used in packet_set_maxsize */
    int set_maxsize_called;

    /* One-off warning about weak ciphers */
    int cipher_warning_done;

    /* Hook for fuzzing inbound packets */
    ssh_packet_hook_fn *hook_in;
    void *hook_in_ctx;

    TAILQ_HEAD(, packet) outgoing;

以incoming_packet为例,可以在main等 --> ssh_packet_set_connection --> ssh_alloc_session_state 中看到它的踪迹。ssh_alloc_session_state()初始化一个ssh结构体。

struct ssh *
    struct ssh *ssh = NULL;
    struct session_state *state = NULL;

    if ((ssh = calloc(1, sizeof(*ssh))) == NULL ||
        (state = calloc(1, sizeof(*state))) == NULL ||
        (ssh->kex = kex_new()) == NULL ||
        (state->input = sshbuf_new()) == NULL ||
        (state->output = sshbuf_new()) == NULL ||
        (state->outgoing_packet = sshbuf_new()) == NULL ||
        (state->incoming_packet = sshbuf_new()) == NULL)
        goto fail;
    state->connection_in = -1;
    state->connection_out = -1;
    state->max_packet_size = 32768;
    state->packet_timeout_ms = -1;
    state->p_send.packets = state->p_read.packets = 0;
    state->initialized = 1;
     * ssh_packet_send2() needs to queue packets until
     * we've done the initial key exchange.
    state->rekeying = 1;
    ssh->state = state;
    return ssh;
    if (ssh) {
    if (state) {
    return NULL;


struct sshbuf *
    struct sshbuf *ret;

    if ((ret = calloc(sizeof(*ret), 1)) == NULL)
        return NULL;
    ret->alloc = SSHBUF_SIZE_INIT;
    ret->max_size = SSHBUF_SIZE_MAX;
    ret->readonly = 0;
    ret->refcount = 1;
    ret->parent = NULL;
    if ((ret->cd = ret->d = calloc(1, ret->alloc)) == NULL) {
        return NULL;
    return ret;

继续以incoming_packet为例,可以看到它被引用的位置全部都在packet.c中。因此这个文件一定是与收发包相关的重要文件。从密度上看,ssh_packet_read_poll2是密度最高的,它有如下的调用关系:ssh_packet_read (ssh_packet_read_expect) --> ssh_packet_read_seqnr --> ssh_packet_read_poll_seqnr --> ssh_packet_read_poll2。其中,ssh_packet_read(_expect)没有任何引用,因此放弃这条路线;另一条路线是main (skip_connect label) --> ssh_session2 --> client_loop --> client_process_buffered_input_packets(clientloop.c,serverloop.c中也有类似路径) --> ssh_dispatch_run_fatal --> ssh_dispatch_run --> ssh_packet_read_seqnr --> ssh_packet_read_poll_seqnr --> ssh_packet_read_poll2。显然下面这条是client的路径。我们先不看main,而是直接从client_loop这里开始。

client_loop用于实现与服务器的交互会话。它在用户通过身份验证,并且在远程主机上启动了一个命令后被调用。当escape_char不是SSH_ESCAPECHAR_NONE时,它会被用作终止或暂停会话的转义字符。


/* Initialize variables. */
last_was_cr = 1;
exit_status = -1;
connection_in = ssh_packet_get_connection_in(ssh);
connection_out = ssh_packet_get_connection_out(ssh);

/* Returns the socket used for reading. */

ssh_packet_get_connection_in(struct ssh *ssh)
    return ssh->state->connection_in;

/* Returns the descriptor used for writing. */

ssh_packet_get_connection_out(struct ssh *ssh)
    return ssh->state->connection_out;


 * Set signal handlers, (e.g. to restore non-blocking mode)
 * but don't overwrite SIG_IGN, matches behaviour from rsh(1)
if (ssh_signal(SIGHUP, SIG_IGN) != SIG_IGN)
    ssh_signal(SIGHUP, signal_handler);
if (ssh_signal(SIGINT, SIG_IGN) != SIG_IGN)
    ssh_signal(SIGINT, signal_handler);
if (ssh_signal(SIGQUIT, SIG_IGN) != SIG_IGN)
    ssh_signal(SIGQUIT, signal_handler);
if (ssh_signal(SIGTERM, SIG_IGN) != SIG_IGN)
    ssh_signal(SIGTERM, signal_handler);
ssh_signal(SIGWINCH, window_change_handler);


if (have_pty)
    enter_raw_mode(options.request_tty == REQUEST_TTY_FORCE);

session_ident = ssh2_chan_id;
if (session_ident != -1) {
    if (escape_char_arg != SSH_ESCAPECHAR_NONE) {
        channel_register_filter(ssh, session_ident,
            client_simple_escape_filter, NULL,
    channel_register_cleanup(ssh, session_ident,
        client_channel_closed, 0);



/* Main loop of the client for the interactive session mode. */
while (!quit_pending) {

    /* Process buffered packets sent by the server. */

处理代码如下,核心在于ssh_dispatch_run。这也正是链条上的:main --> ssh_session2 --> client_loop --> client_process_buffered_input_packets --> ssh_dispatch_run_fatal --> [[ssh_dispatch_run]] --> ssh_packet_read_seqnr --> ssh_packet_read_poll_seqnr --> ssh_packet_read_poll2 这个位置。让我们仔细阅读。

ssh_dispatch_run(struct ssh *ssh, int mode, volatile sig_atomic_t *done)
    int r;
    u_char type;
    u_int32_t seqnr;

    for (;;) {
        if (mode == DISPATCH_BLOCK) {
            r = ssh_packet_read_seqnr(ssh, &type, &seqnr);
            if (r != 0)
                return r;
        } else {
            r = ssh_packet_read_poll_seqnr(ssh, &type, &seqnr);
            if (r != 0)
                return r;
            if (type == SSH_MSG_NONE)
                return 0;
        if (type > 0 && type < DISPATCH_MAX &&
            ssh->dispatch[type] != NULL) {
            if (ssh->dispatch_skip_packets) {
                debug2("skipped packet (type %u)", type);
            r = (*ssh->dispatch[type])(type, seqnr, ssh);
            if (r != 0)
                return r;
        } else {
            r = sshpkt_disconnect(ssh,
                "protocol error: rcvd type %d", type);
            if (r != 0)
                return r;
            return SSH_ERR_DISCONNECTED;
        if (done != NULL && *done)
            return 0;

ssh_dispatch_run_fatal(struct ssh *ssh, int mode, volatile sig_atomic_t *done)
    int r;

    if ((r = ssh_dispatch_run(ssh, mode, done)) != 0)
        sshpkt_fatal(ssh, r, "%s", __func__);
static void
client_process_buffered_input_packets(struct ssh *ssh)
    ssh_dispatch_run_fatal(ssh, DISPATCH_NONBLOCK, &quit_pending);

首先,基于是否阻塞,选择调用ssh_packet_read_seqnr还是ssh_packet_read_poll_seqnr。获取一个type,并用于不同的回调函数上(*ssh->dispatch[type])(type, seqnr, ssh);。这里也是一个典型的状态机没跑了。

    if (mode == DISPATCH_BLOCK) {
        r = ssh_packet_read_seqnr(ssh, &type, &seqnr);
        if (r != 0)
            return r;
    } else {
        r = ssh_packet_read_poll_seqnr(ssh, &type, &seqnr);
        if (r != 0)
            return r;
        if (type == SSH_MSG_NONE)
            return 0;

其余的并没有太多有实际影响的操作,让我们跟入ssh_packet_read_seqnr。它有一个8192字节的buf,等待ssh server的输入,如果有的话,则调用ssh_packet_read_poll_seqnr,这很好,因为上面我们也准备看这个函数。刚好可以一起看掉。

ssh_packet_read_seqnr(struct ssh *ssh, u_char *typep, u_int32_t *seqnr_p)
    struct session_state *state = ssh->state;
    int len, r, ms_remain;
    struct pollfd pfd;
    char buf[8192];
    struct timeval start;
    struct timespec timespec, *timespecp = NULL;


     * Since we are blocking, ensure that all written packets have
     * been sent.
    if ((r = ssh_packet_write_wait(ssh)) != 0)
        goto out;

    /* Stay in the loop until we have received a complete packet. */
    for (;;) {
        /* Try to read a packet from the buffer. */
        r = ssh_packet_read_poll_seqnr(ssh, typep, seqnr_p);
        if (r != 0)
        /* If we got a packet, return it. */
        if (*typep != SSH_MSG_NONE)


ssh_packet_read_poll_seqnr(struct ssh *ssh, u_char *typep, u_int32_t *seqnr_p)
    struct session_state *state = ssh->state;
    u_int reason, seqnr;
    int r;
    u_char *msg;

    for (;;) {
        msg = NULL;
        r = ssh_packet_read_poll2(ssh, typep, seqnr_p);
        if (r != 0)
            return r;
        if (*typep) {
            state->keep_alive_timeouts = 0;
            DBG(debug("received packet type %d", *typep));
        switch (*typep) {


ssh_packet_read_poll2(struct ssh *ssh, u_char *typep, u_int32_t *seqnr_p)
    struct session_state *state = ssh->state;
    u_int padlen, need;
    u_char *cp;
    u_int maclen, aadlen = 0, authlen = 0, block_size;
    struct sshenc *enc   = NULL;
    struct sshmac *mac   = NULL;
    struct sshcomp *comp = NULL;
    int r;

    if (state->mux)
        return ssh_packet_read_poll2_mux(ssh, typep, seqnr_p);

    *typep = SSH_MSG_NONE;

老规矩,先看看ssh_packet_read_poll2_mux。代码不长。这里会遇到几个工具函数,第一个是sshbuf_ptr。它先检查buf是否合法。这个检查非常严格,包括buf为NULL、引用计数异常(buf已经不在使用)、超出最大大小、长度不正常、off已经超过size等情况。这些都会导致其把SIGSEGV恢复为默认处理(SIG_DFL)并抛出SIGSEGV,即不尝试从损坏的buffer中恢复。如果ok的话,返回cd + off。cd是buffer的const data指针。对于非只读的buffer,它和另一个成员“d”指向同一块内存(检查中要求d == cd),只不过加了修饰符const使其不能被用来修改数据。当然在编译后就没有区别了。

static inline int
sshbuf_check_sanity(const struct sshbuf *buf)
    if (__predict_false(buf == NULL ||
        (!buf->readonly && buf->d != buf->cd) ||
        buf->refcount < 1 || buf->refcount > SSHBUF_REFS_MAX ||
        buf->cd == NULL ||
        buf->max_size > SSHBUF_SIZE_MAX ||
        buf->alloc > buf->max_size ||
        buf->size > buf->alloc ||
        buf->off > buf->size)) {
        /* Do not try to recover from corrupted buffer internals */
        ssh_signal(SIGSEGV, SIG_DFL);
        return SSH_ERR_INTERNAL_ERROR;
    return 0;

const u_char *
sshbuf_ptr(const struct sshbuf *buf)
    if (sshbuf_check_sanity(buf) != 0)
        return NULL;
    return buf->cd + buf->off;

static int
ssh_packet_read_poll2_mux(struct ssh *ssh, u_char *typep, u_int32_t *seqnr_p)
    struct session_state *state = ssh->state;
    const u_char *cp;
    size_t need;
    int r;

    if (ssh->kex)
        return SSH_ERR_INTERNAL_ERROR;
    *typep = SSH_MSG_NONE;
    cp = sshbuf_ptr(state->input);
    if (state->packlen == 0) {
        if (sshbuf_len(state->input) < 4 + 1)
            return 0; /* packet is incomplete */
        state->packlen = PEEK_U32(cp);
        if (state->packlen < 4 + 1 ||
            state->packlen > PACKET_MAX_SIZE)
            return SSH_ERR_MESSAGE_INCOMPLETE;
    need = state->packlen + 4;
    if (sshbuf_len(state->input) < need)
        return 0; /* packet is incomplete */
    if ((r = sshbuf_put(state->incoming_packet, cp + 4,
        state->packlen)) != 0 ||
        (r = sshbuf_consume(state->input, need)) != 0 ||
        (r = sshbuf_get_u8(state->incoming_packet, NULL)) != 0 ||
        (r = sshbuf_get_u8(state->incoming_packet, typep)) != 0)
        return r;
    if (ssh_packet_log_type(*typep))
        debug3_f("type %u", *typep);
    /* sshbuf_dump(state->incoming_packet, stderr); */
    /* reset for next packet */
    state->packlen = 0;
    return r;

上面这段太长了,我们单独粘出来看。如果当前packlen是0,检查input中的长度是否至少为5字节(4 + 1),不足则认为包不完整并返回0;足够的话,取出开头的32位(4字节)作为packlen。它是一个unsigned int类型,下面随后检查大小是否合法(在5~PACKET_MAX_SIZE之间)。PACKET_MAX_SIZE是(256 * 1024)。如果不合法,返回错误(SSH_ERR_MESSAGE_INCOMPLETE)。

cp = sshbuf_ptr(state->input);
if (state->packlen == 0) {
    if (sshbuf_len(state->input) < 4 + 1)
        return 0; /* packet is incomplete */
    state->packlen = PEEK_U32(cp);
    if (state->packlen < 4 + 1 ||
        state->packlen > PACKET_MAX_SIZE)

随后,声明需要packlen+4的长度,如果input中的数据还不够则返回0(包不完整,等待更多数据);如果足够则开始接收数据。从cp+4处拷贝packlen字节的数据到incoming_packet里。这里涉及两个函数,一个是sshbuf_put,一个是sshbuf_reserve。reserve函数(通过sshbuf_allocate)用于分配指定长度的空间(如果已经超过max length,则有一个奇特的pack操作,即将一半长度的buffer移动到前面去)。它返回保留区域的起始地址,并修正buf的真实大小。然后返回到上一层sshbuf_put,它调用memcpy将长度为len的数据从v拷贝到p中。

sshbuf_reserve(struct sshbuf *buf, size_t len, u_char **dpp)
    u_char *dp;
    int r;

    if (dpp != NULL)
        *dpp = NULL;

    SSHBUF_DBG(("reserve buf = %p len = %zu", buf, len));
    if ((r = sshbuf_allocate(buf, len)) != 0)
        return r;

    dp = buf->d + buf->size;
    buf->size += len;
    if (dpp != NULL)
        *dpp = dp;
    return 0;

sshbuf_put(struct sshbuf *buf, const void *v, size_t len)
    u_char *p;
    int r;

    if ((r = sshbuf_reserve(buf, len, &p)) < 0)
        return r;
    if (len != 0)
        memcpy(p, v, len);
    return 0;

need = state->packlen + 4;
if (sshbuf_len(state->input) < need)
    return 0; /* packet is incomplete */
if ((r = sshbuf_put(state->incoming_packet, cp + 4,
    state->packlen)) != 0 ||
    (r = sshbuf_consume(state->input, need)) != 0 ||
    (r = sshbuf_get_u8(state->incoming_packet, NULL)) != 0 ||
    (r = sshbuf_get_u8(state->incoming_packet, typep)) != 0)
    return r;



xz 源码阅读 - 2


// Fall through

case SEQ_BLOCK: {
    const lzma_ret ret = coder->block_decoder.code(
            coder->block_decoder.coder, allocator,
            in, in_pos, in_size, out, out_pos, out_size,

    if (ret != LZMA_STREAM_END)
        return ret;

    // Block decoded successfully. Add the new size pair to
    // the Index hash.

    coder->sequence = SEQ_BLOCK_HEADER;


next->coder = coder;
next->code = &block_decode;
next->end = &block_decoder_end;
coder->next = LZMA_NEXT_CODER_INIT;

// Basic initializations
coder->sequence = SEQ_CODE;
coder->block = block;
coder->compressed_size = 0;
coder->uncompressed_size = 0;


static lzma_ret
block_decode(void *coder_ptr, const lzma_allocator *allocator,
        const uint8_t *restrict in, size_t *restrict in_pos,
        size_t in_size, uint8_t *restrict out,
        size_t *restrict out_pos, size_t out_size, lzma_action action)
    lzma_block_coder *coder = coder_ptr;

block_decode:1. 注意这里的coder->sequence是另一个“sequence”,它最开始是被初始化成SEQ_CODE的。因此第一步从调用coder->next.code开始。

coder = coder_ptr(参数1)是上一步的coder->block_decoder.coder,因此这个调用实际调用的是:coder->block_decoder.coder->next.code(并把next.coder作为第一个参数传入),这些decoder由filter决定。

    switch (coder->sequence) {
    case SEQ_CODE: {
        const size_t in_start = *in_pos;
        const size_t out_start = *out_pos;

        const lzma_ret ret = coder->next.code(coder->next.coder,
                allocator, in, in_pos, in_size,
                out, out_pos, out_size, action);

        const size_t in_used = *in_pos - in_start;
        const size_t out_used = *out_pos - out_start;

        // NOTE: We compare to compressed_limit here, which prevents
        // the total size of the Block growing past LZMA_VLI_MAX.
        if (update_size(&coder->compressed_size, in_used,
                || update_size(&coder->uncompressed_size,
            return LZMA_DATA_ERROR;

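block_decode:2. 调用完成后,调用lzma_check_update,用本次解出的数据(out + out_start起的out_used字节)更新校验值(CRC32/64等);真正的比较要到SEQ_CHECK状态才进行。如果下一级decoder返回LZMA_STREAM_END,则校验压缩/解压后的大小,并设置seq为SEQ_PADDING。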

        if (!coder->ignore_check)
            lzma_check_update(&coder->check, coder->block->check,
                    out + out_start, out_used);

        if (ret != LZMA_STREAM_END)
            return ret;

        // Compressed and Uncompressed Sizes are now at their final
        // values. Verify that they match the values given to us.
        if (!is_size_valid(coder->compressed_size,
                || !is_size_valid(coder->uncompressed_size,
            return LZMA_DATA_ERROR;

        // Copy the values into coder->block. The caller
        // may use this information to construct Index.
        coder->block->compressed_size = coder->compressed_size;
        coder->block->uncompressed_size = coder->uncompressed_size;

        coder->sequence = SEQ_PADDING;

block_decode:3. 进入SEQ_PADDING状态,逐字节读取并校验填充字节(必须为0x00,否则返回LZMA_DATA_ERROR),直到压缩数据长度达到4字节对齐。

    // Fall through

    case SEQ_PADDING:
        // Compressed Data is padded to a multiple of four bytes.
        while (coder->compressed_size & 3) {
            if (*in_pos >= in_size)
                return LZMA_OK;

            // We use compressed_size here just get the Padding
            // right. The actual Compressed Size was stored to
            // coder->block already, and won't be modified by
            // us anymore.

            if (in[(*in_pos)++] != 0x00)
                return LZMA_DATA_ERROR;

        if (coder->block->check == LZMA_CHECK_NONE)
            return LZMA_STREAM_END;

        if (!coder->ignore_check)
            lzma_check_finish(&coder->check, coder->block->check);

        coder->sequence = SEQ_CHECK;

block_decode:4. 进入SEQ_CHECK状态,首先获取check_size,把输入中的校验字段拷贝到coder->block->raw_check,凑齐check_size字节后与计算出的校验值进行比较。随后结束(返回LZMA_STREAM_END)。

    // Fall through

    case SEQ_CHECK: {
        const size_t check_size = lzma_check_size(coder->block->check);
        lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check,
                &coder->check_pos, check_size);
        if (coder->check_pos < check_size)
            return LZMA_OK;

        // Validate the Check only if we support it.
        // coder->check.buffer may be uninitialized
        // when the Check ID is not supported.
        if (!coder->ignore_check
                && lzma_check_is_supported(coder->block->check)
                && memcmp(coder->block->raw_check,
                    check_size) != 0)
            return LZMA_DATA_ERROR;

        return LZMA_STREAM_END;

    return LZMA_PROG_ERROR;

回到最开始的地方,进入 coder->sequence = SEQ_BLOCK_HEADER; 状态。这是解压的上一个状态,我们跳过看下一个SEQ_INDEX。

case SEQ_INDEX: {
    // If we don't have any input, don't call
    // lzma_index_hash_decode() since it would return
    // LZMA_BUF_ERROR, which we must not do here.
    if (*in_pos >= in_size)
        return LZMA_OK;

    // Decode the Index and compare it to the hash calculated
    // from the sizes of the Blocks (if any).
    const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
            in, in_pos, in_size);
    if (ret != LZMA_STREAM_END)
        return ret;

    coder->sequence = SEQ_STREAM_FOOTER;

// Fall through


extern LZMA_API(lzma_ret)
lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
        size_t *in_pos, size_t in_size)
    // Catch zero input buffer here, because in contrast to Index encoder
    // and decoder functions, applications call this function directly
    // instead of via lzma_code(), which does the buffer checking.
    if (*in_pos >= in_size)
        return LZMA_BUF_ERROR;

    // NOTE: This function has many similarities to index_encode() and
    // index_decode() functions found from index_encoder.c and
    // index_decoder.c. See the comments especially in index_encoder.c.
    const size_t in_start = *in_pos;
    lzma_ret ret = LZMA_OK;

    while (*in_pos < in_size)
    switch (index_hash->sequence) {
    case SEQ_BLOCK:
        // Check the Index Indicator is present.
        if (in[(*in_pos)++] != 0x00)
            return LZMA_DATA_ERROR;

        index_hash->sequence = SEQ_COUNT;

    case SEQ_COUNT: {
        ret = lzma_vli_decode(&index_hash->remaining,
                &index_hash->pos, in, in_pos, in_size);
        if (ret != LZMA_STREAM_END)
            goto out;

        // The count must match the count of the Blocks decoded.
        if (index_hash->remaining != index_hash->blocks.count)
            return LZMA_DATA_ERROR;

        ret = LZMA_OK;
        index_hash->pos = 0;

        // Handle the special case when there are no Blocks.
        index_hash->sequence = index_hash->remaining == 0
                ? SEQ_PADDING_INIT : SEQ_UNPADDED;

    case SEQ_UNPADDED:
        lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
                ? &index_hash->unpadded_size
                : &index_hash->uncompressed_size;

        ret = lzma_vli_decode(size, &index_hash->pos,
                in, in_pos, in_size);
        if (ret != LZMA_STREAM_END)
            goto out;

        ret = LZMA_OK;
        index_hash->pos = 0;

        if (index_hash->sequence == SEQ_UNPADDED) {
            if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
                    || index_hash->unpadded_size
                        > UNPADDED_SIZE_MAX)
                return LZMA_DATA_ERROR;

            index_hash->sequence = SEQ_UNCOMPRESSED;
        } else {
            // Update the hash.

            // Verify that we don't go over the known sizes. Note
            // that this validation is simpler than the one used
            // in lzma_index_hash_append(), because here we know
            // that values in index_hash->blocks are already
            // validated and we are fine as long as we don't
            // exceed them in index_hash->records.
            if (index_hash->blocks.blocks_size
                    < index_hash->records.blocks_size
                    || index_hash->blocks.uncompressed_size
                    < index_hash->records.uncompressed_size
                    || index_hash->blocks.index_list_size
                    < index_hash->records.index_list_size)
                return LZMA_DATA_ERROR;

            // Check if this was the last Record.
            index_hash->sequence = --index_hash->remaining == 0
                    ? SEQ_PADDING_INIT : SEQ_UNPADDED;


        index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
                index_hash->records.index_list_size)) & 3;
        index_hash->sequence = SEQ_PADDING;

    // Fall through

    case SEQ_PADDING:
        if (index_hash->pos > 0) {
            if (in[(*in_pos)++] != 0x00)
                return LZMA_DATA_ERROR;


        // Compare the sizes.
        if (index_hash->blocks.blocks_size
                != index_hash->records.blocks_size
                || index_hash->blocks.uncompressed_size
                != index_hash->records.uncompressed_size
                || index_hash->blocks.index_list_size
                != index_hash->records.index_list_size)
            return LZMA_DATA_ERROR;

        // Finish the hashes and compare them.
        lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
        lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
        if (memcmp(index_hash->blocks.check.buffer.u8,
                lzma_check_size(LZMA_CHECK_BEST)) != 0)
            return LZMA_DATA_ERROR;

        // Finish the CRC32 calculation.
        index_hash->crc32 = lzma_crc32(in + in_start,
                *in_pos - in_start, index_hash->crc32);

        index_hash->sequence = SEQ_CRC32;

    // Fall through

    case SEQ_CRC32:
        do {
            if (*in_pos == in_size)
                return LZMA_OK;

            if (((index_hash->crc32 >> (index_hash->pos * 8))
                    & 0xFF) != in[(*in_pos)++])
                return LZMA_DATA_ERROR;

        } while (++index_hash->pos < 4);

        return LZMA_STREAM_END;

        return LZMA_PROG_ERROR;

    // Update the CRC32,
    index_hash->crc32 = lzma_crc32(in + in_start,
            *in_pos - in_start, index_hash->crc32);

    return ret;


    // Copy the Stream Footer to the internal buffer.
    lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,

    // Return if we didn't get the whole Stream Footer yet.
    if (coder->pos < LZMA_STREAM_HEADER_SIZE)
        return LZMA_OK;

    coder->pos = 0;

    // Decode the Stream Footer. The decoder gives
    // LZMA_FORMAT_ERROR if the magic bytes don't match,
    // so convert that return code to LZMA_DATA_ERROR.
    lzma_stream_flags footer_flags;

lzma_stream_footer_decode用于解码footer,解码flags并设置backward_size。校验footer size,并返回结果。

    const lzma_ret ret = lzma_stream_footer_decode(
            &footer_flags, coder->buffer);
    if (ret != LZMA_OK)
        return ret == LZMA_FORMAT_ERROR
                ? LZMA_DATA_ERROR : ret;

    // Check that Index Size stored in the Stream Footer matches
    // the real size of the Index field.
    if (lzma_index_hash_size(coder->index_hash)
            != footer_flags.backward_size)
        return LZMA_DATA_ERROR;

    // Compare that the Stream Flags fields are identical in
    // both Stream Header and Stream Footer.
            &coder->stream_flags, &footer_flags));

    if (!coder->concatenated)
        return LZMA_STREAM_END;

    coder->sequence = SEQ_STREAM_PADDING;

// Fall through



        // Skip over possible Stream Padding.
        while (true) {
            if (*in_pos >= in_size) {
                // Unless LZMA_FINISH was used, we cannot
                // know if there's more input coming later.
                if (action != LZMA_FINISH)
                    return LZMA_OK;

                // Stream Padding must be a multiple of
                // four bytes.
                return coder->pos == 0
                        ? LZMA_STREAM_END
                        : LZMA_DATA_ERROR;

            // If the byte is not zero, it probably indicates
            // beginning of a new Stream (or the file is corrupt).
            if (in[*in_pos] != 0x00)

            coder->pos = (coder->pos + 1) & 3;

        // Stream Padding must be a multiple of four bytes (empty
        // Stream Padding is OK).
        if (coder->pos != 0) {
            return LZMA_DATA_ERROR;

        // Prepare to decode the next Stream.
        return_if_error(stream_decoder_reset(coder, allocator));

        return LZMA_PROG_ERROR;

    // Never reached

xz 源码阅读 - 1



截屏2022-01-13 下午7.23.49.png


/* note: in_file and out_file must be open already */
int xz_decompress (FILE *in_file, FILE *out_file)
    lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */

    /* initialize xz decoder */
    ret_xz = lzma_stream_decoder (&strm, memory_limit, flags);
    if (ret_xz != LZMA_OK) {
        fprintf (stderr, "lzma_stream_decoder error: %d\n", (int) ret_xz);
        return RET_ERROR_INIT;

lzma_stream_decoder 调用lzma_next_strm_init,

extern LZMA_API(lzma_ret)
lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
    lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);

    strm->internal->supported_actions[LZMA_RUN] = true;
    strm->internal->supported_actions[LZMA_FINISH] = true;

    return LZMA_OK;

对应的,宏定义如下,所以相当于:1、lzma_strm_init(strm);2、lzma_stream_decoder_init(&strm->internal->next, strm->allocator, ...args...);如果第2步返回的不是LZMA_OK,则调用lzma_end(strm)并返回该错误码。

/// Initializes lzma_strm and calls func() to initialize strm->internal->next.
/// (The function being called will use lzma_next_coder_init()). If
/// initialization fails, memory that wasn't freed by func() is freed
/// along strm->internal.
///
/// func is invoked as func(&strm->internal->next, strm->allocator, ...),
/// with the macro's trailing arguments forwarded unchanged.
///
/// NOTE: the expansion contains a `return ret_;`, so when func() does not
/// return LZMA_OK the *enclosing* function returns that error code after
/// lzma_end(strm) has been called. return_if_error() is defined elsewhere;
/// presumably it likewise returns from the enclosing function when
/// lzma_strm_init() fails -- confirm against its definition.
/// The do { ... } while (0) wrapper makes the macro usable as a single
/// statement.
#define lzma_next_strm_init(func, strm, ...) \
do { \
    return_if_error(lzma_strm_init(strm)); \
    const lzma_ret ret_ = func(&(strm)->internal->next, \
            (strm)->allocator, __VA_ARGS__); \
    if (ret_ != LZMA_OK) { \
        lzma_end(strm); \
        return ret_; \
    } \
} while (0)



extern lzma_ret
        lzma_next_coder *next, const lzma_allocator *allocator,
        uint64_t memlimit, uint32_t flags)
    lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
    if (flags & ~LZMA_SUPPORTED_FLAGS)
        return LZMA_OPTIONS_ERROR;

lzma_next_coder_init也是一个宏,它设置next->init,也就是这里的strm->internal->next.init = lzma_stream_decoder_init


    lzma_stream_coder *coder = next->coder;
    if (coder == NULL) {
        coder = lzma_alloc(sizeof(lzma_stream_coder), allocator);
        if (coder == NULL)
            return LZMA_MEM_ERROR;

        next->coder = coder;
        next->code = &stream_decode;
        next->end = &stream_decoder_end;
        next->get_check = &stream_decoder_get_check;
        next->memconfig = &stream_decoder_memconfig;

        coder->block_decoder = LZMA_NEXT_CODER_INIT;
        coder->index_hash = NULL;

    coder->memlimit = my_max(1, memlimit);
    coder->memusage = LZMA_MEMUSAGE_BASE;
    coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
            = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
    coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
    coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
    coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
    coder->first_stream = true;

    return stream_decoder_reset(coder, allocator);


while ((! in_finished) && (! out_finished)) {
    /* read incoming data */
    in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);

    if (feof (in_file)) {
        in_finished = true;
    if (ferror (in_file)) {
        in_finished = true;
        ret = RET_ERROR_INPUT;

    strm.next_in = in_buf;
    strm.avail_in = in_len;

    /* if no more data from in_buf, flushes the
       internal xz buffers and closes the decompressed data
       with LZMA_FINISH */
    action = in_finished ? LZMA_FINISH : LZMA_RUN;

    /* loop until there's no pending decompressed output */
    do {
        /* out_buf is clean at this point */
        strm.next_out = out_buf;
        strm.avail_out = OUT_BUF_MAX;

        /* decompress data */
        ret_xz = lzma_code (&strm, action);


extern LZMA_API(lzma_ret)
lzma_code(lzma_stream *strm, lzma_action action)
    // Sanity checks
    if ((strm->next_in == NULL && strm->avail_in != 0)
            || (strm->next_out == NULL && strm->avail_out != 0)
            || strm->internal == NULL
            || strm->internal->next.code == NULL
            || (unsigned int)(action) > LZMA_ACTION_MAX
            || !strm->internal->supported_actions[action])
        return LZMA_PROG_ERROR;

    // Check if unsupported members have been set to non-zero or non-NULL,
    // which would indicate that some new feature is wanted.
    if (strm->reserved_ptr1 != NULL
            || strm->reserved_ptr2 != NULL
            || strm->reserved_ptr3 != NULL
            || strm->reserved_ptr4 != NULL
            || strm->reserved_int1 != 0
            || strm->reserved_int2 != 0
            || strm->reserved_int3 != 0
            || strm->reserved_int4 != 0
            || strm->reserved_enum1 != LZMA_RESERVED_ENUM
            || strm->reserved_enum2 != LZMA_RESERVED_ENUM)
        return LZMA_OPTIONS_ERROR;


switch (strm->internal->sequence) {
case ISEQ_RUN:
    switch (action) {
    case LZMA_RUN:

        strm->internal->sequence = ISEQ_SYNC_FLUSH;

        strm->internal->sequence = ISEQ_FULL_FLUSH;

    case LZMA_FINISH:
        strm->internal->sequence = ISEQ_FINISH;

        strm->internal->sequence = ISEQ_FULL_BARRIER;


    // The same action must be used until we return
    // LZMA_STREAM_END, and the amount of input must not change.
    if (action != LZMA_SYNC_FLUSH
            || strm->internal->avail_in != strm->avail_in)
        return LZMA_PROG_ERROR;


    if (action != LZMA_FULL_FLUSH
            || strm->internal->avail_in != strm->avail_in)
        return LZMA_PROG_ERROR;


    if (action != LZMA_FINISH
            || strm->internal->avail_in != strm->avail_in)
        return LZMA_PROG_ERROR;


    if (action != LZMA_FULL_BARRIER
            || strm->internal->avail_in != strm->avail_in)
        return LZMA_PROG_ERROR;


case ISEQ_END:
    return LZMA_STREAM_END;

    return LZMA_PROG_ERROR;


size_t in_pos = 0;
size_t out_pos = 0;
lzma_ret ret = strm->internal->next.code(
        strm->internal->next.coder, strm->allocator,
        strm->next_in, &in_pos, strm->avail_in,
        strm->next_out, &out_pos, strm->avail_out, action);

例如block decoder的:

    next->coder = coder;
    next->code = &block_decode;
    next->end = &block_decoder_end;
    coder->next = LZMA_NEXT_CODER_INIT;

index decoder的:

    next->coder = coder;
    next->code = &index_decode;
    next->end = &index_decoder_end;
    next->memconfig = &index_decoder_memconfig;
    coder->index = NULL;

xz支持的一共有:alone decoder、auto decoder、block decoder、index decoder、stream decoder、delta decoder、lz decoder和它们对应的encoder。xz也支持一个simple decoder,位于simple_coder.c。


static lzma_ret
stream_decode(void *coder_ptr, const lzma_allocator *allocator,
        const uint8_t *restrict in, size_t *restrict in_pos,
        size_t in_size, uint8_t *restrict out,
        size_t *restrict out_pos, size_t out_size, lzma_action action)
    lzma_stream_coder *coder = coder_ptr;

    // When decoding the actual Block, it may be able to produce more
    // output even if we don't give it any new input.
    while (true)
    switch (coder->sequence) {


    // Copy the Stream Header to the internal buffer.
    lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
  1. 调用lzma_bufcpy拷贝LZMA_STREAM_HEADER_SIZE(12)字节的内容到coder->buffer中。lzma_bufcpy的参数含义是(in, in_pos, in_size, out, out_pos, out_size)。但是在拷贝前会检查源和目标剩余空间(in_avail, out_avail)是否够用。如果长度不够则退出。

    // Return if we didn't get the whole Stream Header yet.
    if (coder->pos < LZMA_STREAM_HEADER_SIZE)
        return LZMA_OK;
    coder->pos = 0;
  2. 解码头部信息。

    // Decode the Stream Header.
    const lzma_ret ret = lzma_stream_header_decode(
            &coder->stream_flags, coder->buffer);
    if (ret != LZMA_OK)
        return ret == LZMA_FORMAT_ERROR && !coder->first_stream
                ? LZMA_DATA_ERROR : ret;


extern LZMA_API(lzma_ret)
lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in)

-- 2.1 比较magic

    // Magic
    if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0)
        return LZMA_FORMAT_ERROR;

-- 2.2 对in + 6处的2字节Stream Flags计算CRC32,并与in + 6 + 2处保存的4字节CRC32值比较是否一致。

    // Verify the CRC32 so we can distinguish between corrupt
    // and unsupported files.
    const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic),
            LZMA_STREAM_FLAGS_SIZE, 0);
    if (crc != read32le(in + sizeof(lzma_header_magic)
            + LZMA_STREAM_FLAGS_SIZE))
        return LZMA_DATA_ERROR;

-- 2.3 解码flags。其实只用来确定options->check = in[1] & 0x0f。

    // Stream Flags
    if (stream_flags_decode(options, in + sizeof(lzma_header_magic)))
        return LZMA_OPTIONS_ERROR;

    // Set Backward Size to indicate unknown value. That way
    // lzma_stream_flags_compare() can be used to compare Stream Header
    // and Stream Footer while keeping it useful also for comparing
    // two Stream Footers.
    options->backward_size = LZMA_VLI_UNKNOWN;

    return LZMA_OK;
  1. 拷贝刚才获取到的options->check,并进入下一个sequence。

    // If we are decoding concatenated Streams, and the later
    // Streams have invalid Header Magic Bytes, we give
    coder->first_stream = false;
    // Copy the type of the Check so that Block Header and Block
    // decoders see it.
    coder->block_options.check = coder->stream_flags.check;
    // Even if we return LZMA_*_CHECK below, we want
    // to continue from Block Header decoding.
    coder->sequence = SEQ_BLOCK_HEADER;
    // Detect if there's no integrity check or if it is
    // unsupported if those were requested by the application.
    if (coder->tell_no_check && coder->stream_flags.check
            == LZMA_CHECK_NONE)
        return LZMA_NO_CHECK;
    if (coder->tell_unsupported_check
            && !lzma_check_is_supported(
    if (coder->tell_any_check)
        return LZMA_GET_CHECK;



  1. 如果in[*in_pos]为0,则退出当前处理,设置sequence为SEQ_INDEX。如果不是则调用lzma_block_header_size_decode宏进行处理。值为 (x + 1) * 4 。

    // Fall through

    case SEQ_BLOCK_HEADER: {
    if (*in_pos >= in_size)
    return LZMA_OK;

    if (coder->pos == 0) {
        // Detect if it's Index.
        if (in[*in_pos] == 0x00) {
            coder->sequence = SEQ_INDEX;
        // Calculate the size of the Block Header. Note that
        // Block Header decoder wants to see this byte too
        // so don't advance *in_pos.
                = lzma_block_header_size_decode(
  2. 拷贝header_size字节的Block Header到coder->buffer中。

    // Copy the Block Header to the internal buffer.
    lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
    // Return if we didn't get the whole Block Header yet.
    if (coder->pos < coder->block_options.header_size)
        return LZMA_OK;
    coder->pos = 0;
    // Version 1 is needed to support the .ignore_check option.
    coder->block_options.version = 1;
  3. 设置一个长度为LZMA_FILTERS_MAX + 1(4 + 1)的filters buffer。调用lzma_block_header_decode解析头信息。

    // Set up a buffer to hold the filter chain. Block Header
    // decoder will initialize all members of this array so
    // we don't need to do it here.
    lzma_filter filters[LZMA_FILTERS_MAX + 1];
    coder->block_options.filters = filters;
    // Decode the Block Header.
            allocator, coder->buffer));

3.1 lzma_block_header_decode的定义如下。初始化所有的filters。

extern LZMA_API(lzma_ret)
lzma_block_header_decode(lzma_block *block,
        const lzma_allocator *allocator, const uint8_t *in)
    // NOTE: We consider the header to be corrupt not only when the
    // CRC32 doesn't match, but also when variable-length integers
    // are invalid or over 63 bits, or if the header is too small
    // to contain the claimed information.

    // Initialize the filter options array. This way the caller can
    // safely free() the options even if an error occurs in this function.
    for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) {
        block->filters[i].id = LZMA_VLI_UNKNOWN;
        block->filters[i].options = NULL;

    // Versions 0 and 1 are supported. If a newer version was specified,
    // we need to downgrade it.
    if (block->version > 1)
        block->version = 1;

    // This isn't a Block Header option, but since the decompressor will
    // read it if version >= 1, it's better to initialize it here than
    // to expect the caller to do it since in almost all cases this
    // should be false.
    block->ignore_check = false;

3.2 调用lzma_block_header_size_decode宏(复习一下,(x + 1) * 4)校验in[0]解码出的大小是否与调用方设置的block->header_size一致,并校验Block Header的CRC32。

    // Validate Block Header Size and Check type. The caller must have
    // already set these, so it is a programming error if this test fails.
    if (lzma_block_header_size_decode(in[0]) != block->header_size
            || (unsigned int)(block->check) > LZMA_CHECK_ID_MAX)
        return LZMA_PROG_ERROR;

    // Exclude the CRC32 field.
    const size_t in_size = block->header_size - 4;

    // Verify CRC32
    if (lzma_crc32(in, in_size, 0) != read32le(in + in_size))
        return LZMA_DATA_ERROR;

    // Check for unsupported flags.
    if (in[1] & 0x3C)
        return LZMA_OPTIONS_ERROR;

    // Start after the Block Header Size and Block Flags fields.
    size_t in_pos = 2;

3.3 这里出现了一个lzma_vli_decode函数。vli代表“variable length integer”。变长整数的范围是0~ 0x7fffffff`ffffffff,最长耗费9字节。lzma_vli_decode(vli, vli_pos, in, in_pos, in_size)会做一些校验,并正确实现转换。


    // Compressed Size
    if (in[1] & 0x40) {
                NULL, in, &in_pos, in_size));

        // Validate Compressed Size. This checks that it isn't zero
        // and that the total size of the Block is a valid VLI.
        if (lzma_block_unpadded_size(block) == 0)
            return LZMA_DATA_ERROR;
    } else {
        block->compressed_size = LZMA_VLI_UNKNOWN;

3.4 同样的,获取解压后的大小。

    // Uncompressed Size
    if (in[1] & 0x80)
                NULL, in, &in_pos, in_size));
        block->uncompressed_size = LZMA_VLI_UNKNOWN;

3.5 接下来处理各种filter。最多可以有4个。

    // Filter Flags
    const size_t filter_count = (in[1] & 3U) + 1;
    for (size_t i = 0; i < filter_count; ++i) {
        const lzma_ret ret = lzma_filter_flags_decode(
                &block->filters[i], allocator,
                in, &in_pos, in_size);
        if (ret != LZMA_OK) {
            free_properties(block, allocator);
            return ret;

3.5.1 lzma_filter_flags_decode稍微麻烦点,贴一下代码看看。首先,解码filter id。id最大序号是(1 << 62) - 1。然后,解码property size。property size最大不能超过剩余长度。然后调用lzma_properties_decode进一步解析属性。

extern LZMA_API(lzma_ret)
        lzma_filter *filter, const lzma_allocator *allocator,
        const uint8_t *in, size_t *in_pos, size_t in_size)
    // Set the pointer to NULL so the caller can always safely free it.
    filter->options = NULL;

    // Filter ID
    return_if_error(lzma_vli_decode(&filter->id, NULL,
            in, in_pos, in_size));

    if (filter->id >= LZMA_FILTER_RESERVED_START)
        return LZMA_DATA_ERROR;

    // Size of Properties
    lzma_vli props_size;
    return_if_error(lzma_vli_decode(&props_size, NULL,
            in, in_pos, in_size));

    // Filter Properties
    if (in_size - *in_pos < props_size)
        return LZMA_DATA_ERROR;

    const lzma_ret ret = lzma_properties_decode(
            filter, allocator, in + *in_pos, props_size);

    *in_pos += props_size;

    return ret;

3.5.2 lzma_properties_decode代码如下。对filter->id搜索合适的decoder。根据xz的配置可以有不同的decoder,Linux中的支持全部9种decoder,逆向结果如下(代码被高度优化,但结果就是会遍历9项,而decoder最多也支持9种)。

每个decoder由四个段组成,分别是{.id = xx, .init = 初始化函数, .memusage = null 或者对应函数, .props_decode = props_decode函数(通常都是lzma_simple_props_decode,少数不同)}。

__int64 __fastcall lzma_properties_decode(_QWORD *a1, __int64 a2, __int64 a3, __int64 a4)
  a1[1] = 0LL;
  v4 = 0LL;
  for ( i = 0x4000000000000001LL; *a1 != i; i = *((_QWORD *)&unk_26CC0 + 4 * v4) )
    if ( ++v4 == 9 )
      return 8LL;

extern LZMA_API(lzma_ret)
lzma_properties_decode(lzma_filter *filter, const lzma_allocator *allocator,
        const uint8_t *props, size_t props_size)
    // Make it always NULL so that the caller can always safely free() it.
    filter->options = NULL;

    const lzma_filter_decoder *const fd = decoder_find(filter->id);
    if (fd == NULL)
        return LZMA_OPTIONS_ERROR;

    if (fd->props_decode == NULL)
        return props_size == 0 ? LZMA_OK : LZMA_OPTIONS_ERROR;


    return fd->props_decode(
            &filter->options, allocator, props, props_size);
} 为了方便后续理解,这里把三种都读一遍。第一个是lzma_lzma_props_decode。要求props_size为5。

extern lzma_ret
lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
        const uint8_t *props, size_t props_size)
    if (props_size != 5)
        return LZMA_OPTIONS_ERROR;

    lzma_options_lzma *opt
            = lzma_alloc(sizeof(lzma_options_lzma), allocator);
    if (opt == NULL)
        return LZMA_MEM_ERROR; 调用lzma_lzma_lclppb_decode。字节最多不超过24*9+8=224。然后设置pb/lp/lc,说是要看规范,这里先不管了。

    if (lzma_lzma_lclppb_decode(opt, props[0]))
        goto error;

extern bool
lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
    if (byte > (4 * 5 + 4) * 9 + 8)
        return true;

    // See the file format specification to understand this.
    options->pb = byte / (9 * 5);
    byte -= options->pb * 9 * 5;
    options->lp = byte / 9;
    options->lc = byte - options->lp * 9;

    return options->lc + options->lp > LZMA_LCLP_MAX;
} 接下来看另一个,lzma_lzma2_props_decode。要求prop_size为1,该属性决定其字典大小。

extern lzma_ret
lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator,
        const uint8_t *props, size_t props_size)
    if (props_size != 1)
        return LZMA_OPTIONS_ERROR;

    // Check that reserved bits are unset.
    if (props[0] & 0xC0)
        return LZMA_OPTIONS_ERROR;

    // Decode the dictionary size.
    if (props[0] > 40)
        return LZMA_OPTIONS_ERROR;

    lzma_options_lzma *opt = lzma_alloc(
            sizeof(lzma_options_lzma), allocator);
    if (opt == NULL)
        return LZMA_MEM_ERROR;

    if (props[0] == 40) {
        opt->dict_size = UINT32_MAX;
    } else {
        opt->dict_size = 2 | (props[0] & 1U);
        opt->dict_size <<= props[0] / 2U + 11;

    opt->preset_dict = NULL;
    opt->preset_dict_size = 0;

    *options = opt;

    return LZMA_OK;
} 最后是lzma_simple_props_decode。props_size只能为0或4字节(为4时用来设置start_offset)。

extern lzma_ret
lzma_simple_props_decode(void **options, const lzma_allocator *allocator,
        const uint8_t *props, size_t props_size)
    if (props_size == 0)
        return LZMA_OK;

    if (props_size != 4)
        return LZMA_OPTIONS_ERROR;

    lzma_options_bcj *opt = lzma_alloc(
            sizeof(lzma_options_bcj), allocator);
    if (opt == NULL)
        return LZMA_MEM_ERROR;

    opt->start_offset = read32le(props);

    // Don't leave an options structure allocated if start_offset is zero.
    if (opt->start_offset == 0)
        lzma_free(opt, allocator);
        *options = opt;

    return LZMA_OK;
} 回到上层lzma_lzma_props_decode中,设置dict_size,函数退出。

    // All dictionary sizes are accepted, including zero. LZ decoder
    // will automatically use a dictionary at least a few KiB even if
    // a smaller dictionary is requested.
    opt->dict_size = read32le(props + 1);

    opt->preset_dict = NULL;
    opt->preset_dict_size = 0;

    *options = opt;

    return LZMA_OK;

    lzma_free(opt, allocator);

3.5.3 处理剩余padding部分
// Padding
while (in_pos < in_size) {
if (in[in_pos++] != 0x00) {
free_properties(block, allocator);

            // Possibly some new field present so use
            // LZMA_OPTIONS_ERROR instead of LZMA_DATA_ERROR.
            return LZMA_OPTIONS_ERROR;

    return LZMA_OK;

3.6 终于返回最外层,stream_decode里面。循环遍历,如果有内存消耗计算器则调用并添加,如果没有则用1024近似替代。其实memusage函数也很简单,就是统计结构体+字典的内存占用。

    // If LZMA_IGNORE_CHECK was used, this flag needs to be set.
    // It has to be set after lzma_block_header_decode() because
    // it always resets this to false.
    coder->block_options.ignore_check = coder->ignore_check;

    // Check the memory usage limit.
    const uint64_t memusage = lzma_raw_decoder_memusage(filters);
    lzma_ret ret;

extern uint64_t
lzma_lz_decoder_memusage(size_t dictionary_size)
    return sizeof(lzma_coder) + (uint64_t)(dictionary_size);

3.7 这个主要为了避免字典过大(超过coder->memlimit)。如果一切ok,则调用lzma_block_decoder_init。

    if (memusage == UINT64_MAX) {
        // One or more unknown Filter IDs.
        ret = LZMA_OPTIONS_ERROR;
    } else {
        // Now we can set coder->memusage since we know that
        // the filter chain is valid. We don't want
        // lzma_memusage() to return UINT64_MAX in case of
        // invalid filter chain.
        coder->memusage = memusage;

        if (memusage > coder->memlimit) {
            // The chain would need too much memory.
            ret = LZMA_MEMLIMIT_ERROR;
        } else {
            // Memory usage is OK.
            // Initialize the Block decoder.
            ret = lzma_block_decoder_init(

3.7.1 lzma_block_decoder_init调用lzma_next_coder_init来设置next->init为lzma_block_decoder_init。然后对数据进行校验。这个函数和最上面介绍的lzma_stream_decoder_init其实很像。

extern lzma_ret
lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
        lzma_block *block)
    lzma_next_coder_init(&lzma_block_decoder_init, next, allocator);

    // Validate the options. lzma_block_unpadded_size() does that for us
    // except for Uncompressed Size and filters. Filters are validated
    // by the raw decoder.
    if (lzma_block_unpadded_size(block) == 0
            || !lzma_vli_is_valid(block->uncompressed_size))
        return LZMA_PROG_ERROR;

3.7.2 如果没有next->coder则初始化它。code设置为block_decode。然后进行其他的初始化。

// Allocate *next->coder if needed.
lzma_block_coder *coder = next->coder;
if (coder == NULL) {
    coder = lzma_alloc(sizeof(lzma_block_coder), allocator);
    if (coder == NULL)
        return LZMA_MEM_ERROR;

    next->coder = coder;
    next->code = &block_decode;
    next->end = &block_decoder_end;
    coder->next = LZMA_NEXT_CODER_INIT;

// Basic initializations
coder->sequence = SEQ_CODE;
coder->block = block;
coder->compressed_size = 0;
coder->uncompressed_size = 0;

// If Compressed Size is not known, we calculate the maximum allowed
// value so that encoded size of the Block (including Block Padding)
// is still a valid VLI and a multiple of four.
        = block->compressed_size == LZMA_VLI_UNKNOWN
            ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
                - block->header_size
                - lzma_check_size(block->check)
            : block->compressed_size;

3.7.3 最后一部分是对lzma_check_init的调用,其实就是初始化crc32/64对应的字段为0。然后循环调用lzma_raw_decoder_init,直到所有的filter都处理完成。

    // Initialize the check. It's caller's problem if the Check ID is not
    // supported, and the Block decoder cannot verify the Check field.
    // Caller can test lzma_check_is_supported(block->check).
    coder->check_pos = 0;
    lzma_check_init(&coder->check, block->check);

    coder->ignore_check = block->version >= 1
            ? block->ignore_check : false;

    // Initialize the filter chain.
    return lzma_raw_decoder_init(&coder->next, allocator,

3.8 回到外层。清理之前的临时filters对象。并设置序列状态为SEQ_BLOCK。

    // Free the allocated filter options since they are needed
    // only to initialize the Block decoder.
    for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
        lzma_free(filters[i].options, allocator);

    coder->block_options.filters = NULL;

    // Check if memory usage calculation and Block enocoder
    // initialization succeeded.
    if (ret != LZMA_OK)
        return ret;

    coder->sequence = SEQ_BLOCK;


psproc 源码阅读 - 5


/********** show one process (NULL proc prints header) **********/

//#define SPACE_AMOUNT page_size
#define SPACE_AMOUNT 144

static char *saved_outbuf;

void show_one_proc(const proc_t *restrict const p, const format_node *restrict fmt) {
    /* unknown: maybe set correct & actual to 1, remove +/- 1 below */
    int correct  = 0;  /* screen position we should be at */
    int actual   = 0;  /* screen position we are at */
    int amount   = 0;  /* amount of text that this data is */
    int leftpad  = 0;  /* amount of space this column _could_ need */
    int space    = 0;  /* amount of space we actually need to print */
    int dospace  = 0;  /* previous column determined that we need a space */
    int legit    = 0;  /* legitimately stolen extra space */
    int sz       = 0;  /* real size of data in outbuffer */
    int tmpspace = 0;
    char *restrict const outbuf = saved_outbuf;
    static int did_stuff = 0;  /* have we ever printed anything? */


    if(-1==(long)p) {   /* true only once, at the end */
        if(did_stuff) return;
        /* have _never_ printed anything, but might need a header */
        if(!--lines_to_next_header) {
            lines_to_next_header = header_gap;
        /* fprintf(stderr, "No processes available.\n"); */  /* legal? */

如果是其他情况,则生成这样的递归栈输出fmt。fmt(format node)就是那一组预定义的标头和格式化的数组。

    if(p) { /* not header, maybe we should call ourselves for it */
        if(!--lines_to_next_header) {
            lines_to_next_header = header_gap;
    did_stuff = 1;
    if(active_cols>(int)OUTBUF_SIZE) fprintf(stderr,_("fix bigness error\n"));

    /* print row start sequence */
    for(;;) {
        legit = 0; 

        if(fmt->next) {
            max_rightward = fmt->width;
            tmpspace = 0;
        } else {
            tmpspace = correct-actual;
            if (tmpspace<1) {
                tmpspace = dospace;
                max_rightward = active_cols-actual-tmpspace;
            } else {
                max_rightward = active_cols - ( (correct>actual) ? correct : actual );
        if(max_rightward <= 0) max_rightward = 0;
        else if(max_rightward >= OUTBUF_SIZE) max_rightward = OUTBUF_SIZE-1;

        max_leftward  = fmt->width + actual - correct; /* TODO check this */
        if(max_leftward <= 0) max_leftward = 0;
        else if(max_leftward >= OUTBUF_SIZE) max_leftward = OUTBUF_SIZE-1;


static int pr_wchan(char *restrict const outbuf, const proc_t *restrict const pp) {
    const char *w;
    size_t len;
    setREL1(WCHAN_NAME)  //<-- 如果没有设置outbuf,设置rel_WCHAN_NAME,设置完会退出。如果outbuf有值则不管。 这里outbuf是由saved_outbuf(= outbuf + SPACE_AMOUNT == 144,还记得最早的时候初始化的那个带保护页的区域吗……)传来的,因此有值。
    w = rSv(WCHAN_NAME, str, pp); //<-- rSv复习一下,就是pp->head[rel_WCHAN_NAME].result.str。


    if (!(R->result.str = strdup(lookup_wchan(P->tid)))) I->seterr = 1;;


    len = strlen(w);
    if(len>max_rightward) len=max_rightward;
    memcpy(outbuf, w, len);
    outbuf[len] = '\0';
    return len;


        /* prepare data and calculate leftpad */
        if(p && fmt->pr) amount = (*fmt->pr)(outbuf,p);
        else amount = snprintf(outbuf, OUTBUF_SIZE, "%s", fmt->name); /* AIX or headers */


        if(amount < 0) outbuf[amount = 0] = '\0';
        else if(amount >= OUTBUF_SIZE) outbuf[amount = OUTBUF_SIZE-1] = '\0';

        switch((fmt->flags) & CF_JUST_MASK) {
        case 0:  /* for AIX, assigned outside this file */
            leftpad = 0;
        case CF_LEFT:          /* bad */
            leftpad = 0;
        case CF_RIGHT:     /* OK */
            leftpad = fmt->width - amount;
            if(leftpad < 0) leftpad = 0;
        case CF_SIGNAL:
            /* if the screen is wide enough, use full 16-character output */
            if(wide_signals) {
                leftpad = 16 - amount;
                legit = 7;
            } else {
                leftpad =  9 - amount;
            if(leftpad < 0) leftpad = 0;
        case CF_USER:       /* bad */
            leftpad = fmt->width - amount;
            if(leftpad < 0) leftpad = 0;
            if(!user_is_number) leftpad = 0;
        case CF_WCHAN:       /* bad */
            if(wchan_is_number) {
                leftpad = fmt->width - amount;
                if(leftpad < 0) leftpad = 0;
            } else {
                if ((active_cols-actual-tmpspace)<1)
                    outbuf[1] = '\0';  /* oops, we (mostly) lose this column... */
                leftpad = 0;
        case CF_UNLIMITED:
            if(active_cols-actual-tmpspace < 1)
                outbuf[1] = '\0';    /* oops, we (mostly) lose this column... */
            leftpad = 0;
            fprintf(stderr, _("bad alignment code\n"));
        /* At this point:
         * correct   from previous column
         * actual    from previous column
         * amount    not needed (garbage due to chopping)
         * leftpad   left padding for this column alone (not make-up or gap)
         * space     not needed (will recalculate now)
         * dospace   if we require space between this and the prior column
         * legit     space we were allowed to steal, and thus did steal
        space = correct - actual + leftpad;
        if(space<1) space=dospace;
        if(space>SPACE_AMOUNT) space=SPACE_AMOUNT;  // only so much available

        /* real size -- don't forget in 'amount' is number of cells */
        outbuf[OUTBUF_SIZE-1] = '\0';
        sz = strlen(outbuf);

        /* print data, set x position stuff */
        if(!fmt->next) {
            /* Last column. Write padding + data + newline all together. */
            outbuf[sz] = '\n';
            fwrite(outbuf-space, space+sz+1, 1, stdout);
        /* Not the last column. Write padding + data together. */
        fwrite(outbuf-space, space+sz, 1, stdout);
        actual  += space+amount;
        correct += fmt->width;
        correct += legit;        /* adjust for SIGNAL expansion */
        if(fmt->pr && fmt->next->pr) { /* neither is AIX filler */
            dospace = 1;
        } else {
            dospace = 0;
        fmt = fmt->next;
        /* At this point:
         * correct   screen position we should be at
         * actual    screen position we are at
         * amount    not needed
         * leftpad   not needed
         * space     not needed
         * dospace   if have determined that we need a space next time
         * legit     not needed

psproc 源码阅读 - 4


    finalize_stacks(); //<===

    if(forest_type || sort_list) fancy_spew(); 
    else simple_spew(); /* no sort, no forest */
    show_one_proc((proc_t *)-1,format_list); /* no output yet? */

    return 0;


static void finalize_stacks (void)
    format_node *f_node;
    sort_node *s_node;

#if (PIDSITEMS < 60)
# error PIDSITEMS (common.h) should be at least 60!

    /* first, ensure minimum result structures for items
       which may or may not actually be displayable ... */
    Pids_index = 0;

    // needed by for selections

    // now accommodate any results not yet satisfied
    f_node = format_list;
    while (f_node) {
        (*f_node->pr)(NULL, NULL);
        f_node = f_node->next;
    s_node = sort_list;
    while (s_node) {
        if (s_node->xe) (*s_node->xe)(NULL, NULL);
        s_node = s_node->next;

    procps_pids_reset(Pids_info, Pids_items, Pids_index);


/* namREL(e): name of the int variable holding item e's relative index. */
#define namREL(e) rel_ ## e
/* makEXT(e): declare that index variable (extern). */
#define makEXT(e) extern int namREL(e);
/* makREL(e): define that index variable; -1 marks it as not yet assigned. */
#define makREL(e) int namREL(e) = -1;
/*
 * chkREL(e): assign item e an index on first use (i.e. while rel_e < 0).
 * PIDS_e is stored at Pids_items[Pids_index]; rel_e then receives the
 * current Pids_index (which is post-incremented) while the index is below
 * PIDSITEMS, and rel_noop otherwise.
 * Expands to a bare `if { }` statement, so no trailing semicolon is needed.
 * NOTE(review): the slot is written before the bound test; whether
 * Pids_items has room for index PIDSITEMS is not visible here -- confirm.
 */
#define chkREL(e) if (namREL(e) < 0) { \
      Pids_items[Pids_index] = PIDS_ ## e; \
      if (Pids_index < PIDSITEMS) namREL(e) = Pids_index++; \
      else namREL(e) = rel_noop; }


   if(rel_XX < 0) {
     Pids_items[Pids_index] = PIDS_XX;
     rel_XX = (Pids_index < PIDSITEMS) ? Pids_index++ : rel_noop;


回到main中,下一个函数是fancy_spew,当然仅当开启forest_type / sort_list后才调用。

    if(forest_type || sort_list) fancy_spew();  //<---
    else simple_spew(); /* no sort, no forest */
    show_one_proc((proc_t *)-1,format_list); /* no output yet? */

    return 0;


/***** sorted or forest */
static void fancy_spew(void) {
    struct pids_fetch *pidread;
    enum pids_fetch_type which;
    proc_t *buf;
    int i, n = 0;

    which = (thread_flags & TF_loose_tasks)

    pidread = procps_pids_reap(Pids_info, which);
    if (!pidread || !pidread->counts->total) {
        fprintf(stderr, _("fatal library error, reap\n"));
    processes = xcalloc(pidread->counts->total, sizeof(void*));
    for (i = 0; i < pidread->counts->total; i++) {
        buf = pidread->stacks[i];
        if (want_this_proc(buf))
            processes[n++] = buf;
    if (n) {
        if(forest_type) prep_forest_sort();
        while(sort_list) {
            procps_pids_sort(Pids_info, processes, n, sort_list->sr, sort_list->reverse);
            sort_list = sort_list->next;
        if(forest_type) show_forest(n);
        else show_proc_array(n);


/* procps_pids_reap():
 * Harvest all the available tasks/threads and provide the result
 * stacks along with a summary of the information gathered.
 * Returns: pointer to a pids_fetch struct on success, NULL on error.
PROCPS_EXPORT struct pids_fetch *procps_pids_reap (
    struct pids_info *info,
    enum pids_fetch_type which)
    int rc;

    errno = EINVAL;
    if (info == NULL)
        return NULL;
    if (which != PIDS_FETCH_TASKS_ONLY && which != PIDS_FETCH_THREADS_TOO)
        return NULL;
    /* with items & numitems technically optional at 'new' time, it's
       expected 'reset' will have been called -- but just in case ... */
    if (!info->curitems)
        return NULL;
    errno = 0;

    if (!pids_oldproc_open(&info->fetch_PT, info->oldflags))
        return NULL;
    info->read_something = which ? readeither : readproc;

    rc = pids_stacks_fetch(info);

    // we better have found at least 1 pid
    return (rc > 0) ? &info->fetch.results : NULL;
} // end: procps_pids_reap


static inline int pids_oldproc_open (
    PROCTAB **this,
    unsigned flags,
    va_list vl;
    int *ids;
    int num = 0;

    if (*this == NULL) {
        va_start(vl, flags);
        ids = va_arg(vl, int*);
        if (flags & PROC_UID) num = va_arg(vl, int);
        if (NULL == (*this = openproc(flags, ids, num)))
            return 0;
    return 1;
} // end: pids_oldproc_open


// initiate a process table scan
PROCTAB *openproc(unsigned flags, ...) {
    va_list ap;
    struct stat sbuf;
    static __thread int did_stat;
    PROCTAB *PT = calloc(1, sizeof(PROCTAB));

    if (!PT)
        return NULL;
    if (!did_stat) {
        task_dir_missing = stat("/proc/self/task", &sbuf);
        did_stat = 1;


    PT->taskdir = NULL;
    PT->taskdir_user = -1;
    PT->taskfinder = simple_nexttid;
    PT->taskreader = simple_readtask;

    PT->reader = simple_readproc;
    if (flags & PROC_PID) {
        PT->procfs = NULL;
        PT->finder = listed_nextpid;
    } else {
        PT->procfs = opendir("/proc");
        if (!PT->procfs) {
            return NULL;
        PT->finder = simple_nextpid;
    PT->flags = flags;


    va_start(ap, flags);
    if (flags & PROC_PID)
        PT->pids = va_arg(ap, pid_t*);
    else if (flags & PROC_UID) {
        PT->uids = va_arg(ap, uid_t*);
        PT->nuid = va_arg(ap, int);

MAX_BUFSZ为1024 * 64 * 2字节。这里初始化src_buffer和dst_buffer(都是全局变量)。

    if (!src_buffer
            && !(src_buffer = malloc(MAX_BUFSZ))) {
        return NULL;
    if (!dst_buffer
            && !(dst_buffer = malloc(MAX_BUFSZ))) {
        return NULL;

    return PT;


static int pids_stacks_fetch (
    struct pids_info *info)
#define n_alloc  info->fetch.n_alloc
#define n_inuse  info->fetch.n_inuse
#define n_saved  info->fetch.n_alloc_save
    struct stacks_extent *ext;


    // initialize stuff -----------------------------------
    if (!info->fetch.anchor) {
        if (!(info->fetch.anchor = calloc(STACKS_INIT, sizeof(void *))))
            return -1;
        if (!(ext = pids_stacks_alloc(info, STACKS_INIT)))
            return -1;       // here, errno was set to ENOMEM
        memcpy(info->fetch.anchor, ext->stacks, sizeof(void *) * STACKS_INIT);
        n_alloc = STACKS_INIT;
    memset(&info->fetch.counts, 0, sizeof(struct pids_counts));


    // iterate stuff --------------------------------------
    n_inuse = 0;
    while (info->read_something(info->fetch_PT, &info->fetch_proc)) {


// readeither: return a pointer to a proc_t filled with requested info about
// the next unique process or task available.  If no more are available,
// return a null pointer (boolean false).
proc_t *readeither (PROCTAB *restrict const PT, proc_t *restrict x) {
    static __thread proc_t skel_p;    // skeleton proc_t, only uses tid + tgid
    static __thread proc_t *new_p;    // for process/task transitions
    static __thread int canary, leader;
    char path[PROCPATHLEN];
    proc_t *ret;


    if (new_p) {
        if (new_p->tid != canary) new_p = NULL;
        goto next_task;

    new_p = NULL;
    for (;;) {
        if (errno == ENOMEM) goto end_procs;
        // fills in the PT->path, plus skel_p.tid and skel_p.tgid
        if (!PT->finder(PT,&skel_p)) goto end_procs;       // simple_nextpid
        leader = skel_p.tid;
        if (!task_dir_missing) break;
        if ((ret = PT->reader(PT,x))) return ret;          // simple_readproc

    // fills in our path, plus x->tid and x->tgid
    if (!(PT->taskfinder(PT,&skel_p,x,path)))              // simple_nexttid
        goto next_proc;
    /* to avoid loss of some thread group leader data,
       we must check its base dir, not its 'task' dir! */
    if (x->tid == leader) ret = PT->reader(PT,x);          // simple_readproc
    else ret = PT->taskreader(PT,x,path);                  // simple_readtask
    if (!ret) goto next_proc;
    if (!new_p) {
        new_p = ret;
        canary = new_p->tid;
    return ret;

    return NULL;


// This finds processes in /proc in the traditional way.
// Return non-zero on success.
static int simple_nextpid(PROCTAB *restrict const PT, proc_t *restrict const p) {
static __thread struct dirent *ent; /* dirent handle */
char *restrict const path = PT->path;
for (;;) {
ent = readdir(PT->procfs);
if(!ent || !ent->d_name[0]) return 0;
if(*ent->d_name > '0' && *ent->d_name <= '9') break;
p->tgid = strtoul(ent->d_name, NULL, 10);
p->tid = p->tgid;
snprintf(path, PROCPATHLEN, "/proc/%s", ent->d_name);
return 1;


// This reads process info from /proc in the traditional way, for one process.
// The pid (tgid? tid?) is already in p, and a path to it in path, with some
// room to spare.
static proc_t *simple_readproc(PROCTAB *restrict const PT, proc_t *restrict const p) {
    static __thread struct utlbuf_s ub = { NULL, 0 };    // buf for stat,statm,status
    static __thread struct stat sb;     // stat() buffer
    char *restrict const path = PT->path;
    unsigned flags = PT->flags;
    int rc = 0;

    if (stat(path, &sb) == -1)                  /* no such dirent (anymore) */
        goto next_proc;

    if ((flags & PROC_UID) && !XinLN(uid_t, sb.st_uid, PT->uids, PT->nuid))
        goto next_proc;                      /* not one of the requested uids */


/*
 * XinL(T, X, L): evaluates to 1 when value X (of type T) occurs in the
 * zero-terminated list L, and to 0 otherwise.  The scan stops at the first
 * match or at the terminator; because the final comparison is made against
 * the element where the scan stopped, searching for 0 itself yields 1.
 * Written as a statement expression ( { ... } ); the temporaries are named
 * x and l, so the arguments must not refer to identifiers with those names.
 */
#   define XinL(T, X, L) ( {                    \
            T x = (X), *l = (L);                \
            for (; *l; l++)                     \
                if (*l == x) break;             \
            *l == x;                            \
        } )

/*
 * XinLN(T, X, L, N): evaluates to 1 when value X (of type T) occurs among
 * the first N elements of list L, and to 0 otherwise (including N <= 0).
 * Unlike XinL, a zero element does not end the scan; only the count does.
 * Written as a statement expression ( { ... } ); the temporaries are named
 * x, l, i and n, so the arguments must not refer to identifiers with those
 * names.
 */
#   define XinLN(T, X, L, N) ( {                \
            T x = (X), *l = (L);                \
            int i, n = (N);                     \
            for (i = 0; i < n; i++)             \
                if (l[i] == x) break;           \
            i < n;                              \
        } )


/* Read the file <directory>/<what> into the growable buffer described by ub.
 *
 * ub->buf is allocated on first use and enlarged in buffGRW steps whenever a
 * read fills it completely; the buffer is kept in ub for reuse by later calls.
 * Returns the number of bytes read (the data is NUL terminated), or -1 when
 * the path does not fit, the file cannot be opened, memory cannot be
 * obtained, or nothing was read. */
static int file2str(const char *directory, const char *what, struct utlbuf_s *ub) {
#define buffGRW 1024
    char path[PROCPATHLEN];
    int fd, num, tot_read = 0, len;

    /* on first use we preallocate a buffer of minimum size to emulate
       former 'local static' behavior -- even if this read fails, that
       buffer will likely soon be used for another subdirectory anyway
       ( besides, with the calloc call we will never need use memcpy ) */
    if (ub->buf) ub->buf[0] = '\0';
    else {
        ub->buf = calloc(1, (ub->siz = buffGRW));
        if (!ub->buf) return -1;
    }
    len = snprintf(path, sizeof path, "%s/%s", directory, what);
    if (len <= 0 || (size_t)len >= sizeof path) return -1;
    if (-1 == (fd = open(path, O_RDONLY, 0))) return -1;
    while (0 < (num = read(fd, ub->buf + tot_read, ub->siz - tot_read))) {
        char *grown;

        tot_read += num;
        if (tot_read < ub->siz) break;      // short read: room is left for the NUL
        if (ub->siz >= INT_MAX - buffGRW) {
            tot_read--;                     // cannot grow: give up one byte for the NUL
            break;
        }
        /* grow through a temporary so the old buffer is not lost on failure */
        grown = realloc(ub->buf, ub->siz + buffGRW);
        if (!grown) {
            close(fd);
            return -1;
        }
        ub->buf = grown;
        ub->siz += buffGRW;
    }
    ub->buf[tot_read] = '\0';
    close(fd);
    if (tot_read < 1) return -1;
    return tot_read;
#undef buffGRW
}


    // the effective ids are taken from the stat() result held in sb
    // NOTE(review): presumably sb describes the /proc/<pid> directory, making
    // these the directory owner's ids -- confirm against the stat() call site
    p->euid = sb.st_uid;                        /* need a way to get real uid */
    p->egid = sb.st_gid;                        /* need a way to get real gid */


$ cat /proc/13/stat
13 (bash) S 12 13 12 1025 0 0 0 0 0 0 21 59 120 489 20 0 1 0 34 213183188992 1011 18446744073709551615 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

stat2proc(是的我没贴代码……不然太多了)会去查找( ) 括号中的内容,这个是来自task_struct结构的进程名,最长只有15字节。为了处理进程名中的特殊字符,它还会对其进行简单替换,然后对剩余内容进行扫描(sscanf)并保存到结构体中。

    /* Each section below is guarded by one PROC_* flag.  Only a failed
       read of "stat" abandons the process; the other files are optional.
       rc accumulates the non-zero returns of the helper routines. */
    if (flags & PROC_FILLSTAT) {                // read /proc/#/stat
        if (file2str(path, "stat", &ub) == -1)
            goto next_proc;
        rc += stat2proc(ub.buf, p);
    }

    if (flags & PROC_FILLIO) {                  // read /proc/#/io
        if (file2str(path, "io", &ub) != -1)
            io2proc(ub.buf, p);
    }

    if (flags & PROC_FILLSMAPS) {               // read /proc/#/smaps_rollup
        if (file2str(path, "smaps_rollup", &ub) != -1)
            smaps2proc(ub.buf, p);
    }

    if (flags & PROC_FILLMEM) {                 // read /proc/#/statm
        if (file2str(path, "statm", &ub) != -1)
            statm2proc(ub.buf, p);
    }

    if (flags & PROC_FILLSTATUS) {              // read /proc/#/status
        if (file2str(path, "status", &ub) != -1) {
            rc += status2proc(ub.buf, p, 1);
            // the lookups below depend on ids parsed from "status"
            if (flags & (PROC_FILL_SUPGRP & ~PROC_FILLSTATUS))
                rc += supgrps_from_supgids(p);
            if (flags & (PROC_FILL_OUSERS & ~PROC_FILLSTATUS)) {
                p->ruser = pwcache_get_user(p->ruid);
                p->suser = pwcache_get_user(p->suid);
                p->fuser = pwcache_get_user(p->fuid);
            }
            if (flags & (PROC_FILL_OGROUPS & ~PROC_FILLSTATUS)) {
                p->rgroup = pwcache_get_group(p->rgid);
                p->sgroup = pwcache_get_group(p->sgid);
                p->fgroup = pwcache_get_group(p->fgid);
            }
        }
    }

    // if multithreaded, some values are crap
    if(p->nlwp > 1)
        p->wchan = ~0ul;

    /* some number->text resolving which is time consuming */
    /* ( names are cached, so memcpy to arrays was silly ) */
    if (flags & PROC_FILLUSR)
        p->euser = pwcache_get_user(p->euid);
    if (flags & PROC_FILLGRP)
        p->egroup = pwcache_get_group(p->egid);


    if (flags & PROC_FILLENV)                   // read /proc/#/environ
        if (!(p->environ_v = file2strvec(path, "environ")))
            rc += vectorize_dash_rc(&p->environ_v);   // fall back to a "-" vector
    if (flags & PROC_EDITENVRCVT)
        rc += fill_environ_cvt(path, p);


// This littl' guy just serves those true vectorized fields
// ( when a /proc source field didn't exist )
// It stores a one-element "-" vector through vec.
// Returns 1 when that vector could not be created, 0 otherwise.
static int vectorize_dash_rc (char ***vec) {
    if (!(*vec = vectorize_this_str("-")))
        return 1;
    return 0;
}
// Duplicate src into a single allocation that also holds a NULL-terminated
// one-element pointer vector referring to that copy.
// Layout: [string copy][alignment padding][vec[0] = copy][vec[1] = NULL]
// Returns the vector, or NULL when memory could not be obtained.
// The whole allocation is released with free(*vec).
char **vectorize_this_str (const char *src) {
#define pSZ  (sizeof(char*))
    char *cpy, **vec;
    size_t adj, tot;

    tot = strlen(src) + 1;                       // prep for our vectors
    if (tot < 1 || tot >= INT_MAX) tot = INT_MAX-1; // integer overflow?
    adj = (pSZ-1) - ((tot + pSZ-1) & (pSZ-1));   // calc alignment bytes
    cpy = calloc(1, tot + adj + (2 * pSZ));      // get new larger buffer
    if (!cpy) return NULL;                       // oops, looks like ENOMEM
    snprintf(cpy, tot, "%s", src);               // duplicate their string
    vec = (char**)(cpy + tot + adj);             // prep pointer to pointers
    *vec = cpy;                                  // point 1st vector to string
    *(vec+1) = NULL;                             // null ptr 'list' delimit
    return vec;                                  // ==> free(*vec) to dealloc
#undef pSZ
}


// This routine reads an 'environ' for the designated proc_t, flattens it
// into one space separated string and escapes it.  p->environ receives a
// strdup'd copy, or a copy of "-" when nothing could be read.
// Returns 0 on success, 1 when the copy could not be allocated.
static int fill_environ_cvt (const char *directory, proc_t *restrict p) {
    dst_buffer[0] = '\0';
    if (read_unvectored(src_buffer, MAX_BUFSZ, directory, "environ", ' '))
        escape_str(dst_buffer, src_buffer, MAX_BUFSZ);
    p->environ = strdup(dst_buffer[0] ? dst_buffer : "-");
    if (!p->environ)
        return 1;
    return 0;
}

// this is the former under utilized 'read_cmdline', which has been
// generalized in support of these new libproc flags:
//
// Reads <whom>/<what> into dst (capacity sz bytes), then turns every
// embedded NUL or newline into 'sep' so the result is one C string.
// Returns the number of bytes kept in dst; 0 when sz is 0, the path does
// not fit, the file cannot be opened, or nothing was read.
static int read_unvectored(char *restrict const dst, unsigned sz, const char *whom, const char *what, char sep) {
    char path[PROCPATHLEN];
    int fd, len;
    unsigned n = 0;

    if(sz <= 0) return 0;
    if(sz >= INT_MAX) sz = INT_MAX-1;
    dst[0] = '\0';

    len = snprintf(path, sizeof(path), "%s/%s", whom, what);
    if(len <= 0 || (size_t)len >= sizeof(path)) return 0;
    fd = open(path, O_RDONLY);
    if(fd==-1) return 0;

    for(;;) {
        ssize_t r = read(fd,dst+n,sz-n);
        if(r==-1) {
            if(errno==EINTR) continue;
            break;       // any other error: keep what we already have
        }
        if(r<=0) break;  // EOF
        n += r;
        if(n==sz) {      // filled the buffer
            --n;         // make room for '\0'
            break;
        }
    }
    close(fd);
    if(n) {
        unsigned i = n;
        while(i && dst[i-1]=='\0') --i; // skip trailing zeroes
        while(i--)                      // separators replace the remaining breaks
            if(dst[i]=='\n' || dst[i]=='\0') dst[i]=sep;
        if(dst[n-1]==' ') dst[n-1]='\0';
    }
    dst[n] = '\0';
    return n;
}


// Rewrite the NUL terminated string str in place, one byte at a time:
// every byte whose ESC_tab entry is not '|' is replaced by that entry.
static inline void esc_all (unsigned char *str) {
    unsigned char c;

    // if bad locale/corrupt str, replace non-printing stuff
    while (*str) {
        if ((c = ESC_tab[*str]) != '|')
            *str = c;
        ++str;      // advance, otherwise the loop never terminates
    }
}

// Walk the first len bytes of str one multibyte sequence at a time, using
// UTF_tab to obtain the sequence length from its first byte, and replace
// control characters with '?'.  When a sequence is invalid or would run
// past len, the remainder is handed to esc_all() instead.
static inline void esc_ctl (unsigned char *str, int len) {
    int i, n;

    for (i = 0; i < len; ) {
        // even with a proper locale, strings might be corrupt
        if ((n = UTF_tab[*str]) < 0 || i + n > len) {
            esc_all(str);
            return;
        }
        // and eliminate those non-printing control characters
        if (*str < 0x20 || *str == 0x7f)
            *str = '?';
        str += n;
        i += n;
    }
}

// Copy src into dst (at most bufsize bytes including the NUL) and escape
// the copy in place.  The locale's codeset is looked up once per thread:
// outside UTF-8 every byte goes through esc_all(), while under UTF-8 only
// control characters are replaced by esc_ctl().
// Returns the number of bytes stored in dst, not counting the NUL.
// NOTE(review): SECURE_ESCAPE_ARGS is defined elsewhere; presumably it
// validates dst/bufsize and may return early -- confirm.
int escape_str (unsigned char *dst, const unsigned char *src, int bufsize) {
    static __thread int utf_sw = 0;     // 0 = not yet probed, 1 = UTF-8, -1 = other
    int n;

    if (utf_sw == 0) {
        char *enc = nl_langinfo(CODESET);
        utf_sw = enc && strcasecmp(enc, "UTF-8") == 0 ? 1 : -1;
    }
    SECURE_ESCAPE_ARGS(dst, bufsize);
    n = snprintf(dst, bufsize, "%s", src);
    if (n < 0) {
        *dst = '\0';
        return 0;
    }
    if (n >= bufsize) n = bufsize-1;    // output was truncated
    if (utf_sw < 0)
        esc_all(dst);
    else
        esc_ctl(dst, n);
    return n;
}


    // vectorized form: when file2strvec() yields nothing, store a "-" vector
    if (flags & PROC_FILLARG)                   // read /proc/#/cmdline
        if (!(p->cmdline_v = file2strvec(path, "cmdline")))
            rc += vectorize_dash_rc(&p->cmdline_v);
    // single-string form, produced by fill_cmdline_cvt()
    if (flags & PROC_EDITCMDLCVT)
        rc += fill_cmdline_cvt(path, p);


// This routine reads a 'cmdline' for the designated proc_t, "escapes"
// the result into a single string while guaranteeing the caller a
// valid proc_t.cmdline pointer.
// When no cmdline could be read, the command name from the proc_t is
// formatted by escape_command() instead, and "?" is the last resort.
// Returns 0 on success, 1 when the copy could not be allocated.
static int fill_cmdline_cvt (const char *directory, proc_t *restrict p) {
#define uFLG ( ESC_BRACKETS | ESC_DEFUNCT )
    dst_buffer[0] = '\0';
    if (read_unvectored(src_buffer, MAX_BUFSZ, directory, "cmdline", ' '))
        escape_str(dst_buffer, src_buffer, MAX_BUFSZ);
    else
        escape_command(dst_buffer, p, MAX_BUFSZ, uFLG);
    p->cmdline = strdup(dst_buffer[0] ? dst_buffer : "?");
    if (!p->cmdline)
        return 1;
    return 0;
#undef uFLG
}


// Reads /proc/*/stat files, being careful not to trip over processes with
// names like ":-) 1 2 3 4 5 6".
// NOTE(review): this is a partial excerpt -- the declarations of 'num' and
// 'tmp', the code that positions S and tmp, and the closing braces are not
// shown here.
static int stat2proc (const char *S, proc_t *restrict P) {
    char buf[64], raw[64];
        // only fill in the command name when P->cmd is not already set
        if (!P->cmd) {
            // copy the num bytes between S and tmp into raw and terminate it
            // NOTE(review): assumes num < sizeof(raw) -- confirm in full source
            num = tmp - S;
            memcpy(raw, S, num);
            raw[num] = '\0';
            // escape the raw name into buf, then keep a heap copy in P->cmd
            escape_str(buf, raw, sizeof(buf));
            // a failed strdup is reported to the caller as 1
            if (!(P->cmd = strdup(buf))) return 1;    //<------------

// Format the command name pp->cmd into outbuf, which holds 'bytes' bytes.
// ESC_BRACKETS wraps the name in "[...]"; ESC_DEFUNCT appends " <defunct>"
// when pp->state is 'Z'.  Room for that decoration is reserved up front, so
// the name itself is what gets truncated.
// Returns the number of bytes written, not counting the NUL.
int escape_command (unsigned char *outbuf, const proc_t *pp, int bytes, unsigned flags) {
    int overhead = 0;
    int end = 0;

    if (flags & ESC_BRACKETS)
        overhead += 2;
    if (flags & ESC_DEFUNCT) {
        if (pp->state == 'Z') overhead += 10;    // chars in " <defunct>"
        else flags &= ~ESC_DEFUNCT;
    }
    if (overhead + 1 >= bytes) {
        // if no room for even one byte of the command name
        outbuf[0] = '\0';
        return 0;
    }
    if (flags & ESC_BRACKETS)
        outbuf[end++] = '[';
    end += escape_str(outbuf+end, pp->cmd, bytes-overhead);  // copy cmd to outbuf+end
    // we want "[foo] <defunct>", not "[foo <defunct>]"
    if (flags & ESC_BRACKETS)
        outbuf[end++] = ']';
    if (flags & ESC_DEFUNCT) {
        memcpy(outbuf+end, " <defunct>", 10);
        end += 10;
    }
    outbuf[end] = '\0';
    return end;  // bytes, not including the NUL
}


    if ((flags & PROC_FILLCGROUP))              // read /proc/#/cgroup
        if (!(p->cgroup_v = file2strvec(path, "cgroup")))
            rc += vectorize_dash_rc(&p->cgroup_v);   // fall back to a "-" vector
    if (flags & PROC_EDITCGRPCVT)
        rc += fill_cgroup_cvt(path, p);

    // both oom files are optional; a failed read just skips that value
    if (flags & PROC_FILLOOM) {
        if (file2str(path, "oom_score", &ub) != -1)
            oomscore2proc(ub.buf, p);
        if (file2str(path, "oom_score_adj", &ub) != -1)
            oomadj2proc(ub.buf, p);
    }

    if (flags & PROC_FILLNS)                    // read /proc/#/ns/*
        procps_ns_read_pid(p->tid, &(p->ns));

    if (flags & PROC_FILLSYSTEMD)               // get sd-login.h stuff
        rc += sd2proc(p);

这里读取lxc container相关的内容。

    if (flags & PROC_FILL_LXC)                  // value the lxc name
        p->lxcname = lxc_containers(path);

    if (flags & PROC_FILL_LUID)                 // value the login user id
        p->luid = login_uid(path);


    if (flags & PROC_FILL_EXE) {
        if (!(p->exe = readlink_exe(path)))
            rc += 1;


    if (flags & PROC_FILLAUTOGRP)               // value the 2 autogroup fields
        autogroup_fill(path, p);

    if (rc == 0) return p;
    errno = ENOMEM;
    return NULL;



// Fetch the next process from PT into p.
// PT->finder locates the next candidate (filling the path, p->tid and
// p->tgid) and PT->reader loads its data; candidates the reader rejects are
// skipped.  Returns the filled proc_t, or NULL when the finder has nothing
// more to offer or errno is ENOMEM.
// NOTE(review): errno is tested before it is assigned in this function, so
// callers presumably must clear it beforehand -- confirm.
proc_t *readproc(PROCTAB *restrict const PT, proc_t *restrict p) {
    proc_t *ret;


    for(;;) {
        if (errno == ENOMEM) goto out;
        // fills in the path, plus p->tid and p->tgid
        if (!PT->finder(PT,p)) goto out;

        // go read the process data
        ret = PT->reader(PT,p);
        if(ret) return ret;
    }

out:
    return NULL;
}


        if (!(n_inuse < n_alloc)) {
            n_alloc += STACKS_GROW;
            if (!(info->fetch.anchor = realloc(info->fetch.anchor, sizeof(void *) * n_alloc))
                    || (!(ext = pids_stacks_alloc(info, STACKS_GROW))))
                return -1;   // here, errno was set to ENOMEM
            memcpy(info->fetch.anchor + n_inuse, ext->stacks, sizeof(void *) * STACKS_GROW);
        if (!pids_proc_tally(info, &info->fetch.counts, &info->fetch_proc))
            return -1;       // here, errno was set to ENOMEM
        if (!pids_assign_results(info, info->fetch.anchor[n_inuse++], &info->fetch_proc))
            return -1;       // here, errno was set to ENOMEM
    /* while the possibility is extremely remote, the readproc.c (read_something) |
       simple_readproc and simple_readtask guys could have encountered this error |
       in which case they would have returned a NULL, thus ending our while loop. | */
    if (errno == ENOMEM)
        return -1;

    // finalize stuff -------------------------------------
    /* note: we go to this trouble of maintaining a duplicate of the consolidated |
             extent stacks addresses represented as our 'anchor' since these ptrs |
             are exposed to a user (um, not that we don't trust 'em or anything). |
             plus, we can NULL delimit these ptrs which we couldn't do otherwise. | */
    if (n_saved < n_inuse + 1) {
        n_saved = n_inuse + 1;
        if (!(info->fetch.results.stacks = realloc(info->fetch.results.stacks, sizeof(void *) * n_saved)))
            return -1;
    memcpy(info->fetch.results.stacks, info->fetch.anchor, sizeof(void *) * n_inuse);
    info->fetch.results.stacks[n_inuse] = NULL;

    return n_inuse;     // callers beware, this might be zero !
#undef n_alloc
#undef n_inuse
#undef n_saved
} // end: pids_stacks_fetch