Librdkafka用于kafka topic-partition的相關數據結構和操作

  • topic-partition是kafka分布式的精華, 也是針對kafka進行生產或消費的最小單元;
  • 在這篇里我們開始介紹相關的數據結構
  • 內容如下:
    1. rd_kafka_topic_partition_t
    2. rd_kafka_topic_partition_list_t
    3. rd_kafka_toppar_s

rd_kafka_topic_partition_t
  • 所在文件: src/rdkafka.h
  • 定義了一個partition的相關數據結構, 簡單定義, 占位符
  • 定義:
typedef struct rd_kafka_topic_partition_s {
        char        *topic;             /**< Topic name */
        int32_t      partition;         /**< Partition */
    int64_t      offset;            /**< Offset */
        void        *metadata;          /**< Metadata */ // 主要是leader, replicas, isr等信息
        size_t       metadata_size;     /**< Metadata size */
        void        *opaque;            /**< Application opaque */
        rd_kafka_resp_err_t err;        /**< Error code, depending on use. */
        void       *_private;           /**< INTERNAL USE ONLY,
                                         *   INITIALIZE TO ZERO, DO NOT TOUCH */
} rd_kafka_topic_partition_t;

rd_kafka_topic_partition_list_t
  • 所在文件: src/rdkafka.h
  • 用來存儲 rd_kafka_topic_partition_t的可動態擴容的數組
  • 定義:
typedef struct rd_kafka_topic_partition_list_s {
        int cnt;               /**< Current number of elements */ 當前數組中放入的element數量
        int size;              /**< Current allocated size */ // 當前數組的容量
        rd_kafka_topic_partition_t *elems; /**< Element array[] */ 動態數組指針
} rd_kafka_topic_partition_list_t;
  • 擴容操作 rd_kafka_topic_partition_list_grow:
rd_kafka_topic_partition_list_grow (rd_kafka_topic_partition_list_t *rktparlist,
                                    int add_size) {
        if (add_size < rktparlist->size)
                add_size = RD_MAX(rktparlist->size, 32);

        rktparlist->size += add_size;
        // 使用realloc重新分配內存
        rktparlist->elems = rd_realloc(rktparlist->elems,
                                       sizeof(*rktparlist->elems) *
                                       rktparlist->size);

}
  • 創建操作 rd_kafka_topic_partition_list_new:
rd_kafka_topic_partition_list_t *rd_kafka_topic_partition_list_new (int size) {
        rd_kafka_topic_partition_list_t *rktparlist;
        rktparlist = rd_calloc(1, sizeof(*rktparlist));
        rktparlist->size = size;
        rktparlist->cnt = 0;
        if (size > 0)
                rd_kafka_topic_partition_list_grow(rktparlist, size);
        return rktparlist;
}
  • 查找操作 rd_kafka_topic_partition_list_find:
    topic和partition都相等才算是相等
rd_kafka_topic_partition_list_find (rd_kafka_topic_partition_list_t *rktparlist,
                     const char *topic, int32_t partition) {
    int i = rd_kafka_topic_partition_list_find0(rktparlist,
                            topic, partition);
    if (i == -1)
        return NULL;
    else
        return &rktparlist->elems[i];
}
  • 按索引刪除 rd_kafka_topic_partition_list_del_by_idx
rd_kafka_topic_partition_list_del_by_idx (rd_kafka_topic_partition_list_t *rktparlist,
                      int idx) {
    if (unlikely(idx < 0 || idx >= rktparlist->cnt))
        return 0;

        // element數量減1
    rktparlist->cnt--;
        // destory 刪除的元素 
    rd_kafka_topic_partition_destroy0(&rktparlist->elems[idx], 0);

        // 作內存的移動, 但不回收
    memmove(&rktparlist->elems[idx], &rktparlist->elems[idx+1],
        (rktparlist->cnt - idx) * sizeof(rktparlist->elems[idx]));

    return 1;
}
  • 排序rd_kafka_topic_partition_list_sort_by_topic
    topic名字不同按topic名字排,topic名字相同按partition排
void rd_kafka_topic_partition_list_sort_by_topic (
        rd_kafka_topic_partition_list_t *rktparlist) {
        rd_kafka_topic_partition_list_sort(rktparlist,
                                           rd_kafka_topic_partition_cmp, NULL);
}
rd_kafka_toppar_s
  • 所在文件: src/rdkafka_partition.h
  • 重量數據結構,topic, partition, leader, 生產, 消費, 各種定時timer都在里面
  • 定義, 這個結構體巨龐大
struct rd_kafka_toppar_s { /* rd_kafka_toppar_t */
    TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rklink;  /* rd_kafka_t link */
    TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rkblink; /* rd_kafka_broker_t link*/
        CIRCLEQ_ENTRY(rd_kafka_toppar_s) rktp_fetchlink; /* rkb_fetch_toppars */
    TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rktlink; /* rd_kafka_itopic_t link*/
        TAILQ_ENTRY(rd_kafka_toppar_s) rktp_cgrplink;/* rd_kafka_cgrp_t link */
        rd_kafka_itopic_t       *rktp_rkt;
        shptr_rd_kafka_itopic_t *rktp_s_rkt;  /* shared pointer for rktp_rkt */
    int32_t            rktp_partition;
        //LOCK: toppar_lock() + topic_wrlock()
        //LOCK: .. in partition_available()
        int32_t            rktp_leader_id;   /**< Current leader broker id.
                                              *   This is updated directly
                                              *   from metadata. */
    rd_kafka_broker_t *rktp_leader;      /**< Current leader broker
                                              *   This updated asynchronously
                                              *   by issuing JOIN op to
                                              *   broker thread, so be careful
                                              *   in using this since it
                                              *   may lag. */
        rd_kafka_broker_t *rktp_next_leader; /**< Next leader broker after
                                              *   async migration op. */
    rd_refcnt_t        rktp_refcnt;
    mtx_t              rktp_lock;
 rd_atomic32_t      rktp_version;         /* Latest op version.
                                                  * Authoritative (app thread)*/
    int32_t            rktp_op_version;      /* Op version of curr command
                          * state from.
                          * (broker thread) */
        int32_t            rktp_fetch_version;   /* Op version of curr fetch.
                                                    (broker thread) */

    enum {
        RD_KAFKA_TOPPAR_FETCH_NONE = 0,
                RD_KAFKA_TOPPAR_FETCH_STOPPING,
                RD_KAFKA_TOPPAR_FETCH_STOPPED,
        RD_KAFKA_TOPPAR_FETCH_OFFSET_QUERY,
        RD_KAFKA_TOPPAR_FETCH_OFFSET_WAIT,
        RD_KAFKA_TOPPAR_FETCH_ACTIVE,
    } rktp_fetch_state;    
int32_t            rktp_fetch_msg_max_bytes; /* Max number of bytes to
                                                      * fetch.
                                                      * Locality: broker thread
                                                      */

        rd_ts_t            rktp_ts_fetch_backoff; /* Back off fetcher for
                                                   * this partition until this
                                                   * absolute timestamp
                                                   * expires. */

    int64_t            rktp_query_offset;    /* Offset to query broker for*/
    int64_t            rktp_next_offset;     /* Next offset to start
                                                  * fetching from.
                                                  * Locality: toppar thread */
    int64_t            rktp_last_next_offset; /* Last next_offset handled
                           * by fetch_decide().
                           * Locality: broker thread */
    int64_t            rktp_app_offset;      /* Last offset delivered to
                          * application + 1 */
    int64_t            rktp_stored_offset;   /* Last stored offset, but
                          * maybe not committed yet. */
        int64_t            rktp_committing_offset; /* Offset currently being
                                                    * committed */
    int64_t            rktp_committed_offset; /* Last committed offset */
    rd_ts_t            rktp_ts_committed_offset; /* Timestamp of last
                                                      * commit */

        struct offset_stats rktp_offsets; /* Current offsets.
                                           * Locality: broker thread*/
        struct offset_stats rktp_offsets_fin; /* Finalized offset for stats.
                                               * Updated periodically
                                               * by broker thread.
                                               * Locks: toppar_lock */

    int64_t rktp_hi_offset;              /* Current high offset.
                          * Locks: toppar_lock */
        int64_t rktp_lo_offset;         
 rd_ts_t            rktp_ts_offset_lag;

    char              *rktp_offset_path;     /* Path to offset file */
    FILE              *rktp_offset_fp;       /* Offset file pointer */
        rd_kafka_cgrp_t   *rktp_cgrp;            /* Belongs to this cgrp */

        int                rktp_assigned;   /* Partition in cgrp assignment */

        rd_kafka_replyq_t  rktp_replyq; /* Current replyq+version
                     * for propagating
                     * major operations, e.g.,
                     * FETCH_STOP. */
    int                rktp_flags;

        shptr_rd_kafka_toppar_t *rktp_s_for_desp; /* Shared pointer for
                                                   * rkt_desp list */
        shptr_rd_kafka_toppar_t *rktp_s_for_cgrp; /* Shared pointer for
                                                   * rkcg_toppars list */
        shptr_rd_kafka_toppar_t *rktp_s_for_rkb;  /* Shared pointer for
                                                   * rkb_toppars list */

    /*
     * Timers
     */
    rd_kafka_timer_t rktp_offset_query_tmr;  /* Offset query timer */
    rd_kafka_timer_t rktp_offset_commit_tmr; /* Offset commit timer */
    rd_kafka_timer_t rktp_offset_sync_tmr;   /* Offset file sync timer */
        rd_kafka_timer_t rktp_consumer_lag_tmr;  /* Consumer lag monitoring
                          * timer */

        int rktp_wait_consumer_lag_resp;         /* Waiting for consumer lag
                                                  * response. */

    struct {
        rd_atomic64_t tx_msgs;
        rd_atomic64_t tx_bytes;
                rd_atomic64_t msgs;
                rd_atomic64_t rx_ver_drops;
    } rktp_c;
}
  • 創建一個 rd_kafka_toppar_t對象 rd_kafka_toppar_new0:
shptr_rd_kafka_toppar_t *rd_kafka_toppar_new0 (rd_kafka_itopic_t *rkt,
                           int32_t partition,
                           const char *func, int line) {
    rd_kafka_toppar_t *rktp;

       // 分配內存
    rktp = rd_calloc(1, sizeof(*rktp));

        // 各項賦值
    rktp->rktp_partition = partition;

        // 屬于哪個topic
    rktp->rktp_rkt = rkt;

        rktp->rktp_leader_id = -1;
    rktp->rktp_fetch_state = RD_KAFKA_TOPPAR_FETCH_NONE;
        rktp->rktp_fetch_msg_max_bytes
            = rkt->rkt_rk->rk_conf.fetch_msg_max_bytes;
    rktp->rktp_offset_fp = NULL;
        rd_kafka_offset_stats_reset(&rktp->rktp_offsets);
        rd_kafka_offset_stats_reset(&rktp->rktp_offsets_fin);
        rktp->rktp_hi_offset = RD_KAFKA_OFFSET_INVALID;
    rktp->rktp_lo_offset = RD_KAFKA_OFFSET_INVALID;
    rktp->rktp_app_offset = RD_KAFKA_OFFSET_INVALID;
        rktp->rktp_stored_offset = RD_KAFKA_OFFSET_INVALID;
        rktp->rktp_committed_offset = RD_KAFKA_OFFSET_INVALID;
    rd_kafka_msgq_init(&rktp->rktp_msgq);
        rktp->rktp_msgq_wakeup_fd = -1;
    rd_kafka_msgq_init(&rktp->rktp_xmit_msgq);
    mtx_init(&rktp->rktp_lock, mtx_plain);

        rd_refcnt_init(&rktp->rktp_refcnt, 0);
    rktp->rktp_fetchq = rd_kafka_q_new(rkt->rkt_rk);
        rktp->rktp_ops    = rd_kafka_q_new(rkt->rkt_rk);
        rktp->rktp_ops->rkq_serve = rd_kafka_toppar_op_serve;
        rktp->rktp_ops->rkq_opaque = rktp;
        rd_atomic32_init(&rktp->rktp_version, 1);
    rktp->rktp_op_version = rd_atomic32_get(&rktp->rktp_version);

        // 開始一個timer, 來定時統計消息的lag情況, 目前看是一個`rd_kafka_toppar_t`對象就一個timer, 太多了, 可以用時間輪來作所有partiton的timer
        if (rktp->rktp_rkt->rkt_rk->rk_conf.stats_interval_ms > 0 &&
            rkt->rkt_rk->rk_type == RD_KAFKA_CONSUMER &&
            rktp->rktp_partition != RD_KAFKA_PARTITION_UA) {
                int intvl = rkt->rkt_rk->rk_conf.stats_interval_ms;
                if (intvl < 10 * 1000 /* 10s */)
                        intvl = 10 * 1000;
        rd_kafka_timer_start(&rkt->rkt_rk->rk_timers,
                     &rktp->rktp_consumer_lag_tmr,
                                     intvl * 1000ll,
                     rd_kafka_toppar_consumer_lag_tmr_cb,
                     rktp);
        }

        rktp->rktp_s_rkt = rd_kafka_topic_keep(rkt);

        // 設置其fwd op queue到rd_kakfa_t中的rd_ops, 這樣這個rd_kafka_toppar_t對象用到的ops_queue就是rd_kafka_t的了
    rd_kafka_q_fwd_set(rktp->rktp_ops, rkt->rkt_rk->rk_ops);
    rd_kafka_dbg(rkt->rkt_rk, TOPIC, "TOPPARNEW", "NEW %s [%"PRId32"] %p (at %s:%d)",
             rkt->rkt_topic->str, rktp->rktp_partition, rktp,
             func, line);

    return rd_kafka_toppar_keep_src(func, line, rktp);
}
  • 銷毀一個rd_kafka_toppar_t對象rd_kafka_toppar_destroy_final
void rd_kafka_toppar_destroy_final (rd_kafka_toppar_t *rktp) {
        // 停掉相應的timer, 清空ops queue
        rd_kafka_toppar_remove(rktp);

        // 將msgq中的kafka message回調給app層后清空
    rd_kafka_dr_msgq(rktp->rktp_rkt, &rktp->rktp_msgq,
             RD_KAFKA_RESP_ERR__DESTROY);
    rd_kafka_q_destroy_owner(rktp->rktp_fetchq);
        rd_kafka_q_destroy_owner(rktp->rktp_ops);

    rd_kafka_replyq_destroy(&rktp->rktp_replyq);

    rd_kafka_topic_destroy0(rktp->rktp_s_rkt);

    mtx_destroy(&rktp->rktp_lock);

        rd_refcnt_destroy(&rktp->rktp_refcnt);

    rd_free(rktp);
}
  • 從一個rd_kafka_itopic_t(這個我們后面會有專門篇章來介紹, 這里只需要知道它表示topic即可, 里面包括屬于它的parition列表)獲取指定parition:
shptr_rd_kafka_toppar_t *rd_kafka_toppar_get0 (const char *func, int line,
                                               const rd_kafka_itopic_t *rkt,
                                               int32_t partition,
                                               int ua_on_miss) {
        shptr_rd_kafka_toppar_t *s_rktp;
 
        // 數組索引下標來獲取 partition
    if (partition >= 0 && partition < rkt->rkt_partition_cnt)
        s_rktp = rkt->rkt_p[partition];
    else if (partition == RD_KAFKA_PARTITION_UA || ua_on_miss)
        s_rktp = rkt->rkt_ua;
    else
        return NULL;

    if (s_rktp)
               // 引用計數加1 
                return rd_kafka_toppar_keep_src(func,line,
                                                rd_kafka_toppar_s2i(s_rktp));

    return NULL;
}
  • 按topic名字和partition來獲取一個rd_kafka_toppar_t對象, 沒有找到topic, 就先創建這個 rd_kafka_itopic_t對象
shptr_rd_kafka_toppar_t *rd_kafka_toppar_get2 (rd_kafka_t *rk,
                                               const char *topic,
                                               int32_t partition,
                                               int ua_on_miss,
                                               int create_on_miss) {
    shptr_rd_kafka_itopic_t *s_rkt;
        rd_kafka_itopic_t *rkt;
        shptr_rd_kafka_toppar_t *s_rktp;

        rd_kafka_wrlock(rk);

        /* Find or create topic */
        // 所有的 rd_kafka_itopic_t對象都存在rd_kafka_t的rkt_topic的tailq隊列里, 這里先查找
    if (unlikely(!(s_rkt = rd_kafka_topic_find(rk, topic, 0/*no-lock*/)))) {
                if (!create_on_miss) {
                        rd_kafka_wrunlock(rk);
                        return NULL;
                }
                // 沒找到就先創建  rd_kafka_itopic_t對象
                s_rkt = rd_kafka_topic_new0(rk, topic, NULL,
                        NULL, 0/*no-lock*/);
                if (!s_rkt) {
                        rd_kafka_wrunlock(rk);
                        rd_kafka_log(rk, LOG_ERR, "TOPIC",
                                     "Failed to create local topic \"%s\": %s",
                                     topic, rd_strerror(errno));
                        return NULL;
                }
        }

        rd_kafka_wrunlock(rk);

        rkt = rd_kafka_topic_s2i(s_rkt);

    rd_kafka_topic_wrlock(rkt);
    s_rktp = rd_kafka_toppar_desired_add(rkt, partition);
    rd_kafka_topic_wrunlock(rkt);

        rd_kafka_topic_destroy0(s_rkt);

    return s_rktp;
}
  • desired partition: desired partition狀態的parititon, 源碼中的解釋如下:

The desired partition list is the list of partitions that are desired
(e.g., by the consumer) but not yet seen on a broker.
As soon as the partition is seen on a broker the toppar is moved from
the desired list and onto the normal rkt_p array.
When the partition on the broker goes away a desired partition is put
back on the desired list

簡單說就是需要某一個partition, 但是這個parition的具體信息還沒從broker拿掉,這樣的parition就是desired parition, 在rd_kafka_itopic_t中有一個rkt_desp的list, 專門用來存這樣的parition, 針對其有如下幾個操作,都比較簡單:

rd_kafka_toppar_desired_get
rd_kafka_toppar_desired_link
rd_kafka_toppar_desired_unlink
rd_kafka_toppar_desired_add0
rd_kafka_toppar_desired_add
rd_kafka_toppar_desired_del
  • partition在broker間遷移rd_kafka_toppar_broker_migrate:
static void rd_kafka_toppar_broker_migrate (rd_kafka_toppar_t *rktp,
                                            rd_kafka_broker_t *old_rkb,
                                            rd_kafka_broker_t *new_rkb) {
        rd_kafka_op_t *rko;
        rd_kafka_broker_t *dest_rkb;
        int had_next_leader = rktp->rktp_next_leader ? 1 : 0;

        /* Update next leader */
        if (new_rkb)
                rd_kafka_broker_keep(new_rkb);
        if (rktp->rktp_next_leader)
                rd_kafka_broker_destroy(rktp->rktp_next_leader);
        rktp->rktp_next_leader = new_rkb;
        
        // 在遷移沒完成時有可能再次遷移了, 這個時候是不是需要加鎖? 
        if (had_next_leader)
                return;

    if (rktp->rktp_fetch_state == RD_KAFKA_TOPPAR_FETCH_OFFSET_WAIT) {
        rd_kafka_toppar_set_fetch_state(
            rktp, RD_KAFKA_TOPPAR_FETCH_OFFSET_QUERY);
        rd_kafka_timer_start(&rktp->rktp_rkt->rkt_rk->rk_timers,
                     &rktp->rktp_offset_query_tmr,
                     500*1000,
                     rd_kafka_offset_query_tmr_cb,
                     rktp);
    }

        //  遷移前broker放到LEAVE op
        if (old_rkb) {
                rko = rd_kafka_op_new(RD_KAFKA_OP_PARTITION_LEAVE);
                dest_rkb = old_rkb;
        } else {
                /* No existing broker, send join op directly to new leader. */
                rko = rd_kafka_op_new(RD_KAFKA_OP_PARTITION_JOIN);
                dest_rkb = new_rkb;
        }

        rko->rko_rktp = rd_kafka_toppar_keep(rktp);

        rd_kafka_q_enq(dest_rkb->rkb_ops, rko);
}
  • broker的delegate操作:
void rd_kafka_toppar_broker_delegate (rd_kafka_toppar_t *rktp,
                      rd_kafka_broker_t *rkb,
                      int for_removal) {
        rd_kafka_t *rk = rktp->rktp_rkt->rkt_rk;
        int internal_fallback = 0;

        /* Delegate toppars with no leader to the
         * internal broker for bookkeeping. */
        // 如果遷移到的broker是NULL, 就獲取一個internal broker -> rkb
        if (!rkb && !for_removal && !rd_kafka_terminating(rk)) {
                rkb = rd_kafka_broker_internal(rk);
                internal_fallback = 1;
        }

    if (rktp->rktp_leader == rkb && !rktp->rktp_next_leader) {
                rd_kafka_dbg(rktp->rktp_rkt->rkt_rk, TOPIC, "BRKDELGT",
                 "%.*s [%"PRId32"]: not updating broker: "
                             "already on correct broker %s",
                 RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic),
                 rktp->rktp_partition,
                             rkb ? rd_kafka_broker_name(rkb) : "(none)");

                if (internal_fallback)
                        rd_kafka_broker_destroy(rkb);
        return;
        }

        // 實際的遷移操作
        if (rktp->rktp_leader || rkb)
                rd_kafka_toppar_broker_migrate(rktp, rktp->rktp_leader, rkb);

        if (internal_fallback)
                rd_kafka_broker_destroy(rkb);
}
  • 提交offstet到broker rd_kafka_toppar_offset_commit
void rd_kafka_toppar_offset_commit (rd_kafka_toppar_t *rktp, int64_t offset,
                    const char *metadata) {
        rd_kafka_topic_partition_list_t *offsets;
        rd_kafka_topic_partition_t *rktpar;

        // 構造 一個rd_kafka_topic_partition_list, 把當前的topic添加進去, 包括要提交的offset
        offsets = rd_kafka_topic_partition_list_new(1);
        rktpar = rd_kafka_topic_partition_list_add(
                offsets, rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition);
        rktpar->offset = offset;
        if (metadata) {
                rktpar->metadata = rd_strdup(metadata);
                rktpar->metadata_size = strlen(metadata);
        }

        // rd_kafka_toppar_t對象更新rktp_committing_offset,表示正在提交的offset
        rktp->rktp_committing_offset = offset;

       // 異步提交offset, 這個操作在之后介紹kafka consumer是會詳細分析
        rd_kafka_commit(rktp->rktp_rkt->rkt_rk, offsets, 1/*async*/);

        rd_kafka_topic_partition_list_destroy(offsets);
}
  • 設置下一次拉取數據時開始的offset位置,即rd_kafka_toppar_trktp_next_offset
void rd_kafka_toppar_next_offset_handle (rd_kafka_toppar_t *rktp,
                                         int64_t Offset) {
        // 如果Offset是BEGINNING,END, 發起一個rd_kafka_toppar_offset_request操作,從broker獲取offset
        // 如果Offset是RD_KAFKA_OFFSET_INVALID, 需要enqueue一個error op, 設置fetch狀態為RD_KAFKA_TOPPAR_FETCH_NONE
        if (RD_KAFKA_OFFSET_IS_LOGICAL(Offset)) {
                /* Offset storage returned logical offset (e.g. "end"),
                 * look it up. */
                rd_kafka_offset_reset(rktp, Offset, RD_KAFKA_RESP_ERR_NO_ERROR,
                                      "update");
                return;
        }

        /* Adjust by TAIL count if, if wanted */
        // 獲取從tail開始往前推cnt個offset的位置
        if (rktp->rktp_query_offset <=
            RD_KAFKA_OFFSET_TAIL_BASE) {
                int64_t orig_Offset = Offset;
                int64_t tail_cnt =
                        llabs(rktp->rktp_query_offset -
                              RD_KAFKA_OFFSET_TAIL_BASE);

                if (tail_cnt > Offset)
                        Offset = 0;
                else
                        Offset -= tail_cnt;
        }

        //設置rktp_next_offset
        rktp->rktp_next_offset = Offset;

        rd_kafka_toppar_set_fetch_state(rktp, RD_KAFKA_TOPPAR_FETCH_ACTIVE);

        /* Wake-up broker thread which might be idling on IO */
        if (rktp->rktp_leader)
                rd_kafka_broker_wakeup(rktp->rktp_leader);

}
  • 從coordinattor獲取已提交的offset(FetchOffsetRequest) rd_kafka_toppar_offset_fetch:
void rd_kafka_toppar_offset_fetch (rd_kafka_toppar_t *rktp,
                                   rd_kafka_replyq_t replyq) {
        rd_kafka_t *rk = rktp->rktp_rkt->rkt_rk;
        rd_kafka_topic_partition_list_t *part;
        rd_kafka_op_t *rko;

        part = rd_kafka_topic_partition_list_new(1);
        rd_kafka_topic_partition_list_add0(part,
                                           rktp->rktp_rkt->rkt_topic->str,
                                           rktp->rktp_partition,
                       rd_kafka_toppar_keep(rktp));

        // 構造OffsetFetch的operator
        rko = rd_kafka_op_new(RD_KAFKA_OP_OFFSET_FETCH);
    rko->rko_rktp = rd_kafka_toppar_keep(rktp);
    rko->rko_replyq = replyq;

    rko->rko_u.offset_fetch.partitions = part;
    rko->rko_u.offset_fetch.do_free = 1;

        // OffsetFetch 請求是與消費有關的,放入cgrp的op queue里
        rd_kafka_q_enq(rktp->rktp_cgrp->rkcg_ops, rko);
}
  • 獲取用于消費的有效的offset
void rd_kafka_toppar_offset_request (rd_kafka_toppar_t *rktp,
                     int64_t query_offset, int backoff_ms) {
    rd_kafka_broker_t *rkb;
        rkb = rktp->rktp_leader;

         // 如果rkb是無效的,需要下一個timer來定時query
        if (!backoff_ms && (!rkb || rkb->rkb_source == RD_KAFKA_INTERNAL))
                backoff_ms = 500;

        if (backoff_ms) {
                rd_kafka_toppar_set_fetch_state(
                        rktp, RD_KAFKA_TOPPAR_FETCH_OFFSET_QUERY);
                // 啟動timer, timer到期會執行rd_kafka_offset_query_tmr_cb回調,這個回調還是調用當前這個函數
        rd_kafka_timer_start(&rktp->rktp_rkt->rkt_rk->rk_timers,
                     &rktp->rktp_offset_query_tmr,
                     backoff_ms*1000ll,
                     rd_kafka_offset_query_tmr_cb, rktp);
        return;
        }

        // stop這個重試的timer
        rd_kafka_timer_stop(&rktp->rktp_rkt->rkt_rk->rk_timers,
                            &rktp->rktp_offset_query_tmr, 1/*lock*/);

        // 從coordinattor獲取需要消費的offset
    if (query_offset == RD_KAFKA_OFFSET_STORED &&
            rktp->rktp_rkt->rkt_conf.offset_store_method ==
            RD_KAFKA_OFFSET_METHOD_BROKER) {
                /*
                 * Get stored offset from broker based storage:
                 * ask cgrp manager for offsets
                 */
                rd_kafka_toppar_offset_fetch(
            rktp,
            RD_KAFKA_REPLYQ(rktp->rktp_ops,
                    rktp->rktp_op_version));

    } else {
                shptr_rd_kafka_toppar_t *s_rktp;
                rd_kafka_topic_partition_list_t *offsets;

                /*
                 * Look up logical offset (end,beginning,tail,..)
                 */
                s_rktp = rd_kafka_toppar_keep(rktp);

        if (query_offset <= RD_KAFKA_OFFSET_TAIL_BASE)
            query_offset = RD_KAFKA_OFFSET_END;

                offsets = rd_kafka_topic_partition_list_new(1);
                rd_kafka_topic_partition_list_add(
                        offsets,
                        rktp->rktp_rkt->rkt_topic->str,
                        rktp->rktp_partition)->offset = query_offset;
                
                // 基本上用于reset offset, 獲取當前partition的最舊offset或最新offset
                rd_kafka_OffsetRequest(rkb, offsets, 0,
                                       RD_KAFKA_REPLYQ(rktp->rktp_ops,
                                                       rktp->rktp_op_version),
                                       rd_kafka_toppar_handle_Offset,
                                       s_rktp);

                rd_kafka_topic_partition_list_destroy(offsets);
        }

        rd_kafka_toppar_set_fetch_state(rktp,
                RD_KAFKA_TOPPAR_FETCH_OFFSET_WAIT);
}

Librdkafka源碼分析-Content Table

最后編輯于
?著作權歸作者所有,轉載或內容合作請聯系作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。
  • 序言:七十年代末,一起剝皮案震驚了整個濱河市,隨后出現的幾起案子,更是在濱河造成了極大的恐慌,老刑警劉巖,帶你破解...
    沈念sama閱讀 229,836評論 6 540
  • 序言:濱河連續發生了三起死亡事件,死亡現場離奇詭異,居然都是意外死亡,警方通過查閱死者的電腦和手機,發現死者居然都...
    沈念sama閱讀 99,275評論 3 428
  • 文/潘曉璐 我一進店門,熙熙樓的掌柜王于貴愁眉苦臉地迎上來,“玉大人,你說我怎么就攤上這事。” “怎么了?”我有些...
    開封第一講書人閱讀 177,904評論 0 383
  • 文/不壞的土叔 我叫張陵,是天一觀的道長。 經常有香客問我,道長,這世上最難降的妖魔是什么? 我笑而不...
    開封第一講書人閱讀 63,633評論 1 317
  • 正文 為了忘掉前任,我火速辦了婚禮,結果婚禮上,老公的妹妹穿的比我還像新娘。我一直安慰自己,他們只是感情好,可當我...
    茶點故事閱讀 72,368評論 6 410
  • 文/花漫 我一把揭開白布。 她就那樣靜靜地躺著,像睡著了一般。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發上,一...
    開封第一講書人閱讀 55,736評論 1 328
  • 那天,我揣著相機與錄音,去河邊找鬼。 笑死,一個胖子當著我的面吹牛,可吹牛的內容都是我干的。 我是一名探鬼主播,決...
    沈念sama閱讀 43,740評論 3 446
  • 文/蒼蘭香墨 我猛地睜開眼,長吁一口氣:“原來是場噩夢啊……” “哼!你這毒婦竟也來了?” 一聲冷哼從身側響起,我...
    開封第一講書人閱讀 42,919評論 0 289
  • 序言:老撾萬榮一對情侶失蹤,失蹤者是張志新(化名)和其女友劉穎,沒想到半個月后,有當地人在樹林里發現了一具尸體,經...
    沈念sama閱讀 49,481評論 1 335
  • 正文 獨居荒郊野嶺守林人離奇死亡,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內容為張勛視角 年9月15日...
    茶點故事閱讀 41,235評論 3 358
  • 正文 我和宋清朗相戀三年,在試婚紗的時候發現自己被綠了。 大學時的朋友給我發了我未婚夫和他白月光在一起吃飯的照片。...
    茶點故事閱讀 43,427評論 1 374
  • 序言:一個原本活蹦亂跳的男人離奇死亡,死狀恐怖,靈堂內的尸體忽然破棺而出,到底是詐尸還是另有隱情,我是刑警寧澤,帶...
    沈念sama閱讀 38,968評論 5 363
  • 正文 年R本政府宣布,位于F島的核電站,受9級特大地震影響,放射性物質發生泄漏。R本人自食惡果不足惜,卻給世界環境...
    茶點故事閱讀 44,656評論 3 348
  • 文/蒙蒙 一、第九天 我趴在偏房一處隱蔽的房頂上張望。 院中可真熱鬧,春花似錦、人聲如沸。這莊子的主人今日做“春日...
    開封第一講書人閱讀 35,055評論 0 28
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽。三九已至,卻和暖如春,著一層夾襖步出監牢的瞬間,已是汗流浹背。 一陣腳步聲響...
    開封第一講書人閱讀 36,348評論 1 294
  • 我被黑心中介騙來泰國打工, 沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留,地道東北人。 一個月前我還...
    沈念sama閱讀 52,160評論 3 398
  • 正文 我出身青樓,卻偏偏與公主長得像,于是被迫代替她去往敵國和親。 傳聞我的和親對象是個殘疾皇子,可洞房花燭夜當晚...
    茶點故事閱讀 48,380評論 2 379

推薦閱讀更多精彩內容