byebugコードリーディング

byebug(10.0.0)のコードリーディングをしました。

#byebugはオープンクラスでKernelに定義されています。

module Byebug
  def self.attach
    require "byebug/core"

    unless started?
      self.mode = :attached

      start
      run_init_script
    end

    current_context.step_out(3, true)
  end
# ...
end

module Kernel
  def byebug
    Byebug.attach
  end

# ...
  alias debugger byebug
end

#byebugはByebug.attachを呼び出します。#started?や#startはC拡張の関数です。

Init_byebug()
{
  mByebug = rb_define_module("Byebug");

# ...
  rb_define_module_function(mByebug, "start", Start, 0);
  rb_define_module_function(mByebug, "started?", Started, 0);
# ...
}

Started関数はIS_STARTEDがtrueかどうかを返します。UNUSEDは未使用引数（unused parameter）の警告を抑制するためのマクロなので、読むときはスルーでOK。

static VALUE
Started(VALUE self)
{
  UNUSED(self);

  return IS_STARTED ? Qtrue : Qfalse;
}

IS_STARTEDはcatchpointsがnil（Qnil）でなければ（＝何かしら設定されていれば）真になるマクロです。NIL_PはヌルポインターかどうかではなくRubyのnilかどうかを判定します。

#define IS_STARTED (!NIL_P(catchpoints))

Start関数はcatchpointsにRubyのハッシュを設定しスレッドテーブルを生成した後、register_tracepoints関数を呼びます。

static VALUE
Start(VALUE self)
{
  if (IS_STARTED)
    return Qfalse;

  catchpoints = rb_hash_new();

  threads = create_threads_table();

  register_tracepoints(self);

  return Qtrue;
}

register_tracepoints関数はTracePointを設定します。byebugはTracePointで:lineや:b_returnなどのイベントをフックしてREPLを起動しています。

static void
register_tracepoints(VALUE self)
{
  int i;
  VALUE traces = tracepoints;

  UNUSED(self);

  if (NIL_P(traces))
  {
    int line_msk = RUBY_EVENT_LINE;
    int call_msk = RUBY_EVENT_CALL;
    int ret_msk = RUBY_EVENT_RETURN | RUBY_EVENT_B_RETURN;
    int end_msk = RUBY_EVENT_END;
    int raw_call_msk = RUBY_EVENT_C_CALL | RUBY_EVENT_B_CALL | RUBY_EVENT_CLASS;
    int raw_ret_msk = RUBY_EVENT_C_RETURN;
    int raise_msk = RUBY_EVENT_RAISE;

    VALUE tpLine = rb_tracepoint_new(Qnil, line_msk, line_event, 0);
    VALUE tpCall = rb_tracepoint_new(Qnil, call_msk, call_event, 0);
    VALUE tpReturn = rb_tracepoint_new(Qnil, ret_msk, return_event, 0);
    VALUE tpEnd = rb_tracepoint_new(Qnil, end_msk, end_event, 0);
    VALUE tpCCall = rb_tracepoint_new(Qnil, raw_call_msk, raw_call_event, 0);
    VALUE tpCReturn = rb_tracepoint_new(Qnil, raw_ret_msk, raw_return_event, 0);
    VALUE tpRaise = rb_tracepoint_new(Qnil, raise_msk, raise_event, 0);

    traces = rb_ary_new();
    rb_ary_push(traces, tpLine);
    rb_ary_push(traces, tpCall);
    rb_ary_push(traces, tpReturn);
    rb_ary_push(traces, tpEnd);
    rb_ary_push(traces, tpCCall);
    rb_ary_push(traces, tpCReturn);
    rb_ary_push(traces, tpRaise);

    tracepoints = traces;
  }

  for (i = 0; i < RARRAY_LENINT(traces); i++)
    rb_tracepoint_enable(rb_ary_entry(traces, i));
}

tracepointsの配列にTracePointのインスタンスを入れて、最後にrb_tracepoint_enable関数によって有効化します。

start後はContext#step_outを呼び出します（再掲）

module Byebug
  def self.attach
    require "byebug/core"

    unless started?
      self.mode = :attached

      start
      run_init_script
    end

    current_context.step_out(3, true)
  end
#...
end

module Kernel
  def byebug
    Byebug.attach
  end
# ...
end

引数の3は

step_outのCメソッドからのreturn
Byebug.attachメソッドからのreturn
byebugメソッドからのreturn

の3つのreturnが終わった後、ブレークするために設定している値です。

Context_step_outは以下の通りです。debug_context_t構造体のメンバ変数steps_outが3に設定され、forceがtrueになり、contextのフラグにCTX_FL_STOP_ON_RETが立ちます。その名の通り、returnで止まるフラグです。

static VALUE
Context_step_out(int argc, VALUE *argv, VALUE self)
{
  int n_args, n_frames;
  VALUE v_frames, force;
  debug_context_t *context;

  n_args = rb_scan_args(argc, argv, "02", &v_frames, &force);
  n_frames = n_args == 0 ? 1 : FIX2INT(v_frames);

  Data_Get_Struct(self, debug_context_t, context);

# ... 

  context->steps_out = n_frames;
  if (n_args == 2 && RTEST(force))
    CTX_FL_SET(context, CTX_FL_STOP_ON_RET);
  else
    CTX_FL_UNSET(context, CTX_FL_STOP_ON_RET);

  return Qnil;
}

Byebug.startで設定したTracePointにより、メソッドのreturnやブロックのreturnに対してreturn_eventの関数がコールバックとして呼ばれます。call_at_xxx系の関数はcall_at関数を呼び出します。call_at関数はline_event関数のところで説明します。

static void
return_event(VALUE trace_point, void *data)
{
  VALUE brkpnt, file, line, binding;

  EVENT_SETUP;

  RETURN_EVENT_SETUP;

  if ((dc->steps_out == 0) && (CTX_FL_TEST(dc, CTX_FL_STOP_ON_RET)))
  {
    reset_stepping_stop_points(dc);

    call_at_return(context, dc, rb_tracearg_return_value(trace_arg));
  }

# ...
  RETURN_EVENT_TEARDOWN;

RETURN_EVENT_SETUPやRETURN_EVENT_TEARDOWNのマクロは以下のように定義されており、dc->steps_outが1の場合にdc->stepsに1をセットしたり、dc->steps_outを1減らしたりします。

#define RETURN_EVENT_SETUP \
  dc->calced_stack_size--; \
                           \
  if (dc->steps_out == 1)  \
    dc->steps = 1;

#define RETURN_EVENT_TEARDOWN \
  dc->steps_out = dc->steps_out <= 0 ? -1 : dc->steps_out - 1;

steps_outは3なので、「step_outのCメソッドからのreturn」「Byebug.attachメソッドからのreturn」「byebugメソッドからのreturn」が終わると、dc->steps_outが0、dc->stepsが1の状態になります。

一方、新しい行の式が評価されるときに発生するTracePointの:lineイベントでは、コールバックとしてline_event関数が呼ばれます。

static void
line_event(VALUE trace_point, void *data)
{
  VALUE brkpnt, file, line, binding;

  EVENT_SETUP;

  file = rb_tracearg_path(trace_arg);
  line = rb_tracearg_lineno(trace_arg);
  binding = rb_tracearg_binding(trace_arg);

  if (RTEST(tracing))
    call_at_tracing(context, dc);

  if (!CTX_FL_TEST(dc, CTX_FL_IGNORE_STEPS))
    dc->steps = dc->steps <= 0 ? -1 : dc->steps - 1;

  if (dc->calced_stack_size <= dc->dest_frame)
  {
    dc->dest_frame = dc->calced_stack_size;
    CTX_FL_UNSET(dc, CTX_FL_IGNORE_STEPS);

    dc->lines = dc->lines <= 0 ? -1 : dc->lines - 1;
  }

  if (dc->steps == 0 || dc->lines == 0)
    call_at_line_check(context, dc, Qnil);
  else
  {
# ...
  }

  EVENT_TEARDOWN;
}

EVENT_SETUPマクロによって context変数やdc変数がセットされます。

#define EVENT_SETUP                                     \
  debug_context_t *dc;                                  \
  VALUE context;                                        \
  rb_trace_arg_t *trace_arg;                            \
                                                        \
  UNUSED(data);                                         \
                                                        \
  if (!is_living_thread(rb_thread_current()))           \
    return;                                             \
                                                        \
  thread_context_lookup(rb_thread_current(), &context); \
  Data_Get_Struct(context, debug_context_t, dc);        \
                                                        \
  trace_arg = rb_tracearg_from_tracepoint(trace_point); \
  if (verbose == Qtrue)                                 \
    trace_print(trace_arg, dc, 0, 0);                   \
                                                        \
  if (CTX_FL_TEST(dc, CTX_FL_IGNORE))                   \
    return;                                             \
                                                        \
  acquire_lock(dc);

CTX_FL_IGNORE_STEPSが立っていなければdc->stepsが1減算され、dc->stepsが0になったとき（byebugを呼び出してreturnが3回発生した後の状態）にcall_at_line_check関数が呼ばれます。call_at_line_checkはさらにcall_at_line関数を呼び出し、call_at_lineはcall_at関数を呼び出します。

call_at_line_check(VALUE ctx, debug_context_t *dc, VALUE breakpoint)
{
  dc->stop_reason = CTX_STOP_STEP;

  if (!NIL_P(breakpoint))
    call_at_breakpoint(ctx, dc, breakpoint);

  reset_stepping_stop_points(dc);

  call_at_line(ctx, dc);
}

call_at関数は以下のように定義されています。call_at_lineの場合、midは:at_lineのシンボルのIDが入ります。

static VALUE
call_at(VALUE ctx, debug_context_t *dc, ID mid, int argc, VALUE arg)
{
  struct call_with_inspection_data cwi;
  VALUE argv[1];

  argv[0] = arg;

  cwi.dc = dc;
  cwi.ctx = ctx;
  cwi.id = mid;
  cwi.argc = argc;
  cwi.argv = &argv[0];

  return call_with_debug_inspector(&cwi);
}

static VALUE
call_at_line(VALUE ctx, debug_context_t *dc)
{
  return call_at(ctx, dc, rb_intern("at_line"), 0, Qnil);
}

call_atはcall_with_debug_inspectorを呼び出します。rb_ensureは第一引数の関数を第二引数を引数として呼び出し、その関数が正常に終了しても例外がraiseされても、必ず第三引数の関数を第四引数を引数として呼び出します（Rubyのensure節に相当します）。

extern VALUE
call_with_debug_inspector(struct call_with_inspection_data *data)
{
  return rb_ensure(open_debug_inspector, (VALUE)data, close_debug_inspector,
                   (VALUE)data);
}

static VALUE
open_debug_inspector(struct call_with_inspection_data *cwi)
{
  return rb_debug_inspector_open(open_debug_inspector_i, cwi);
}

static VALUE
open_debug_inspector_i(const rb_debug_inspector_t *inspector, void *data)
{
  struct call_with_inspection_data *cwi =
    (struct call_with_inspection_data *)data;

  cwi->dc->backtrace = load_backtrace(inspector);

  return rb_funcall2(cwi->ctx, cwi->id, cwi->argc, cwi->argv);
}

open_debug_inspector関数はrb_debug_inspector_openを呼び出します。rb_debug_inspector_openを呼び出している理由は、load_backtrace関数によってバックトレースを取得するためです。最終的にopen_debug_inspector_iが呼び出され、rb_funcall2によりContext#at_lineが呼ばれます。

module Byebug
  class Context
    def at_line
      self.frame = 0
      return if ignored_file?(file)

      processor.at_line
    end

processorはCommandProcessorのインスタンスなのでCommandProcessor#at_lineが呼ばれます。#at_lineは#process_commandsを呼びます。

module Byebug
  class CommandProcessor
# ...
    def at_line
      process_commands
    end
# ...
    def process_commands
      before_repl

      repl
    ensure
      after_repl
    end

#replではプロンプトを表示してユーザからの入力を待ち、入力されたコマンドを#run_cmdで実行します。

    def repl
      until @proceed
        cmd = interface.read_command(prompt)
        return if cmd.nil?

        next if cmd == ""

        run_cmd(cmd)
      end
    end

次に各コマンドについて読んでいきます。

nextは以下のような定義になっています。Context#step_overを呼び出し、CommandProcessor#proceed!を呼び出します。#proceed!はREPLの入出力ループを抜けるためのフラグ制御です。

module Byebug
  class NextCommand < Command
# ...
    def execute
      steps, err = parse_steps(@match[1], "Next")
      return errmsg(err) unless steps

      context.step_over(steps, context.frame.pos)
      processor.proceed!
    end
  end
end

Context_step_overは以下のように定義されています。nextを引数無しで叩いた場合、context->linesには1が、context->dest_frameにはcontext->calced_stack_size（＝バックトレースのスタック数）から現在のフレーム位置を引いた値（最上位フレームにいるときはcalced_stack_sizeそのもの）がセットされます。

static VALUE
Context_step_over(int argc, VALUE *argv, VALUE self)
{
  int n_args, frame;
  VALUE lines, v_frame;
  debug_context_t *context;

  Data_Get_Struct(self, debug_context_t, context);

# ...

  n_args = rb_scan_args(argc, argv, "11", &lines, &v_frame);
  frame = n_args == 1 ? 0 : FIX2INT(v_frame);

# ...

  context->lines = FIX2INT(lines);
  context->dest_frame = context->calced_stack_size - frame;

  return Qnil;
}

context->linesはline_eventで1ずつ減少します。ただし減少するのは、nextを叩いた時のスタック階層と同じか、それより浅い（呼び出し元側の）階層で発生したline_eventの場合のみです（そうしないとstep_inしちゃう）。line_eventはcontext->linesが0のときにもcall_at_line_checkを呼び出すので、結果として次の行でブレークする処理を実現しています。

  if (dc->calced_stack_size <= dc->dest_frame)
  {
    dc->dest_frame = dc->calced_stack_size;
    CTX_FL_UNSET(dc, CTX_FL_IGNORE_STEPS);

    dc->lines = dc->lines <= 0 ? -1 : dc->lines - 1;
  }

stepコマンドは以下のような定義になっています。Context#step_intoを呼び出した後、CommandProcessor#proceed!を呼び出します。

module Byebug
  class StepCommand < Command
# ...
    def execute
      steps, err = parse_steps(@match[1], "Steps")
      return errmsg(err) unless steps

      context.step_into(steps, context.frame.pos)
      processor.proceed!
    end
  end
end

step_intoの場合はstepsを設定します。stepsはスタック階層によらずline_eventで1ずつ減少するため、step inを実現できます。

static VALUE
Context_step_into(int argc, VALUE *argv, VALUE self)
{

# ...

  n_args = rb_scan_args(argc, argv, "11", &steps, &v_frame);

# ...

  from_frame = n_args == 1 ? 0 : FIX2INT(v_frame);

# ...

  context->steps = FIX2INT(steps);
  context->dest_frame = context->calced_stack_size - from_frame;

  return steps;
}

finishコマンドは以下のような定義になっています。引数無しで呼び出した場合、step_outの第一引数は1、forceはfalseになります。

module Byebug
  class FinishCommand < Command
# ...
    def execute
# ...
      force = n_frames.zero? ? true : false
      context.step_out(context.frame.pos + n_frames, force)
      context.frame = 0
      processor.proceed!
    end

steps_outが1のときにreturnイベントがあると、RETURN_EVENT_SETUPによってdc->stepsに1がセットされるので、return直後のline_eventでブレークします。

Cのコードを読む時のあれこれ

Cのコードを読むにあたっては、gdbがかなり便利です。gdbを使ってRubyのデバッグをする方法はこちらの記事がとても詳しいです↓

gdbを使ったrubyのデバッグ - クックパッド開発者ブログ

ホストOS上でやっても良いですがDockerコンテナ上でやると環境が汚れないし色々と便利です。

Dockerfileをこんな感じ↓で書いてコンテナ立ち上げると、デバッグ用Rubyが入った環境であれこれできます。

dockerfiles/Dockerfile

gdb周りだとGDB dashboardがとても便利だし見栄えが良くなってデバッグ時のテンションが上がるのでオススメ。

あと、Cを読むときにハマったのがマクロのところで、例えばこんな感じの関数

static void
raise_event(VALUE trace_point, void *data)
{
  VALUE expn_class, ancestors, pm_context;
  int i;
  debug_context_t *new_dc;

  EVENT_SETUP;

  raised_exception = rb_tracearg_raised_exception(trace_arg);

一見して「trace_argって書いてあるけど、どこにも定義されてないじゃないか！」となりそうなんですが、EVENT_SETUPはマクロでコンパイル前（プリプロセス時）にその場に展開されるため、EVENT_SETUP内の変数宣言もこの関数の中に入ることになります。

#define EVENT_SETUP                                     \
  debug_context_t *dc;                                  \
  VALUE context;                                        \
  rb_trace_arg_t *trace_arg;                            \
...

また、RubyのC拡張のコードにはRubyのオブジェクトの中にCの構造体をラップして入れる機能があります。

byebugで言うと、steps_outなどのメンバ変数を持つdebug_context_t構造体はContextオブジェクト内にラップされています。以下はContextオブジェクトを新規に生成するcontext_create関数の定義です。debug_context_tを新規に生成してそれをラップしたContextオブジェクトをData_Wrap_Struct関数によって生成しています。

extern VALUE
context_create(VALUE thread)
{
  debug_context_t *context = ALLOC(debug_context_t);
# ...
  return Data_Wrap_Struct(cContext, context_mark, 0, context);
}

ラップされたオブジェクトからデータを取得するにはData_Get_Struct関数を使います。Contextの各メソッドは内部のdebug_context_t構造体を取得して構造体のメンバ変数を操作しています。

static VALUE
Context_step_over(int argc, VALUE *argv, VALUE self)
{
# ...
  Data_Get_Struct(self, debug_context_t, context);

  if (context->calced_stack_size == 0)
    rb_raise(rb_eRuntimeError, "No frames collected.");

# ...