From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS111 204.8.152.0/21 X-Spam-Status: No, score=-2.2 required=3.0 tests=AWL,BAYES_00,RCVD_IN_XBL, URIBL_BLOCKED shortcircuit=no autolearn=no version=3.3.2 X-Original-To: spew@80x24.org Received: from 80x24.org (cs-tor.bu.edu [204.8.156.142]) by dcvr.yhbt.net (Postfix) with ESMTP id 9265C633809 for ; Sat, 22 Nov 2014 00:02:37 +0000 (UTC) From: Eric Wong To: spew@80x24.org Subject: [PATCH] fix rb_iseq_load, a work-in-progress Date: Sat, 22 Nov 2014 00:02:34 +0000 Message-Id: X-Mailer: git-send-email 2.2.0.rc0.37.gf6f61cb List-Id: I'm pretty sure kwargs aren't handled correctly yet, but the test code for [Feature #8543] works, at least. I've enabled the RubyVM::InstructionSequence.load method for testing. Using the following script to load the test scripts attached to https://bugs.ruby-lang.org/issues/8543 seems to produce the desired result: ----------------------- test.rb ---------------------- data = RubyVM::InstructionSequence.compile_file('hello.rb').to_a iseq = RubyVM::InstructionSequence.load(data) iseq.eval ----------------------- results ---------------------- tralivali Hello, world1! Hello, world2! Hello, world4! 
--- compile.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- iseq.c | 15 ++--- 2 files changed, 152 insertions(+), 51 deletions(-) diff --git a/compile.c b/compile.c index 4e4101f..9ea24d8 100644 --- a/compile.c +++ b/compile.c @@ -5913,68 +5913,174 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *anchor, } #define CHECK_ARRAY(v) rb_convert_type((v), T_ARRAY, "Array", "to_ary") -#define CHECK_STRING(v) rb_convert_type((v), T_STRING, "String", "to_str") #define CHECK_SYMBOL(v) rb_convert_type((v), T_SYMBOL, "Symbol", "to_sym") -static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;} + +static int int_param(int *dst, VALUE param, VALUE sym) +{ + VALUE val = rb_hash_aref(param, sym); + switch (TYPE(val)) { + case T_NIL: + return FALSE; + case T_FIXNUM: + *dst = FIX2INT(val); + return TRUE; + default: + rb_raise(rb_eTypeError, "invalid %+"PRIsVALUE" Fixnum: %+"PRIsVALUE, + sym, val); + } + return FALSE; +} + +static void iseq_build_kw(rb_iseq_t *iseq, VALUE keywords, int bits_start) +{ + int i, j; + int len = RARRAY_LENINT(keywords); + int default_len; + VALUE key, sym, default_val; + + iseq->param.flags.has_kw = !!len; + + if (!iseq->param.flags.has_kw) return; + + iseq->param.keyword = ZALLOC(struct rb_iseq_param_keyword); + iseq->param.keyword->num = len; + iseq->param.keyword->bits_start = bits_start; + i = iseq->param.keyword->bits_start - iseq->param.keyword->num; + iseq->param.keyword->table = &iseq->local_table[i]; + + /* required args */ + for (i = 0; i < len; i++) { + VALUE val = RARRAY_AREF(keywords, i); + + if (!SYMBOL_P(val)) { + goto default_values; + } + iseq->param.keyword->table[i] = SYM2ID(val); + iseq->param.keyword->required_num++; + } + +default_values: /* note: we intentionally preserve `i' from previous loop */ + default_len = len - i; + if (default_len == 0) { + return; + } + + iseq->param.keyword->default_values = ALLOC_N(VALUE, default_len); + + for (j = 0; i < len; i++, j++) { + key = 
RARRAY_AREF(keywords, i); + CHECK_ARRAY(key); + + switch (RARRAY_LEN(key)) { + case 1: + sym = RARRAY_AREF(key, 0); + default_val = Qundef; + break; + case 2: + sym = RARRAY_AREF(key, 0); + default_val = RARRAY_AREF(key, 1); + break; + default: + rb_raise(rb_eTypeError, + "keyword default has unsupported len %+"PRIsVALUE, + key); + } + iseq->param.keyword->table[i] = SYM2ID(sym); + iseq->param.keyword->default_values[j] = default_val; + } +} VALUE -rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE locals, VALUE args, +rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE locals, VALUE params, VALUE exception, VALUE body) { - int i; +#define SYM(s) ID2SYM(rb_intern(#s)) + int i, len; + int bits_start = 0; ID *tbl; struct st_table *labels_table = st_init_numtable(); + VALUE arg_opt_labels = rb_hash_aref(params, SYM(opt)); + VALUE keywords = rb_hash_aref(params, SYM(keyword)); + VALUE sym_arg_rest = ID2SYM(rb_intern("#arg_rest")); DECL_ANCHOR(anchor); INIT_ANCHOR(anchor); - iseq->local_table_size = RARRAY_LENINT(locals); + len = RARRAY_LENINT(locals); + iseq->local_table_size = len; iseq->local_table = tbl = (ID *)ALLOC_N(ID, iseq->local_table_size); iseq->local_size = iseq->local_table_size + 1; - for (i=0; iparam.size = iseq->param.lead_num = FIX2INT(args); - iseq->param.flags.has_lead = TRUE; - } - else { - int i = 0; - VALUE argc = CHECK_INTEGER(rb_ary_entry(args, i++)); - VALUE arg_opt_labels = CHECK_ARRAY(rb_ary_entry(args, i++)); - VALUE arg_post_num = CHECK_INTEGER(rb_ary_entry(args, i++)); - VALUE arg_post_start = CHECK_INTEGER(rb_ary_entry(args, i++)); - VALUE arg_rest = CHECK_INTEGER(rb_ary_entry(args, i++)); - VALUE arg_block = CHECK_INTEGER(rb_ary_entry(args, i++)); - - iseq->param.lead_num = FIX2INT(argc); - iseq->param.rest_start = FIX2INT(arg_rest); - iseq->param.post_num = FIX2INT(arg_post_num); - iseq->param.post_start = FIX2INT(arg_post_start); - iseq->param.block_start = FIX2INT(arg_block); - iseq->param.opt_num = RARRAY_LENINT(arg_opt_labels) - 1; - 
iseq->param.opt_table = (VALUE *)ALLOC_N(VALUE, iseq->param.opt_num + 1); - - if (iseq->param.flags.has_block) { - iseq->param.size = iseq->param.block_start + 1; - } - else if (iseq->param.flags.has_post) { - iseq->param.size = iseq->param.post_start + iseq->param.post_num; - } - else if (iseq->param.flags.has_rest) { - iseq->param.size = iseq->param.rest_start + 1; + if (sym_arg_rest == lv) { + bits_start = i; + break; } else { - iseq->param.size = iseq->param.lead_num + iseq->param.opt_num; + tbl[i] = FIXNUM_P(lv) ? (ID)FIX2LONG(lv) : SYM2ID(CHECK_SYMBOL(lv)); } + } + +#define INT_PARAM(F) int_param(&iseq->param.F, params, SYM(F)) + if (INT_PARAM(lead_num)) iseq->param.flags.has_lead = TRUE; + if (INT_PARAM(post_num)) iseq->param.flags.has_post = TRUE; + if (INT_PARAM(post_start)) iseq->param.flags.has_post = TRUE; + if (INT_PARAM(rest_start)) iseq->param.flags.has_rest = TRUE; + if (INT_PARAM(block_start)) iseq->param.flags.has_block = TRUE; +#undef INT_PARAM - for (i=0; iparam.opt_table[i] = (VALUE)register_label(iseq, labels_table, rb_ary_entry(arg_opt_labels, i)); + switch (TYPE(arg_opt_labels)) { + case T_ARRAY: + len = RARRAY_LENINT(arg_opt_labels); + iseq->param.flags.has_opt = !!len; + + if (iseq->param.flags.has_opt) { + iseq->param.opt_num = len - 1; + iseq->param.opt_table = (VALUE *)ALLOC_N(VALUE, len); + + for (i = 0; i < len; i++) { + VALUE ent = RARRAY_AREF(arg_opt_labels, i); + LABEL *label = register_label(iseq, labels_table, ent); + + iseq->param.opt_table[i] = (VALUE)label; + } } + case T_NIL: + break; + default: + rb_raise(rb_eTypeError, ":opt param is not an array: %+"PRIsVALUE, + arg_opt_labels); + } + + switch (TYPE(keywords)) { + case T_ARRAY: + iseq_build_kw(iseq, keywords, bits_start); + case T_NIL: + break; + default: + rb_raise(rb_eTypeError, ":keywords param is not an array: %+"PRIsVALUE, + keywords); + } + + if (iseq->param.flags.has_block) { + iseq->param.size = iseq->param.block_start + 1; + } + else if 
(iseq->param.flags.has_post) { + iseq->param.size = iseq->param.post_start + iseq->param.post_num; + } + else if (iseq->param.flags.has_rest) { + iseq->param.size = iseq->param.rest_start + 1; + } + else { + iseq->param.size = iseq->param.lead_num + iseq->param.opt_num; + } + + (void)int_param(&iseq->param.keyword->rest_start, params, SYM(kwrest)); + if (Qtrue == rb_hash_aref(params, SYM(ambiguous_param0))) { + iseq->param.flags.ambiguous_param0 = TRUE; } +#undef SYM /* exception */ iseq_build_from_ary_exception(iseq, labels_table, exception); diff --git a/iseq.c b/iseq.c index 08a7918..340eb64 100644 --- a/iseq.c +++ b/iseq.c @@ -466,6 +466,7 @@ rb_iseq_new_with_bopt(NODE *node, VALUE name, VALUE path, VALUE absolute_path, V } #define CHECK_ARRAY(v) rb_convert_type((v), T_ARRAY, "Array", "to_ary") +#define CHECK_HASH(v) rb_convert_type((v), T_HASH, "Hash", "to_hash") #define CHECK_STRING(v) rb_convert_type((v), T_STRING, "String", "to_str") #define CHECK_SYMBOL(v) rb_convert_type((v), T_SYMBOL, "Symbol", "to_sym") static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;} @@ -504,7 +505,7 @@ iseq_load(VALUE self, VALUE data, VALUE parent, VALUE opt) VALUE magic, version1, version2, format_type, misc; VALUE name, path, absolute_path, first_lineno; - VALUE type, body, locals, args, exception; + VALUE type, body, locals, params, exception; st_data_t iseq_type; rb_iseq_t *iseq; @@ -533,12 +534,7 @@ iseq_load(VALUE self, VALUE data, VALUE parent, VALUE opt) type = CHECK_SYMBOL(rb_ary_entry(data, i++)); locals = CHECK_ARRAY(rb_ary_entry(data, i++)); - - args = rb_ary_entry(data, i++); - if (FIXNUM_P(args) || (args = CHECK_ARRAY(args))) { - /* */ - } - + params = CHECK_HASH(rb_ary_entry(data, i++)); exception = CHECK_ARRAY(rb_ary_entry(data, i++)); body = CHECK_ARRAY(rb_ary_entry(data, i++)); @@ -559,7 +555,7 @@ iseq_load(VALUE self, VALUE data, VALUE parent, VALUE opt) prepare_iseq_build(iseq, name, path, absolute_path, first_lineno, parent, (enum 
iseq_type)iseq_type, 0, &option); - rb_iseq_build_from_ary(iseq, locals, args, exception, body); + rb_iseq_build_from_ary(iseq, locals, params, exception, body); cleanup_iseq_build(iseq); return iseqval; @@ -2317,8 +2313,7 @@ Init_ISeq(void) #endif /* disable this feature because there is no verifier. */ - /* rb_define_singleton_method(rb_cISeq, "load", iseq_s_load, -1); */ - (void)iseq_s_load; + rb_define_singleton_method(rb_cISeq, "load", iseq_s_load, -1); rb_define_singleton_method(rb_cISeq, "compile", iseq_s_compile, -1); rb_define_singleton_method(rb_cISeq, "new", iseq_s_compile, -1); -- EW