From: Dan Carpenter <dan.carpenter@oracle.com>
To: Elena Reshetova <elena.reshetova@intel.com>
Cc: smatch@vger.kernel.org
Subject: Re: [PATCH] Quick trial on tracing host inputs
Date: Tue, 8 Mar 2022 14:34:16 +0300 [thread overview]
Message-ID: <20220308113416.GG3315@kadam> (raw)
In-Reply-To: <20220308085630.551547-1-elena.reshetova@intel.com>
On Tue, Mar 08, 2022 at 10:56:30AM +0200, Elena Reshetova wrote:
> This is just a quick trial to trace inputs received
> from the host/VMM in the same way as user inputs.
>
> Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> ---
> smatch_kernel_host_data.c | 1320 ++++++++++++++++++++++++++++++++++
> smatch_points_to_host_data.c | 334 +++++++++
The changes to smatch.h and check_list.h are missing.
> 2 files changed, 1654 insertions(+)
> create mode 100755 smatch_kernel_host_data.c
> create mode 100755 smatch_points_to_host_data.c
>
> diff --git a/smatch_kernel_host_data.c b/smatch_kernel_host_data.c
> new file mode 100755
> index 00000000..540875c5
> --- /dev/null
> +++ b/smatch_kernel_host_data.c
> @@ -0,0 +1,1320 @@
> +/*
> + * Copyright (C) 2011 Dan Carpenter.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
> + */
> +
> +/* Note: The below code is just a quick trial to modify the
> + * smatch_kernel_host_data.c to work on data received from a
> + * untrusted host/VMM.
> + * Similar as smatch_kernel_user_data.c it works with
> + * smatch_points_to_host_data.c code. It also uses some helper functions
> + * from the check_host_input.c pattern.
> + */
> +
> +#include "smatch.h"
> +#include "smatch_slist.h"
> +#include "smatch_extra.h"
> +#include <math.h>
> +
> +const char *host_input_funcs[] = {
> + "inb", "inw", "inl", "inb_p", "inw_p", "inl_p", "insb", "insw", "insl", "get_dma_residue", "ioread8", "ioread16", "ioread32",
> + "ioread16be", "ioread32be", "ioread64_lo_hi", "ioread64_hi_lo", "ioread64be_lo_hi", "ioread64be_hi_lo", "ioread8_rep",
> + "ioread16_rep", "ioread32_rep", "__ioread32_copy", "iomap_readq", "iomap_readb", "iomap_readw", "iomap_readl", "memcpy_fromio",
> + "mmio_insb", "mmio_insw", "mmio_insl", "readb", "readw", "readl", "readq", "readsb", "readsw", "readsl", "readsq", "__readb", "__readw",
> + "__readl", "__readq", "__readsb", "__readsw", "__readsl", "__readsq", "__raw_readb", "__raw_readw", "__raw_readl", "__raw_readq",
> + "lo_hi_readq", "hi_lo_readq", "lo_hi_readq_relaxed", "hi_lo_readq_relaxed", "readb_relaxed", "readw_relaxed", "readl_relaxed",
> + "readq_relaxed", "native_read_msr", "native_read_msr_safe", "__rdmsr", "rdmsrl", "rdmsrl_safe", "rdmsr_on_cpu", "rdmsrl_on_cpu",
> + "rdmsr_on_cpus", "rdmsr_safe_on_cpu", "rdmsrl_safe_on_cpu", "paravirt_read_msr", "paravirt_read_msr_safe", "read_msr", "msr_read",
> + "native_apic_msr_read", "native_apic_mem_read", "native_apic_icr_read", "apic_read", "apic_icr_read", "native_x2apic_icr_read",
> + "io_apic_read", "native_io_apic_read", "__ioapic_read_entry", "ioapic_read_entry", "vp_ioread8", "vp_ioread16", "vp_ioread32",
> + "__virtio_cread_many", "virtio_cread", "virtio_cread_le", "virtio_cread8", "virtio_cread16", "virtio_cread32", "virtio_cread64",
> + "virtio_cread_bytes", "virtio16_to_cpu", "virtio32_to_cpu", "virtio64_to_cpu", "__virtio16_to_cpu", "__virtio32_to_cpu",
> + "__virtio64_to_cpu", "virtqueue_get_buf", "vringh16_to_cpu", "vringh32_to_cpu", "vringh64_to_cpu", "tap16_to_cpu", "tun16_to_cpu",
> + "read_pci_config", "read_pci_config_byte", "read_pci_config_16", "raw_pci_read", "pci_read", "pci_read_config_byte",
> + "pci_read_config_word", "pci_read_config_dword", "pci_bus_read_config_byte", "pci_bus_read_config_word",
> + "pci_bus_read_config_dword", "pci_generic_config_read", "pci_generic_config_read32", "pci_user_read_config_byte",
> + "pci_user_read_config_word", "pci_user_read_config_dword", "pcie_capability_read_word", "pcie_capability_read_dword",
> + "pci_read_vpd", "serial8250_early_in", "serial_dl_read", "serial8250_in_MCR", "serial_in", "serial_port_in", "serial_icr_read",
> + "serial8250_rx_chars", "dw8250_readl_ext", "udma_readl", "sio_read_reg", "irq_readl_be", "irq_reg_readl", "fw_cfg_read_blob",
> + "acpi_os_read_iomem", "acpi_os_read_port", "acpi_hw_read_multiple", "acpi_hw_read", "acpi_hw_read_port", "acpi_hw_register_read",
> + "acpi_hw_gpe_read", "apei_read", "acpi_read", "__apei_exec_read_register", "cpc_read", "hv_get_register", "iosf_mbi_read",
> + "cpuid", "cpuid_count", "cpuid_eax", "cpuid_ebx", "cpuid_ecx", "cpuid_edx"
> +
> +};
> +
> +
> +static int my_id;
> +static int my_call_id;
> +
> +STATE(called);
This state is not used here. Even in smatch_kernel_user_data.c it
should be moved out of there into a separate file...
> +static unsigned long func_gets_host_data;
> +static struct stree *start_states;
> +
> +static void save_start_states(struct statement *stmt)
> +{
> + start_states = clone_stree(__get_cur_stree());
> +}
> +
> +static void free_start_states(void)
> +{
> + free_stree(&start_states);
> +}
> +
No need to do this these days. It's stored in get_start_states().
> +static struct smatch_state *empty_state(struct sm_state *sm)
> +{
> + return alloc_estate_empty();
> +}
> +
> +static struct smatch_state *new_state(struct symbol *type)
> +{
> + struct smatch_state *state;
> +
> + if (!type || type_is_ptr(type))
> + return NULL;
> +
> + state = alloc_estate_whole(type);
> + estate_set_new(state);
> +
> + return state;
This code is supposed to differentiate between places where we return
user data because it was passed to us or we got fresh user data inside
the function. In smatch_kernel_user_data.c it's only a single question
"did we recieve user data? Y/N." But it should be "Was variable foo->bar
user data."
I would probably pull returned user data out into a separate file these
days. And have a different file for if a variable currently holds user
data or if we pass user data to a function.
> +}
> +
> +static void pre_merge_hook(struct sm_state *cur, struct sm_state *other)
> +{
> + struct smatch_state *user = cur->state;
> + struct smatch_state *extra;
> + struct smatch_state *state;
> + struct range_list *rl;
> +
> + extra = __get_state(SMATCH_EXTRA, cur->name, cur->sym);
> + if (!extra)
> + return;
> + rl = rl_intersection(estate_rl(user), estate_rl(extra));
> + state = alloc_estate_rl(clone_rl(rl));
> + if (estate_capped(user) || is_capped_var_sym(cur->name, cur->sym))
> + estate_set_capped(state);
> + if (estate_treat_untagged(user))
> + estate_set_treat_untagged(state);
Tagged is for ARM tagged pointers. It's probably not something you
need. Search for "tag" and delete.
> + if (estates_equiv(state, cur->state))
> + return;
> + set_state(my_id, cur->name, cur->sym, state);
> +}
> +
[ snip ]
> +
> +extern uint get_arg_bitmask(struct expression *expr);
> +static struct expression *ignore_param_set;
> +extern bool is_ignored_func(struct expression *expr);
> +
> +static void match_host_input(const char *fn, struct expression *expr)
> +{
> +
> + uint arg_bitmask = 0;
> +
> + if (!expr)
> + return;
> +
> + arg_bitmask = get_arg_bitmask(expr);
> +
> + if (!arg_bitmask) /* function returns host data, handled via match_returns_host_rl */
> + return;
> +
> + if (is_ignored_func(expr))
> + return;
> +
> + func_gets_host_data = true;
> + ignore_param_set = expr;
> +
> + switch((uint)log2(arg_bitmask)) {
> + case 0xC:
> + tag_argument(expr, 2);
> + tag_argument(expr, 3);
> + break;
> + case 0x36:
> + tag_argument(expr, 1);
> + tag_argument(expr, 2);
> + tag_argument(expr, 3);
> + tag_argument(expr, 4);
> + break;
> + case 0x74:
> + tag_argument(expr, 2);
> + tag_argument(expr, 3);
> + tag_argument(expr, 4);
> + tag_argument(expr, 5);
> + break;
> + default:
> + tag_argument(expr, (uint)log2(arg_bitmask));
> + break;
> + }
> +
> + return;
> +}
This function would be better re-written with param key API. Sorry
that Smatch doesn't have documentation. :( An example, of how that
works is in check_unwind.c, instead of a bitmap you would just have
multiple entries in the table.
I'm typing directly into my email client so this might not compile...
struct host_fn_info {
const char *name;
int type;
int param;
const char *key;
const sval_t *implies_start, *implies_end;
func_hook *call_back;
};
static struct host_fn_info func_table[] = {
{ "memcpy_fromio", HOST_DATA, 0, "*$" },
...
{ "cpuid", HOST_DATA, 1, "*$" },
{ "cpuid", HOST_DATA, 2, "*$" },
{ "cpuid", HOST_DATA, 3, "*$" },
{ "cpuid", HOST_DATA, 4, "*$" },
};
static void set_param_host_data(struct expression *expr, const char *name, struct symbol *sym, void *data)
{
struct expression *arg;
struct range_list *rl;
arg = gen_expression_from_name_sym(name, sym);
if (strcmp(key, "*$") == 0) {
tag_as_user_data(arg);
return;
}
// make sure that smatch_extra was run first
get_absolute_rl(arg, &rl);
set_state(my_id, name, sym, alloc_estate_rl(rl));
}
The code you have should work though and it should propagate.
What we need is some simple test cases.
//---
void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
unsigned int a, b, c, d e;
unsigned int test(void)
{
cpuid(0, &a, &b, &c, &d);
__smatch_states("host");
return a;
}
unsigned int test2(void)
{
unsigned int x = test();
__smatch_states("host");
}
regards,
dan carpenter
next prev parent reply other threads:[~2022-03-08 11:35 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-08 8:56 [PATCH] Quick trial on tracing host inputs Elena Reshetova
2022-03-08 11:34 ` Dan Carpenter [this message]
2022-03-08 11:37 ` Dan Carpenter
2022-03-08 12:38 ` Dan Carpenter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220308113416.GG3315@kadam \
--to=dan.carpenter@oracle.com \
--cc=elena.reshetova@intel.com \
--cc=smatch@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).