qsym

A symbolic executor for the QBE intermediate language

git clone https://git.8pit.net/qsym.git

  1use libc::{c_int, fork, waitpid};
  2use qbe_reader::types::*;
  3use qbe_reader::Definition;
  4use std::process::exit;
  5
  6use z3::{
  7    ast::{Ast, Bool, BV},
  8    Context,
  9};
 10
 11use crate::error::*;
 12use crate::state::*;
 13use crate::value::*;
 14
 15pub struct Interp<'ctx, 'src> {
 16    v: ValueFactory<'ctx>,
 17    state: State<'ctx, 'src>,
 18    solver: z3::Solver<'ctx>,
 19
 20    // Primitive control-flow tracking for PHI instructions.
 21    prev_label: Option<&'src str>,
 22}
 23
 24struct Path<'ctx, 'src>(Option<Bool<'ctx>>, &'src Block);
 25
 26enum FuncReturn<'ctx, 'src> {
 27    Jump(Path<'ctx, 'src>),
 28    CondJump(Path<'ctx, 'src>, Path<'ctx, 'src>),
 29    Return(Option<BV<'ctx>>),
 30}
 31
 32enum BlockReturn<'ctx, 'src> {
 33    Value(Option<BV<'ctx>>),
 34    Fallthrough(&'src str), // Block label to fall through from
 35}
 36
 37impl<'ctx, 'src> Path<'ctx, 'src> {
 38    pub fn feasible(&self, solver: &z3::Solver<'ctx>) -> bool {
 39        let cond = match &self.0 {
 40            Some(x) => x,
 41            None => return true,
 42        };
 43
 44        let r = solver.check_assumptions(&[cond.clone()]);
 45        match r {
 46            z3::SatResult::Unsat => false,
 47            z3::SatResult::Sat => true,
 48            z3::SatResult::Unknown => panic!("unknown SAT result"),
 49        }
 50    }
 51}
 52
 53impl<'ctx, 'src> Interp<'ctx, 'src> {
 54    pub fn new(
 55        ctx: &'ctx Context,
 56        source: &'src Vec<Definition>,
 57    ) -> Result<Interp<'ctx, 'src>, Error> {
 58        let state = State::new(&ctx, source)?;
 59        Ok(Interp {
 60            v: ValueFactory::new(ctx),
 61            state: state,
 62            solver: z3::Solver::new(&ctx),
 63            prev_label: None,
 64        })
 65    }
 66
 67    fn symbolic_type(&self, name: String, ty: &Type) -> BV<'ctx> {
 68        match ty {
 69            Type::Base(ty) => self.v.from_base(*ty, name),
 70            Type::SubWordType(ty) => self.v.from_subw(*ty, name),
 71            _ => panic!("not implemented"),
 72        }
 73    }
 74
 75    fn make_symbolic(&self, func: &FuncDef, param: &FuncParam) -> BV<'ctx> {
 76        match param {
 77            FuncParam::Regular(ty, name) => {
 78                self.symbolic_type(func.name.to_string() + ":" + name, ty)
 79            }
 80            FuncParam::Env(_) => panic!("env parameters not supported"),
 81            FuncParam::Variadic => panic!("varadic functions not supported"),
 82        }
 83    }
 84
 85    fn lookup_params(&self, params: &Vec<FuncParam>) -> Result<Vec<BV<'ctx>>, Error> {
 86        let mut vec: Vec<BV<'ctx>> = Vec::new();
 87        for param in params.iter() {
 88            match param {
 89                FuncParam::Regular(ty, name) => {
 90                    let mut val = self
 91                        .state
 92                        .get_local(name)
 93                        .ok_or(Error::UnknownVariable(name.to_string()))?;
 94
 95                    // Calls with a sub-word return type define a temporary of
 96                    // base type `w` with its most significant bits unspecified.
 97                    if let Type::SubWordType(swty) = ty {
 98                        val = self.v.extend_subword(*swty, val)
 99                    }
100
101                    vec.push(val);
102                }
103                FuncParam::Env(_) => panic!("env parameters not supported"),
104                FuncParam::Variadic => panic!("varadic functions not supported"),
105            };
106        }
107
108        Ok(vec)
109    }
110
111    fn get_const(&self, constant: &Const) -> Result<BV<'ctx>, Error> {
112        match constant {
113            Const::Number(n) => Ok(self.v.from_base_i64(BaseType::Long, *n)),
114            Const::Global(v) => self
115                .state
116                .get_ptr(v)
117                .ok_or(Error::UnknownVariable(v.to_string())),
118            Const::SFP(_) => panic!("single precision floating points not supported"),
119            Const::DFP(_) => panic!("double precision floating points not supported"),
120        }
121    }
122
123    fn get_dyn_const(&self, dconst: &DynConst) -> Result<BV<'ctx>, Error> {
124        match dconst {
125            DynConst::Const(c) => self.get_const(c),
126            DynConst::Thread(_) => panic!("thread-local constants not supported"),
127        }
128    }
129
130    fn get_value(&self, dest_ty: Option<BaseType>, value: &Value) -> Result<BV<'ctx>, Error> {
131        let bv = match value {
132            Value::LocalVar(var) => self
133                .state
134                .get_local(var)
135                .ok_or(Error::UnknownVariable(var.to_string())),
136            Value::Const(dconst) => Ok(self.get_dyn_const(dconst)?),
137        }?;
138
139        // See https://c9x.me/compile/doc/il-v1.1.html#Subtyping
140        if let Some(x) = dest_ty {
141            if x == BaseType::Word && bv.get_size() == LONG_SIZE {
142                let lsb = bv.extract(31, 0); // XXX
143                assert!(lsb.get_size() == WORD_SIZE);
144                return Ok(lsb);
145            } else if x == BaseType::Word && bv.get_size() != WORD_SIZE {
146                return Err(Error::InvalidSubtyping);
147            }
148        }
149
150        Ok(bv)
151    }
152
153    fn perform_compare(
154        &self,
155        dest_ty: BaseType,
156        op: &CmpOp,
157        bv1: BV<'ctx>,
158        bv2: BV<'ctx>,
159    ) -> BV<'ctx> {
160        let cond = match op {
161            CmpOp::Eq => bv1._eq(&bv2),
162            CmpOp::Ne => bv1._eq(&bv2).not(),
163            CmpOp::Sle => bv1.bvsle(&bv2),
164            CmpOp::Slt => bv1.bvslt(&bv2),
165            CmpOp::Sge => bv1.bvsgt(&bv2),
166            CmpOp::Sgt => bv1.bvsgt(&bv2),
167            CmpOp::Ule => bv1.bvule(&bv2),
168            CmpOp::Ult => bv1.bvult(&bv2),
169            CmpOp::Uge => bv1.bvuge(&bv2),
170            CmpOp::Ugt => bv1.bvugt(&bv2),
171        };
172
173        let true_bv = self.v.from_base_u64(dest_ty, 1);
174        let false_bv = self.v.from_base_u64(dest_ty, 0);
175
176        cond.ite(&true_bv, &false_bv)
177    }
178
179    pub fn perform_binop<F>(
180        &self,
181        dest_ty: BaseType,
182        op: F,
183        o1: &Value,
184        o2: &Value,
185    ) -> Result<BV<'ctx>, Error>
186    where
187        F: Fn(&BV<'ctx>, &BV<'ctx>) -> BV<'ctx>,
188    {
189        let bv1 = self.get_value(Some(dest_ty), o1)?;
190        let bv2 = self.get_value(Some(dest_ty), o2)?;
191        Ok(op(&bv1, &bv2))
192    }
193
194    fn exec_inst(&mut self, dest_ty: BaseType, inst: &Instr) -> Result<BV<'ctx>, Error> {
195        // XXX: This instruction simulator assumes that the instructions are
196        // well-typed. If not, this causes dubious assertion failures everywhere.
197        match inst {
198            Instr::Add(v1, v2) => self.perform_binop(dest_ty, BV::bvadd, v1, v2),
199            Instr::Sub(v1, v2) => self.perform_binop(dest_ty, BV::bvsub, v1, v2),
200            Instr::Mul(v1, v2) => self.perform_binop(dest_ty, BV::bvmul, v1, v2),
201            Instr::UDiv(v1, v2) => self.perform_binop(dest_ty, BV::bvudiv, v1, v2),
202            Instr::Rem(v1, v2) => self.perform_binop(dest_ty, BV::bvsrem, v1, v2),
203            Instr::URem(v1, v2) => self.perform_binop(dest_ty, BV::bvurem, v1, v2),
204            Instr::Or(v1, v2) => self.perform_binop(dest_ty, BV::bvor, v1, v2),
205            Instr::Xor(v1, v2) => self.perform_binop(dest_ty, BV::bvxor, v1, v2),
206            Instr::And(v1, v2) => self.perform_binop(dest_ty, BV::bvand, v1, v2),
207            Instr::Sar(v1, v2) => self.perform_binop(dest_ty, BV::bvashr, v1, v2),
208            Instr::Shr(v1, v2) => self.perform_binop(dest_ty, BV::bvlshr, v1, v2),
209            Instr::Shl(v1, v2) => self.perform_binop(dest_ty, BV::bvshl, v1, v2),
210            Instr::Neg(val) => {
211                let bv = self.get_value(Some(dest_ty), val)?;
212                Ok(bv.bvneg())
213            }
214            Instr::Load(ty, a) => {
215                let size = ValueFactory::loadty_to_size(*ty);
216                assert!(size % 8 == 0);
217                let addr = self.get_value(None, a)?;
218                let value = self.state.mem.load_bitvector(addr, size as u64 / 8);
219
220                // For types smaller than long, two variants of the load
221                // instruction are available: one will sign extend the
222                // loaded value, while the other will zero extend it.
223                if size < LONG_SIZE {
224                    if ty.is_signed() {
225                        Ok(self.v.sign_ext_to(dest_ty, value))
226                    } else {
227                        Ok(self.v.zero_ext_to(dest_ty, value))
228                    }
229                } else {
230                    Ok(value)
231                }
232            }
233            Instr::Alloc(align, size) => {
234                let addr = self.state.stack_alloc(align.byte_align(), *size);
235                Ok(addr)
236            }
237            Instr::Compare(ty, op, v1, v2) => {
238                let bv1 = self.get_value(Some(*ty), v1)?;
239                let bv2 = self.get_value(Some(*ty), v2)?;
240                Ok(self.perform_compare(dest_ty, op, bv1, bv2))
241            }
242            Instr::Ext(ty, v) => {
243                let bv = self.get_value(None, v)?;
244                let to_type = self.v.trunc_to(*ty, bv);
245                if ty.is_signed() {
246                    Ok(self.v.sign_ext_to(dest_ty, to_type))
247                } else {
248                    Ok(self.v.zero_ext_to(dest_ty, to_type))
249                }
250            }
251        }
252    }
253
254    fn exec_volatile(&mut self, instr: &VolatileInstr) -> Result<(), Error> {
255        match instr {
256            VolatileInstr::Store(ty, v, a) => {
257                let value = self.get_value(None, v)?;
258                let addr = self.get_value(None, a)?;
259                self.state
260                    .mem
261                    .store_bitvector(addr, self.v.cast_to(*ty, value));
262            }
263            _ => todo!(),
264        }
265
266        Ok(())
267    }
268
269    fn exec_stat(&mut self, stat: &'src Statement) -> Result<(), Error> {
270        match stat {
271            Statement::Assign(dest, base, inst) => {
272                let result = self.exec_inst(*base, &inst)?;
273                self.state.add_local(dest, result);
274            }
275            Statement::Volatile(instr) => {
276                self.exec_volatile(instr)?;
277            }
278            Statement::Call(dest, _ty, fname, params) => {
279                let values = self.lookup_params(params)?;
280                let func = self
281                    .state
282                    .get_func(fname)
283                    .ok_or(Error::UnknownFunction(fname.to_string()))?;
284
285                let result = self.exec_func(func, values)?;
286                if let Some(ret_val) = result {
287                    self.state.add_local(dest, ret_val);
288                }
289            }
290        }
291
292        Ok(())
293    }
294
295    fn get_block(&self, label: &str) -> Result<&'src Block, Error> {
296        self.state
297            .get_block(label)
298            .ok_or(Error::UnknownLabel(label.to_string()))
299    }
300
301    fn exec_jump(&self, instr: &JumpInstr) -> Result<FuncReturn<'ctx, 'src>, Error> {
302        match instr {
303            JumpInstr::Jump(label) => {
304                let path = Path(None, self.get_block(label)?);
305                Ok(FuncReturn::Jump(path))
306            }
307            JumpInstr::Jnz(value, nzero_label, zero_label) => {
308                let bv = self.get_value(Some(BaseType::Word), value)?;
309
310                assert!(bv.get_size() == WORD_SIZE);
311                let is_zero = bv._eq(&self.v.make_word(0));
312
313                let nzero_path = Path(Some(is_zero.not()), self.get_block(nzero_label)?);
314                let zero_path = Path(Some(is_zero.clone()), self.get_block(zero_label)?);
315
316                let zero_feasible = zero_path.feasible(&self.solver);
317                if zero_feasible && nzero_path.feasible(&self.solver) {
318                    Ok(FuncReturn::CondJump(nzero_path, zero_path))
319                } else if zero_feasible {
320                    Ok(FuncReturn::Jump(zero_path))
321                } else {
322                    Ok(FuncReturn::Jump(nzero_path))
323                }
324            }
325            JumpInstr::Return(opt_val) => match opt_val {
326                Some(x) => Ok(FuncReturn::Return(Some(self.get_value(None, x)?))),
327                None => Ok(FuncReturn::Return(None)),
328            },
329            JumpInstr::Halt => {
330                println!("Halting executing");
331                Err(Error::HaltExecution)
332            }
333        }
334    }
335
336    #[inline]
337    fn explore_path(&mut self, path: &Path<'ctx, 'src>) -> Result<BlockReturn<'ctx, 'src>, Error> {
338        println!("[jnz] Exploring path for label '{}'", path.1.label);
339
340        if let Some(c) = &path.0 {
341            self.solver.assert(c);
342        }
343        self.exec_block(path.1)
344    }
345
346    fn exec_block(&mut self, block: &'src Block) -> Result<BlockReturn<'ctx, 'src>, Error> {
347        for phi in block.phi.iter() {
348            match self.prev_label {
349                Some(label) => {
350                    let val = phi
351                        .labels
352                        .get(label)
353                        .ok_or(Error::UnknownLabel(label.to_string()))?;
354
355                    let bv = self.get_value(Some(phi.base_type), val)?;
356                    self.state.add_local(&phi.ident, bv);
357                }
358                None => return Err(Error::PhiAtFuncStart),
359            }
360        }
361
362        for stat in block.inst.iter() {
363            self.exec_stat(stat)?;
364        }
365
366        let jump = match &block.jump {
367            Some(x) => x,
368            None => return Ok(BlockReturn::Fallthrough(&block.label)),
369        };
370
371        let targets = self.exec_jump(jump)?;
372        self.prev_label = Some(&block.label);
373        match targets {
374            // For conditional jumps, we fork(3) the entire interpreter process.
375            // This is, obviously, horribly inefficient and will lead to memory
376            // explosion issues for any somewhat complex program. In the future,
377            // the State module should be modified to allow efficient copies of
378            // the state by leveraging a copy-on-write mechanism.
379            FuncReturn::CondJump(path1, path2) => unsafe {
380                let pid = fork();
381                match pid {
382                    -1 => Err(Error::ForkFailed),
383                    0 => self.explore_path(&path1),
384                    _ => {
385                        let mut status = 0 as c_int;
386                        if waitpid(pid, &mut status as *mut c_int, 0) == -1 {
387                            Err(Error::WaitpidFailed)
388                        } else {
389                            if status != 0 {
390                                exit(status);
391                            }
392                            self.explore_path(&path2)
393                        }
394                    }
395                }
396            },
397            FuncReturn::Jump(path) => self.explore_path(&path),
398            FuncReturn::Return(value) => {
399                // TODO: Treat return from entry point function like `hlt` for now.
400                if self.state.stack_size() == 1 {
401                    Err(Error::HaltExecution)
402                } else {
403                    Ok(BlockReturn::Value(value))
404                }
405            }
406        }
407    }
408
409    pub fn exec_func(
410        &mut self,
411        func: &'src FuncDef,
412        params: Vec<BV<'ctx>>,
413    ) -> Result<Option<BV<'ctx>>, Error> {
414        self.state.push_func(func);
415
416        if func.params.len() != params.len() {
417            return Err(Error::InvalidCall);
418        }
419        for i in 0..func.params.len() {
420            let name = func.params[i].get_name().unwrap();
421            let bv = params[i].clone();
422            self.state.add_local(name, bv);
423        }
424
425        let mut block = func.body.first();
426        while block.is_some() {
427            // .unwrap() won't panic due to loop condition.
428            let cur_block = block.unwrap();
429
430            match self.exec_block(cur_block) {
431                Err(Error::HaltExecution) => {
432                    self.dump();
433                    return Ok(None);
434                }
435                Err(x) => return Err(x),
436                Ok(r) => match r {
437                    BlockReturn::Value(v) => {
438                        self.state.pop_func();
439                        return Ok(v);
440                    }
441                    BlockReturn::Fallthrough(label) => {
442                        let mut it = func.body.iter();
443                        let cur = it.find(|b| b.label == label);
444                        assert!(cur.is_some() && cur.unwrap().label == label);
445
446                        block = it.next();
447                        self.prev_label = Some(label);
448                        continue;
449                    }
450                },
451            }
452        }
453
454        // Last block is not terminated by a jump instruction.
455        Err(Error::MissingJump)
456    }
457
458    pub fn exec_symbolic(&mut self, name: &String) -> Result<(), Error> {
459        let func = self
460            .state
461            .get_func(name)
462            .ok_or(Error::UnknownFunction(name.to_string()))?;
463
464        let params = func
465            .params
466            .iter()
467            .map(|p| self.make_symbolic(func, p))
468            .collect();
469        self.exec_func(func, params)?;
470
471        Ok(())
472    }
473
474    // XXX: Just a hack to see stuff right now.
475    pub fn dump(&self) {
476        self.solver.check();
477
478        println!("Local variables:");
479        self.state.dump_locals();
480
481        let model = self.solver.get_model();
482        match model {
483            None => panic!("Couldn't generate a Z3 model"),
484            Some(m) => {
485                let out = format!("{}", m);
486                println!("Symbolic variable values:");
487                println!("\t{}", out.replace("\n", "\n\t"));
488            }
489        };
490    }
491}