quebex

A software analysis framework built around the QBE intermediate language

git clone https://git.8pit.net/quebex.git

  1-- SPDX-FileCopyrightText: 2025 Sören Tempel <soeren+git@soeren-tempel.net>
  2--
  3-- SPDX-License-Identifier: MIT AND GPL-3.0-only
  4{-# LANGUAGE PatternSynonyms #-}
  5
  6module SimpleBV
  7  ( SExpr,
  8    SMT.Solver,
  9    SMT.defaultConfig,
 10    SMT.newLogger,
 11    SMT.newLoggerWithHandle,
 12    SMT.newSolver,
 13    SMT.newSolverWithConfig,
 14    SMT.solverLogger,
 15    SMT.smtSolverLogger,
 16    SMT.setLogic,
 17    SMT.push,
 18    SMT.pop,
 19    SMT.popMany,
 20    SMT.check,
 21    SMT.Result (..),
 22    SMT.Value (..),
 23    pattern W,
 24    pattern Byte,
 25    pattern Half,
 26    pattern Word,
 27    pattern Long,
 28    width,
 29    const,
 30    declareBV,
 31    assert,
 32    sexprToVal,
 33    getValue,
 34    getValues,
 35    toSMT,
 36    ite,
 37    and,
 38    or,
 39    not,
 40    eq,
 41    bvLit,
 42    bvAdd,
 43    bvAShr,
 44    bvLShr,
 45    bvAnd,
 46    bvMul,
 47    bvNeg,
 48    bvOr,
 49    bvSDiv,
 50    bvSLeq,
 51    bvSLt,
 52    bvSGeq,
 53    bvSGt,
 54    bvSRem,
 55    bvShl,
 56    bvSub,
 57    bvUDiv,
 58    bvULeq,
 59    bvUGeq,
 60    bvUGt,
 61    bvULt,
 62    bvURem,
 63    bvXOr,
 64    concat,
 65    extract,
 66    signExtend,
 67    zeroExtend,
 68  )
 69where
 70
 71import Data.Bits (shiftL, shiftR, (.&.))
 72import SimpleSMT qualified as SMT
 73import Prelude hiding (and, concat, const, not, or)
 74
 75data Expr a
 76  = Var String
 77  | Int Integer
 78  | And a a
 79  | Or a a
 80  | Neg a
 81  | Not a
 82  | Eq a a
 83  | BvAdd a a
 84  | BvAShr a a
 85  | BvLShr a a
 86  | BvAnd a a
 87  | BvMul a a
 88  | BvOr a a
 89  | BvSDiv a a
 90  | BvSLeq a a
 91  | BvSLt a a
 92  | BvSGeq a a
 93  | BvSGt a a
 94  | BvSRem a a
 95  | BvShl a a
 96  | BvSub a a
 97  | BvUDiv a a
 98  | BvULeq a a
 99  | BvUGeq a a
100  | BvUGt a a
101  | BvULt a a
102  | BvURem a a
103  | BvXOr a a
104  | Concat a a
105  | Ite a a a
106  | Extract Int Int a
107  | SignExtend Integer a
108  | ZeroExtend Integer a
109  deriving (Show, Eq)
110
111data SExpr
112  = SExpr
113  { width :: Int,
114    sexpr :: Expr SExpr
115  }
116  deriving (Show, Eq)
117
118toSMT :: SExpr -> SMT.SExpr
119toSMT expr =
120  case sexpr expr of
121    (Var name) -> SMT.const name
122    (Int v) -> SMT.List [SMT.Atom "_", SMT.Atom ("bv" ++ show v), SMT.Atom $ show (width expr)]
123    (Or lhs rhs) -> SMT.or (toSMT lhs) (toSMT rhs)
124    (Ite cond lhs rhs) -> SMT.ite (toSMT cond) (toSMT lhs) (toSMT rhs)
125    (And lhs rhs) -> SMT.and (toSMT lhs) (toSMT rhs)
126    (Not v) -> SMT.not (toSMT v)
127    (Neg v) -> SMT.bvNeg (toSMT v)
128    (SignExtend n v) -> SMT.signExtend n (toSMT v)
129    (ZeroExtend n v) -> SMT.zeroExtend n (toSMT v)
130    (Eq lhs rhs) -> SMT.eq (toSMT lhs) (toSMT rhs)
131    (Concat lhs rhs) -> SMT.concat (toSMT lhs) (toSMT rhs)
132    (Extract o w e) -> SMT.extract (toSMT e) (fromIntegral $ o + w - 1) (fromIntegral o)
133    (BvAnd lhs rhs) -> SMT.bvAnd (toSMT lhs) (toSMT rhs)
134    (BvAShr lhs rhs) -> SMT.bvAShr (toSMT lhs) (toSMT rhs)
135    (BvLShr lhs rhs) -> SMT.bvLShr (toSMT lhs) (toSMT rhs)
136    (BvAdd lhs rhs) -> SMT.bvAdd (toSMT lhs) (toSMT rhs)
137    (BvMul lhs rhs) -> SMT.bvMul (toSMT lhs) (toSMT rhs)
138    (BvOr lhs rhs) -> SMT.bvOr (toSMT lhs) (toSMT rhs)
139    (BvSDiv lhs rhs) -> SMT.bvSDiv (toSMT lhs) (toSMT rhs)
140    (BvSLeq lhs rhs) -> SMT.bvSLeq (toSMT lhs) (toSMT rhs)
141    (BvSLt lhs rhs) -> SMT.bvSLt (toSMT lhs) (toSMT rhs)
142    (BvSGeq lhs rhs) -> SMT.fun "bvsge" [toSMT lhs, toSMT rhs]
143    (BvSGt lhs rhs) -> SMT.fun "bvsgt" [toSMT lhs, toSMT rhs]
144    (BvSRem lhs rhs) -> SMT.bvSRem (toSMT lhs) (toSMT rhs)
145    (BvShl lhs rhs) -> SMT.bvShl (toSMT lhs) (toSMT rhs)
146    (BvSub lhs rhs) -> SMT.bvSub (toSMT lhs) (toSMT rhs)
147    (BvUDiv lhs rhs) -> SMT.bvUDiv (toSMT lhs) (toSMT rhs)
148    (BvULeq lhs rhs) -> SMT.bvULeq (toSMT lhs) (toSMT rhs)
149    (BvUGeq lhs rhs) -> SMT.fun "bvuge" [toSMT lhs, toSMT rhs]
150    (BvUGt lhs rhs) -> SMT.fun "bvugt" [toSMT lhs, toSMT rhs]
151    (BvULt lhs rhs) -> SMT.bvULt (toSMT lhs) (toSMT rhs)
152    (BvURem lhs rhs) -> SMT.bvURem (toSMT lhs) (toSMT rhs)
153    (BvXOr lhs rhs) -> SMT.bvXOr (toSMT lhs) (toSMT rhs)
154
155boolWidth :: Int
156boolWidth = 1
157
158pattern E :: Expr SExpr -> SExpr
159pattern E expr <- SExpr {sexpr = expr, width = _}
160
161pattern W :: Int -> SExpr
162pattern W w <- SExpr {width = w}
163
164pattern Byte :: SExpr
165pattern Byte <- SExpr {width = 8}
166
167pattern Half :: SExpr
168pattern Half <- SExpr {width = 16}
169
170pattern Word :: SExpr
171pattern Word <- SExpr {width = 32}
172
173pattern Long :: SExpr
174pattern Long <- SExpr {width = 64}
175
176------------------------------------------------------------------------
177
178const :: String -> Int -> SExpr
179const name width = SExpr width (Var name)
180
181declareBV :: SMT.Solver -> String -> Int -> IO SExpr
182declareBV solver name width = do
183  let bits = SMT.tBits $ fromIntegral width
184  SMT.declare solver name bits >> pure (const name width)
185
186bvLit :: Int -> Integer -> SExpr
187bvLit width value = SExpr width (Int value)
188
189sexprToVal :: SExpr -> SMT.Value
190sexprToVal (E (Var n)) = SMT.Other $ SMT.Atom n
191sexprToVal e@(E (Int i)) = SMT.Bits (width e) i
192sexprToVal _ = SMT.Other $ SMT.Atom "_"
193
194assert :: SMT.Solver -> SExpr -> IO ()
195assert solver = SMT.assert solver . toSMT
196
197getValue :: SMT.Solver -> SExpr -> IO SMT.Value
198getValue solver = SMT.getExpr solver . toSMT
199
200getValues :: SMT.Solver -> [SExpr] -> IO [(String, SMT.Value)]
201getValues solver exprs = do
202  map go <$> SMT.getExprs solver (map toSMT exprs)
203  where
204    go :: (SMT.SExpr, SMT.Value) -> (String, SMT.Value)
205    go (SMT.Atom name, value) = (name, value)
206    go _ = error "non-atomic variable in inputVars"
207
208---------------------------------------------------------------------------
209
210ite :: SExpr -> SExpr -> SExpr -> SExpr
211ite cond ifT ifF = SExpr (width ifT) (Ite cond ifT ifF)
212
213not :: SExpr -> SExpr
214not (E (Not cond)) = cond
215not expr = expr {sexpr = Not expr}
216
217and :: SExpr -> SExpr -> SExpr
218and lhs rhs = lhs {sexpr = And lhs rhs}
219
220or :: SExpr -> SExpr -> SExpr
221or lhs rhs = lhs {sexpr = Or lhs rhs}
222
223signExtend :: Integer -> SExpr -> SExpr
224signExtend n expr = SExpr (width expr + fromIntegral n) $ SignExtend n expr
225
226zeroExtend :: Integer -> SExpr -> SExpr
227zeroExtend n expr = SExpr (width expr + fromIntegral n) $ ZeroExtend n expr
228
229------------------------------------------------------------------------
230
231eq' :: SExpr -> SExpr -> SExpr
232eq' lhs rhs = SExpr boolWidth $ Eq lhs rhs
233
234-- Eliminates ITE expressions when comparing with constants values, this is
235-- useful in the QBE context to eliminate comparisons with truth values.
236eq :: SExpr -> SExpr -> SExpr
237eq lexpr@(E (Ite cond (E (Int ifT)) (E (Int ifF)))) rexpr@(E (Int other))
238  | other == ifT = cond
239  | other == ifF = not cond
240  | otherwise = eq' lexpr rexpr
241eq lhs rhs = eq' lhs rhs
242
243concat' :: SExpr -> SExpr -> SExpr
244concat' lhs rhs =
245  SExpr (width lhs + width rhs) $ Concat lhs rhs
246
247-- Replace 0 concats with zero extension: (concat (_ bv0 8) buf6)
248concatZeros :: SExpr -> SExpr -> SExpr
249concatZeros lhs@(E (Int 0)) rhs = zeroExtend (fromIntegral $ width lhs) rhs
250concatZeros lhs rhs = concat' lhs rhs
251
252-- Replaces continuous concat expressions with a single extract expression.
253concat :: SExpr -> SExpr -> SExpr
254concat
255  lhs@(E (Extract loff lwidth latom@(E exprLhs)))
256  rhs@(E (Extract roff rwidth (E exprRhs)))
257    | exprLhs == exprRhs && (roff + rwidth) == loff = extract latom roff (lwidth + rwidth)
258    | otherwise = concatZeros lhs rhs
259concat lhs rhs = concatZeros lhs rhs
260
261extract' :: SExpr -> Int -> Int -> SExpr
262extract' expr off w = SExpr w $ Extract off w expr
263
264-- Eliminate extract expression where the value already has the desired bits.
265extractSameWidth :: SExpr -> Int -> Int -> SExpr
266extractSameWidth expr off w
267  | off == 0 && width expr == w = expr
268  | otherwise = extract' expr off w
269
270-- Eliminate nested extract expression of the same width.
271extractNested :: SExpr -> Int -> Int -> SExpr
272extractNested expr@(E (Extract ioff iwidth _)) off width =
273  if ioff == off && iwidth == width
274    then expr
275    else extractSameWidth expr off width
276extractNested expr off width = extractSameWidth expr off width
277
278-- Performs direct extractions of constant immediate values.
279extractConst :: SExpr -> Int -> Int -> SExpr
280extractConst (E (Int value)) off w =
281  SExpr w . Int $ truncTo (value `shiftR` off) w
282  where
283    truncTo v bits = v .&. ((1 `shiftL` bits) - 1)
284extractConst expr off width = extractNested expr off width
285
286-- This performs constant propagation for subtyping of condition values (i.e.
287-- the conversion from long to word).
288extractIte :: SExpr -> Int -> Int -> SExpr
289extractIte (E (Ite cond ifT@(E (Int _)) ifF@(E (Int _)))) off w =
290  let ex x = extractConst x off w
291   in SExpr w $ Ite cond (ex ifT) (ex ifF)
292extractIte expr off width = extractConst expr off width
293
294extractZeros ::
295  SExpr ->
296  Int ->
297  Int ->
298  SExpr
299extractZeros expr@(E (ZeroExtend extBits inner)) exOff exWidth
300  | exOff >= width inner && extBits > 0 = bvLit exWidth 0 -- only extracting zeros
301  | otherwise = extractIte expr exOff exWidth
302extractZeros outer exOff exWidth = extractIte outer exOff exWidth
303
304extractExt' ::
305  (Integer -> SExpr -> Expr SExpr) ->
306  SExpr ->
307  Integer ->
308  SExpr ->
309  Int ->
310  Int ->
311  SExpr
312extractExt' cons outer extBits inner exOff exWidth
313  -- If we are only extracting the non-extended bytes...
314  | width inner >= exOff + exWidth = extractZeros inner exOff exWidth
315  -- Consider: ((_ extract 31 0) ((_ zero_extend 56) byte))
316  | exWidth < fromIntegral extBits && exOff == 0 =
317      SExpr exWidth $ cons (extBits - fromIntegral exWidth) inner
318  -- No folding...
319  | otherwise = extractZeros outer exOff exWidth
320
321-- Remove ZeroExtend and SignExtend expression where we don't use
322-- the extended bits because we extract below the extended size.
323extractExt :: SExpr -> Int -> Int -> SExpr
324extractExt expr@(E (SignExtend extBits inner)) exOff exWidth =
325  extractExt' SignExtend expr extBits inner exOff exWidth
326extractExt expr@(E (ZeroExtend extBits inner)) exOff exWidth =
327  extractExt' ZeroExtend expr extBits inner exOff exWidth
328extractExt expr off w = extractIte expr off w
329
330extract :: SExpr -> Int -> Int -> SExpr
331extract = extractExt
332
333------------------------------------------------------------------------
334
335binOp' :: (SExpr -> SExpr -> Expr SExpr) -> SExpr -> SExpr -> SExpr
336binOp' op lhs rhs = lhs {sexpr = op lhs rhs}
337
338binOp :: (SExpr -> SExpr -> Expr SExpr) -> SExpr -> SExpr -> SExpr
339-- Consider: (bvslt ((_ zero_extend 24) byte0) ((_ zero_extend 24) byte1))
340-- TODO: The following only works if 'op' does not consider sign-bits. Otherwise,
341--       there is no semantic expression equivalence after this folding operation.
342-- binOp op lhs@(E (ZeroExtend _ lhsInner)) rhs@(E (ZeroExtend _ rhsInner)) =
343--   if width lhsInner == width rhsInner
344--     then binOp op lhsInner rhsInner
345--     else binOp' op lhs rhs
346binOp op lhs rhs = binOp' op lhs rhs
347
348-- TODO: Generate these using template-haskell.
349
350bvNeg :: SExpr -> SExpr
351bvNeg x = x {sexpr = Neg x}
352
353bvAdd :: SExpr -> SExpr -> SExpr
354bvAdd = binOp BvAdd
355
356bvAShr :: SExpr -> SExpr -> SExpr
357bvAShr = binOp BvAShr
358
359bvLShr :: SExpr -> SExpr -> SExpr
360bvLShr = binOp BvLShr
361
362bvAnd :: SExpr -> SExpr -> SExpr
363bvAnd = binOp BvAnd
364
365bvMul :: SExpr -> SExpr -> SExpr
366bvMul = binOp BvMul
367
368bvOr :: SExpr -> SExpr -> SExpr
369bvOr = binOp BvOr
370
371bvSDiv :: SExpr -> SExpr -> SExpr
372bvSDiv = binOp BvSDiv
373
374bvSLeq :: SExpr -> SExpr -> SExpr
375bvSLeq = binOp BvSLeq
376
377bvSLt :: SExpr -> SExpr -> SExpr
378bvSLt = binOp BvSLt
379
380bvSGeq :: SExpr -> SExpr -> SExpr
381bvSGeq = binOp BvSGeq
382
383bvSGt :: SExpr -> SExpr -> SExpr
384bvSGt = binOp BvSGt
385
386bvSRem :: SExpr -> SExpr -> SExpr
387bvSRem = binOp BvSRem
388
389bvShl :: SExpr -> SExpr -> SExpr
390bvShl = binOp BvShl
391
392bvSub :: SExpr -> SExpr -> SExpr
393bvSub = binOp BvSub
394
395bvUDiv :: SExpr -> SExpr -> SExpr
396bvUDiv = binOp BvUDiv
397
398bvULeq :: SExpr -> SExpr -> SExpr
399bvULeq = binOp BvULeq
400
401bvUGeq :: SExpr -> SExpr -> SExpr
402bvUGeq = binOp BvUGeq
403
404bvUGt :: SExpr -> SExpr -> SExpr
405bvUGt = binOp BvUGt
406
407bvULt :: SExpr -> SExpr -> SExpr
408bvULt = binOp BvULt
409
410bvURem :: SExpr -> SExpr -> SExpr
411-- Fold constant bvURem operations which are emitted a lot in our generated
412-- SMT-LIB because of QBE's "shift-value modulo bitsize"-semantics.
413bvURem vlhs@(E (Int lhs)) (E (Int rhs)) =
414  SExpr (width vlhs) $
415    -- XXX: On urem-by-zero, SMT-LIB returns the lhs.
416    if rhs == 0
417      then Int lhs
418      else Int $ lhs `rem` rhs
419bvURem lhs rhs = binOp BvURem lhs rhs
420
421bvXOr :: SExpr -> SExpr -> SExpr
422bvXOr = binOp BvXOr