quebex

A software analysis framework built around the QBE intermediate language

git clone https://git.8pit.net/quebex.git

  1-- SPDX-FileCopyrightText: 2025 Sören Tempel <soeren+git@soeren-tempel.net>
  2--
  3-- SPDX-License-Identifier: MIT AND GPL-3.0-only
  4{-# LANGUAGE PatternSynonyms #-}
  5
  6module SimpleBV
  7  ( SExpr,
  8    SMT.Solver,
  9    SMT.defaultConfig,
 10    SMT.newLogger,
 11    SMT.newLoggerWithHandle,
 12    SMT.newSolver,
 13    SMT.newSolverWithConfig,
 14    SMT.solverLogger,
 15    SMT.smtSolverLogger,
 16    SMT.setLogic,
 17    SMT.push,
 18    SMT.pop,
 19    SMT.popMany,
 20    SMT.check,
 21    SMT.Result (..),
 22    SMT.Value (..),
 23    pattern W,
 24    pattern Byte,
 25    pattern Half,
 26    pattern Word,
 27    pattern Long,
 28    width,
 29    const,
 30    declareBV,
 31    assert,
 32    sexprToVal,
 33    getValue,
 34    getValues,
 35    toSMT,
 36    ite,
 37    and,
 38    or,
 39    not,
 40    eq,
 41    bvLit,
 42    bvAdd,
 43    bvAShr,
 44    bvLShr,
 45    bvAnd,
 46    bvMul,
 47    bvNeg,
 48    bvOr,
 49    bvSDiv,
 50    bvSLeq,
 51    bvSLt,
 52    bvSGeq,
 53    bvSGt,
 54    bvSRem,
 55    bvShl,
 56    bvSub,
 57    bvUDiv,
 58    bvULeq,
 59    bvUGeq,
 60    bvUGt,
 61    bvULt,
 62    bvURem,
 63    bvXOr,
 64    concat,
 65    extract,
 66    signExtend,
 67    zeroExtend,
 68  )
 69where
 70
 71import Control.DeepSeq (NFData, NFData1)
 72import Data.Bits (shiftL, shiftR, (.&.))
 73import GHC.Generics (Generic, Generic1)
 74import SimpleSMT qualified as SMT
 75import Prelude hiding (and, concat, const, not, or)
 76
 77data Expr a
 78  = Var String
 79  | Int Integer
 80  | And a a
 81  | Or a a
 82  | Neg a
 83  | Not a
 84  | Eq a a
 85  | BvAdd a a
 86  | BvAShr a a
 87  | BvLShr a a
 88  | BvAnd a a
 89  | BvMul a a
 90  | BvOr a a
 91  | BvSDiv a a
 92  | BvSLeq a a
 93  | BvSLt a a
 94  | BvSGeq a a
 95  | BvSGt a a
 96  | BvSRem a a
 97  | BvShl a a
 98  | BvSub a a
 99  | BvUDiv a a
100  | BvULeq a a
101  | BvUGeq a a
102  | BvUGt a a
103  | BvULt a a
104  | BvURem a a
105  | BvXOr a a
106  | Concat a a
107  | Ite a a a
108  | Extract Int Int a
109  | SignExtend Integer a
110  | ZeroExtend Integer a
111  deriving (Show, Eq, Generic, Generic1)
112
113instance (NFData a) => NFData (Expr a)
114
115instance NFData1 Expr
116
117data SExpr
118  = SExpr
119  { width :: Int,
120    sexpr :: Expr SExpr
121  }
122  deriving (Show, Eq, Generic)
123
124instance NFData SExpr
125
126toSMT :: SExpr -> SMT.SExpr
127toSMT expr =
128  case sexpr expr of
129    (Var name) -> SMT.const name
130    (Int v) -> SMT.List [SMT.Atom "_", SMT.Atom ("bv" ++ show v), SMT.Atom $ show (width expr)]
131    (Or lhs rhs) -> SMT.or (toSMT lhs) (toSMT rhs)
132    (Ite cond lhs rhs) -> SMT.ite (toSMT cond) (toSMT lhs) (toSMT rhs)
133    (And lhs rhs) -> SMT.and (toSMT lhs) (toSMT rhs)
134    (Not v) -> SMT.not (toSMT v)
135    (Neg v) -> SMT.bvNeg (toSMT v)
136    (SignExtend n v) -> SMT.signExtend n (toSMT v)
137    (ZeroExtend n v) -> SMT.zeroExtend n (toSMT v)
138    (Eq lhs rhs) -> SMT.eq (toSMT lhs) (toSMT rhs)
139    (Concat lhs rhs) -> SMT.concat (toSMT lhs) (toSMT rhs)
140    (Extract o w e) -> SMT.extract (toSMT e) (fromIntegral $ o + w - 1) (fromIntegral o)
141    (BvAnd lhs rhs) -> SMT.bvAnd (toSMT lhs) (toSMT rhs)
142    (BvAShr lhs rhs) -> SMT.bvAShr (toSMT lhs) (toSMT rhs)
143    (BvLShr lhs rhs) -> SMT.bvLShr (toSMT lhs) (toSMT rhs)
144    (BvAdd lhs rhs) -> SMT.bvAdd (toSMT lhs) (toSMT rhs)
145    (BvMul lhs rhs) -> SMT.bvMul (toSMT lhs) (toSMT rhs)
146    (BvOr lhs rhs) -> SMT.bvOr (toSMT lhs) (toSMT rhs)
147    (BvSDiv lhs rhs) -> SMT.bvSDiv (toSMT lhs) (toSMT rhs)
148    (BvSLeq lhs rhs) -> SMT.bvSLeq (toSMT lhs) (toSMT rhs)
149    (BvSLt lhs rhs) -> SMT.bvSLt (toSMT lhs) (toSMT rhs)
150    (BvSGeq lhs rhs) -> SMT.fun "bvsge" [toSMT lhs, toSMT rhs]
151    (BvSGt lhs rhs) -> SMT.fun "bvsgt" [toSMT lhs, toSMT rhs]
152    (BvSRem lhs rhs) -> SMT.bvSRem (toSMT lhs) (toSMT rhs)
153    (BvShl lhs rhs) -> SMT.bvShl (toSMT lhs) (toSMT rhs)
154    (BvSub lhs rhs) -> SMT.bvSub (toSMT lhs) (toSMT rhs)
155    (BvUDiv lhs rhs) -> SMT.bvUDiv (toSMT lhs) (toSMT rhs)
156    (BvULeq lhs rhs) -> SMT.bvULeq (toSMT lhs) (toSMT rhs)
157    (BvUGeq lhs rhs) -> SMT.fun "bvuge" [toSMT lhs, toSMT rhs]
158    (BvUGt lhs rhs) -> SMT.fun "bvugt" [toSMT lhs, toSMT rhs]
159    (BvULt lhs rhs) -> SMT.bvULt (toSMT lhs) (toSMT rhs)
160    (BvURem lhs rhs) -> SMT.bvURem (toSMT lhs) (toSMT rhs)
161    (BvXOr lhs rhs) -> SMT.bvXOr (toSMT lhs) (toSMT rhs)
162
163boolWidth :: Int
164boolWidth = 1
165
166pattern E :: Expr SExpr -> SExpr
167pattern E expr <- SExpr {sexpr = expr, width = _}
168
169pattern W :: Int -> SExpr
170pattern W w <- SExpr {width = w}
171
172pattern Byte :: SExpr
173pattern Byte <- SExpr {width = 8}
174
175pattern Half :: SExpr
176pattern Half <- SExpr {width = 16}
177
178pattern Word :: SExpr
179pattern Word <- SExpr {width = 32}
180
181pattern Long :: SExpr
182pattern Long <- SExpr {width = 64}
183
184------------------------------------------------------------------------
185
186const :: String -> Int -> SExpr
187const name width = SExpr width (Var name)
188
189declareBV :: SMT.Solver -> String -> Int -> IO SExpr
190declareBV solver name width = do
191  let bits = SMT.tBits $ fromIntegral width
192  SMT.declare solver name bits >> pure (const name width)
193
194bvLit :: Int -> Integer -> SExpr
195bvLit width value = SExpr width (Int value)
196
197sexprToVal :: SExpr -> SMT.Value
198sexprToVal (E (Var n)) = SMT.Other $ SMT.Atom n
199sexprToVal e@(E (Int i)) = SMT.Bits (width e) i
200sexprToVal _ = SMT.Other $ SMT.Atom "_"
201
202assert :: SMT.Solver -> SExpr -> IO ()
203assert solver = SMT.assert solver . toSMT
204
205getValue :: SMT.Solver -> SExpr -> IO SMT.Value
206getValue solver = SMT.getExpr solver . toSMT
207
208getValues :: SMT.Solver -> [SExpr] -> IO [(String, SMT.Value)]
209getValues solver exprs = do
210  map go <$> SMT.getExprs solver (map toSMT exprs)
211  where
212    go :: (SMT.SExpr, SMT.Value) -> (String, SMT.Value)
213    go (SMT.Atom name, value) = (name, value)
214    go _ = error "non-atomic variable in inputVars"
215
216---------------------------------------------------------------------------
217
218ite :: SExpr -> SExpr -> SExpr -> SExpr
219ite cond ifT ifF = SExpr (width ifT) (Ite cond ifT ifF)
220
221not :: SExpr -> SExpr
222not (E (Not cond)) = cond
223not expr = expr {sexpr = Not expr}
224
225and :: SExpr -> SExpr -> SExpr
226and lhs rhs = lhs {sexpr = And lhs rhs}
227
228or :: SExpr -> SExpr -> SExpr
229or lhs rhs = lhs {sexpr = Or lhs rhs}
230
231signExtend :: Integer -> SExpr -> SExpr
232signExtend n expr = SExpr (width expr + fromIntegral n) $ SignExtend n expr
233
234zeroExtend :: Integer -> SExpr -> SExpr
235zeroExtend n expr = SExpr (width expr + fromIntegral n) $ ZeroExtend n expr
236
237------------------------------------------------------------------------
238
239eq' :: SExpr -> SExpr -> SExpr
240eq' lhs rhs = SExpr boolWidth $ Eq lhs rhs
241
242-- Eliminates ITE expressions when comparing with constants values, this is
243-- useful in the QBE context to eliminate comparisons with truth values.
244eq :: SExpr -> SExpr -> SExpr
245eq lexpr@(E (Ite cond (E (Int ifT)) (E (Int ifF)))) rexpr@(E (Int other))
246  | other == ifT = cond
247  | other == ifF = not cond
248  | otherwise = eq' lexpr rexpr
249eq lhs rhs = eq' lhs rhs
250
251concat' :: SExpr -> SExpr -> SExpr
252concat' lhs rhs =
253  SExpr (width lhs + width rhs) $ Concat lhs rhs
254
255-- Replace 0 concats with zero extension: (concat (_ bv0 8) buf6)
256concatZeros :: SExpr -> SExpr -> SExpr
257concatZeros lhs@(E (Int 0)) rhs = zeroExtend (fromIntegral $ width lhs) rhs
258concatZeros lhs rhs = concat' lhs rhs
259
260-- Replaces continuous concat expressions with a single extract expression.
261concat :: SExpr -> SExpr -> SExpr
262concat
263  lhs@(E (Extract loff lwidth latom@(E exprLhs)))
264  rhs@(E (Extract roff rwidth (E exprRhs)))
265    | exprLhs == exprRhs && (roff + rwidth) == loff = extract latom roff (lwidth + rwidth)
266    | otherwise = concatZeros lhs rhs
267concat lhs rhs = concatZeros lhs rhs
268
269extract' :: SExpr -> Int -> Int -> SExpr
270extract' expr off w = SExpr w $ Extract off w expr
271
272-- Eliminate extract expression where the value already has the desired bits.
273extractSameWidth :: SExpr -> Int -> Int -> SExpr
274extractSameWidth expr off w
275  | off == 0 && width expr == w = expr
276  | otherwise = extract' expr off w
277
278-- Eliminate nested extract expression of the same width.
279extractNested :: SExpr -> Int -> Int -> SExpr
280extractNested expr@(E (Extract ioff iwidth _)) off width =
281  if ioff == off && iwidth == width
282    then expr
283    else extractSameWidth expr off width
284extractNested expr off width = extractSameWidth expr off width
285
286-- Performs direct extractions of constant immediate values.
287extractConst :: SExpr -> Int -> Int -> SExpr
288extractConst (E (Int value)) off w =
289  SExpr w . Int $ truncTo (value `shiftR` off) w
290  where
291    truncTo v bits = v .&. ((1 `shiftL` bits) - 1)
292extractConst expr off width = extractNested expr off width
293
294-- This performs constant propagation for subtyping of condition values (i.e.
295-- the conversion from long to word).
296extractIte :: SExpr -> Int -> Int -> SExpr
297extractIte (E (Ite cond ifT@(E (Int _)) ifF@(E (Int _)))) off w =
298  let ex x = extractConst x off w
299   in SExpr w $ Ite cond (ex ifT) (ex ifF)
300extractIte expr off width = extractConst expr off width
301
302extractZeros ::
303  SExpr ->
304  Int ->
305  Int ->
306  SExpr
307extractZeros expr@(E (ZeroExtend extBits inner)) exOff exWidth
308  | exOff >= width inner && extBits > 0 = bvLit exWidth 0 -- only extracting zeros
309  | otherwise = extractIte expr exOff exWidth
310extractZeros outer exOff exWidth = extractIte outer exOff exWidth
311
312extractExt' ::
313  (Integer -> SExpr -> Expr SExpr) ->
314  SExpr ->
315  Integer ->
316  SExpr ->
317  Int ->
318  Int ->
319  SExpr
320extractExt' cons outer extBits inner exOff exWidth
321  -- If we are only extracting the non-extended bytes...
322  | width inner >= exOff + exWidth = extractZeros inner exOff exWidth
323  -- Consider: ((_ extract 31 0) ((_ zero_extend 56) byte))
324  | exWidth < fromIntegral extBits && exOff == 0 =
325      SExpr exWidth $ cons (extBits - fromIntegral exWidth) inner
326  -- No folding...
327  | otherwise = extractZeros outer exOff exWidth
328
329-- Remove ZeroExtend and SignExtend expression where we don't use
330-- the extended bits because we extract below the extended size.
331extractExt :: SExpr -> Int -> Int -> SExpr
332extractExt expr@(E (SignExtend extBits inner)) exOff exWidth =
333  extractExt' SignExtend expr extBits inner exOff exWidth
334extractExt expr@(E (ZeroExtend extBits inner)) exOff exWidth =
335  extractExt' ZeroExtend expr extBits inner exOff exWidth
336extractExt expr off w = extractIte expr off w
337
338extract :: SExpr -> Int -> Int -> SExpr
339extract = extractExt
340
341------------------------------------------------------------------------
342
343binOp' :: (SExpr -> SExpr -> Expr SExpr) -> SExpr -> SExpr -> SExpr
344binOp' op lhs rhs = lhs {sexpr = op lhs rhs}
345
346binOp :: (SExpr -> SExpr -> Expr SExpr) -> SExpr -> SExpr -> SExpr
347-- Consider: (bvslt ((_ zero_extend 24) byte0) ((_ zero_extend 24) byte1))
348-- TODO: The following only works if 'op' does not consider sign-bits. Otherwise,
349--       there is no semantic expression equivalence after this folding operation.
350-- binOp op lhs@(E (ZeroExtend _ lhsInner)) rhs@(E (ZeroExtend _ rhsInner)) =
351--   if width lhsInner == width rhsInner
352--     then binOp op lhsInner rhsInner
353--     else binOp' op lhs rhs
354binOp op lhs rhs = binOp' op lhs rhs
355
356-- TODO: Generate these using template-haskell.
357
358bvNeg :: SExpr -> SExpr
359bvNeg x = x {sexpr = Neg x}
360
361bvAdd :: SExpr -> SExpr -> SExpr
362bvAdd = binOp BvAdd
363
364bvAShr :: SExpr -> SExpr -> SExpr
365bvAShr = binOp BvAShr
366
367bvLShr :: SExpr -> SExpr -> SExpr
368bvLShr = binOp BvLShr
369
370bvAnd :: SExpr -> SExpr -> SExpr
371bvAnd = binOp BvAnd
372
373bvMul :: SExpr -> SExpr -> SExpr
374bvMul = binOp BvMul
375
376bvOr :: SExpr -> SExpr -> SExpr
377bvOr = binOp BvOr
378
379bvSDiv :: SExpr -> SExpr -> SExpr
380bvSDiv = binOp BvSDiv
381
382bvSLeq :: SExpr -> SExpr -> SExpr
383bvSLeq = binOp BvSLeq
384
385bvSLt :: SExpr -> SExpr -> SExpr
386bvSLt = binOp BvSLt
387
388bvSGeq :: SExpr -> SExpr -> SExpr
389bvSGeq = binOp BvSGeq
390
391bvSGt :: SExpr -> SExpr -> SExpr
392bvSGt = binOp BvSGt
393
394bvSRem :: SExpr -> SExpr -> SExpr
395bvSRem = binOp BvSRem
396
397bvShl :: SExpr -> SExpr -> SExpr
398bvShl = binOp BvShl
399
400bvSub :: SExpr -> SExpr -> SExpr
401bvSub = binOp BvSub
402
403bvUDiv :: SExpr -> SExpr -> SExpr
404bvUDiv = binOp BvUDiv
405
406bvULeq :: SExpr -> SExpr -> SExpr
407bvULeq = binOp BvULeq
408
409bvUGeq :: SExpr -> SExpr -> SExpr
410bvUGeq = binOp BvUGeq
411
412bvUGt :: SExpr -> SExpr -> SExpr
413bvUGt = binOp BvUGt
414
415bvULt :: SExpr -> SExpr -> SExpr
416bvULt = binOp BvULt
417
418bvURem :: SExpr -> SExpr -> SExpr
419-- Fold constant bvURem operations which are emitted a lot in our generated
420-- SMT-LIB because of QBE's "shift-value modulo bitsize"-semantics.
421bvURem vlhs@(E (Int lhs)) (E (Int rhs)) =
422  SExpr (width vlhs) $
423    -- XXX: On urem-by-zero, SMT-LIB returns the lhs.
424    if rhs == 0
425      then Int lhs
426      else Int $ lhs `rem` rhs
427bvURem lhs rhs = binOp BvURem lhs rhs
428
429bvXOr :: SExpr -> SExpr -> SExpr
430bvXOr = binOp BvXOr