From 9da26a8443695c54ebff558ce7f2d911eaebc5b1 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Wed, 8 Jan 2025 13:57:57 +0100 Subject: [PATCH] Rust: Avoid location based variable analysis --- .../rust/elements/internal/VariableImpl.qll | 135 +++++++++++++----- .../strings/inline-taint-flow.expected | 30 ++++ .../library-tests/dataflow/strings/main.rs | 4 +- .../test/library-tests/variables/Ssa.expected | 5 + .../variables/variables.expected | 2 + .../test/library-tests/variables/variables.ql | 16 ++- .../test/library-tests/variables/variables.rs | 2 +- 7 files changed, 151 insertions(+), 43 deletions(-) diff --git a/rust/ql/lib/codeql/rust/elements/internal/VariableImpl.qll b/rust/ql/lib/codeql/rust/elements/internal/VariableImpl.qll index 77087fa06463..09b58f8ef3a5 100644 --- a/rust/ql/lib/codeql/rust/elements/internal/VariableImpl.qll +++ b/rust/ql/lib/codeql/rust/elements/internal/VariableImpl.qll @@ -165,7 +165,7 @@ module Impl { /** * A path expression that may access a local variable. These are paths that - * only consists of a simple name (i.e., without generic arguments, + * only consist of a simple name (i.e., without generic arguments, * qualifiers, etc.). */ private class VariableAccessCand extends PathExprBase { @@ -231,23 +231,97 @@ module Impl { ) } + private Element getImmediateChildMin(Element e, int index) { + // A child may have multiple positions for different accessors, + // so always use the first + result = getImmediateChildAndAccessor(e, index, _) and + index = min(int i | result = getImmediateChildAndAccessor(e, i, _) | i) + } + + private Element getImmediateChild(Element e, int index) { + result = rank[index + 1](Element res, int i | res = getImmediateChildMin(e, i) | res order by i) + } + + pragma[nomagic] + private Element getImmediateLastChild(Element e) { + exists(int last | + result = getImmediateChild(e, last) and + not exists(getImmediateChild(e, last + 1)) + ) + } + + /** + * Gets the pre-order numbering of `n`, where the immediately enclosing + * variable scope of `n` is `scope`. + */ + pragma[nomagic] + private int getPreOrderNumbering(VariableScope scope, Element n) { + n = scope and + result = 0 + or + exists(Element parent | + not parent instanceof VariableScope + or + parent = scope + | + // first child of a previously numbered node + result = getPreOrderNumbering(scope, parent) + 1 and + n = getImmediateChild(parent, 0) + or + // non-first child of a previously numbered node + exists(Element child, int i | + result = getLastPreOrderNumbering(scope, child) + 1 and + child = getImmediateChild(parent, i) and + n = getImmediateChild(parent, i + 1) + ) + ) + } + + /** + * Gets the pre-order numbering of the _last_ node nested under `n`, where the + * immediately enclosing variable scope of `n` (and the last node) is `scope`. + */ + pragma[nomagic] + private int getLastPreOrderNumbering(VariableScope scope, Element n) { + exists(Element leaf | + result = getPreOrderNumbering(scope, leaf) and + leaf != scope and + ( + not exists(getImmediateChild(leaf, _)) + or + leaf instanceof VariableScope + ) + | + n = leaf + or + getImmediateLastChild(n) = leaf and + not n instanceof VariableScope + ) + or + exists(Element mid | + mid = getImmediateLastChild(n) and + result = getLastPreOrderNumbering(scope, mid) and + not mid instanceof VariableScope and + not n instanceof VariableScope + ) + } + /** * Holds if `v` is named `name` and is declared inside variable scope - * `scope`, and `v` is bound starting from `(line, column)`. + * `scope`. The pre-order numbering of the binding site of `v`, amongst + * all nodes nester under `scope`, is `ord`. */ - private predicate variableDeclInScope( - Variable v, VariableScope scope, string name, int line, int column - ) { + private predicate variableDeclInScope(Variable v, VariableScope scope, string name, int ord) { name = v.getName() and ( parameterDeclInScope(v, scope) and - scope.getLocation().hasLocationFileInfo(_, line, column, _, _) + ord = getPreOrderNumbering(scope, scope) or exists(Pat pat | pat = getAVariablePatAncestor(v) | scope = any(MatchArmScope arm | arm.getPat() = pat and - arm.getLocation().hasLocationFileInfo(_, line, column, _, _) + ord = getPreOrderNumbering(scope, arm) ) or exists(LetStmt let | @@ -255,56 +329,53 @@ module Impl { scope = getEnclosingScope(let) and // for `let` statements, variables are bound _after_ the statement, i.e. // not in the RHS - let.getLocation().hasLocationFileInfo(_, _, _, line, column) + ord = getLastPreOrderNumbering(scope, let) + 1 ) or exists(IfExpr ie, LetExpr let | let.getPat() = pat and ie.getCondition() = let and scope = ie.getThen() and - scope.getLocation().hasLocationFileInfo(_, line, column, _, _) + ord = getPreOrderNumbering(scope, scope) ) or exists(ForExpr fe | fe.getPat() = pat and scope = fe.getLoopBody() and - scope.getLocation().hasLocationFileInfo(_, line, column, _, _) + ord = getPreOrderNumbering(scope, scope) ) or exists(WhileExpr we, LetExpr let | let.getPat() = pat and we.getCondition() = let and scope = we.getLoopBody() and - scope.getLocation().hasLocationFileInfo(_, line, column, _, _) + ord = getPreOrderNumbering(scope, scope) ) ) ) } /** - * Holds if `cand` may access a variable named `name` at - * `(startline, startcolumn, endline, endcolumn)` in the variable scope - * `scope`. + * Holds if `cand` may access a variable named `name` at pre-order number `ord` + * in the variable scope `scope`. * * `nestLevel` is the number of nested scopes that need to be traversed * to reach `scope` from `cand`. */ private predicate variableAccessCandInScope( - VariableAccessCand cand, VariableScope scope, string name, int nestLevel, int startline, - int startcolumn, int endline, int endcolumn + VariableAccessCand cand, VariableScope scope, string name, int nestLevel, int ord ) { name = cand.getName() and scope = [cand.(VariableScope), getEnclosingScope(cand)] and - cand.getLocation().hasLocationFileInfo(_, startline, startcolumn, endline, endcolumn) and + ord = getPreOrderNumbering(scope, cand) and nestLevel = 0 or exists(VariableScope inner | - variableAccessCandInScope(cand, inner, name, nestLevel - 1, _, _, _, _) and + variableAccessCandInScope(cand, inner, name, nestLevel - 1, _) and scope = getEnclosingScope(inner) and - // Use the location of the inner scope as the location of the access, instead of the - // actual access location. This allows us to collapse multiple accesses in inner - // scopes to a single entity - inner.getLocation().hasLocationFileInfo(_, startline, startcolumn, endline, endcolumn) + // Use the pre-order number of the inner scope as the number of the access. This allows + // us to collapse multiple accesses in inner scopes to a single entity + ord = getPreOrderNumbering(scope, inner) ) } @@ -375,15 +446,10 @@ module Impl { } pragma[nomagic] - predicate rankBy( - string name, VariableScope scope, int startline, int startcolumn, int endline, int endcolumn - ) { - variableDeclInScope(this.asVariable(), scope, name, startline, startcolumn) and - endline = -1 and - endcolumn = -1 + predicate rankBy(string name, VariableScope scope, int ord) { + variableDeclInScope(this.asVariable(), scope, name, ord) or - variableAccessCandInScope(this.asVariableAccessCand(), scope, name, _, startline, startcolumn, - endline, endcolumn) + variableAccessCandInScope(this.asVariableAccessCand(), scope, name, _, ord) } } @@ -396,11 +462,10 @@ module Impl { int getRank(VariableScope scope, string name, VariableOrAccessCand v) { v = - rank[result](VariableOrAccessCand v0, int startline, int startcolumn, int endline, - int endcolumn | - v0.rankBy(name, scope, startline, startcolumn, endline, endcolumn) + rank[result](VariableOrAccessCand v0, int ord | + v0.rankBy(name, scope, ord) | - v0 order by startline, startcolumn, endline, endcolumn + v0 order by ord ) } } @@ -440,7 +505,7 @@ module Impl { exists(int rnk | variableReachesRank(scope, name, v, rnk) and rnk = rankVariableOrAccess(scope, name, TVariableOrAccessCandVariableAccessCand(cand)) and - variableAccessCandInScope(cand, scope, name, nestLevel, _, _, _, _) + variableAccessCandInScope(cand, scope, name, nestLevel, _) ) } diff --git a/rust/ql/test/library-tests/dataflow/strings/inline-taint-flow.expected b/rust/ql/test/library-tests/dataflow/strings/inline-taint-flow.expected index 3349f43dcb3d..0e6b9351ac7e 100644 --- a/rust/ql/test/library-tests/dataflow/strings/inline-taint-flow.expected +++ b/rust/ql/test/library-tests/dataflow/strings/inline-taint-flow.expected @@ -1,6 +1,7 @@ models | 1 | Summary: lang:alloc; ::as_str; Argument[self]; ReturnValue; taint | | 2 | Summary: lang:alloc; crate::fmt::format; Argument[0]; ReturnValue; taint | +| 3 | Summary: lang:core; crate::hint::must_use; Argument[0]; ReturnValue; value | edges | main.rs:26:9:26:9 | s | main.rs:27:19:27:25 | s[...] | provenance | | | main.rs:26:13:26:22 | source(...) | main.rs:26:9:26:9 | s | provenance | | @@ -27,6 +28,19 @@ edges | main.rs:77:9:77:18 | formatted3 | main.rs:78:10:78:19 | formatted3 | provenance | | | main.rs:77:22:77:75 | ...::format(...) | main.rs:77:9:77:18 | formatted3 | provenance | | | main.rs:77:34:77:74 | MacroExpr | main.rs:77:22:77:75 | ...::format(...) | provenance | MaD:2 | +| main.rs:82:9:82:10 | s1 | main.rs:86:18:86:25 | MacroExpr | provenance | | +| main.rs:82:9:82:10 | s1 | main.rs:87:18:87:32 | MacroExpr | provenance | | +| main.rs:82:14:82:23 | source(...) | main.rs:82:9:82:10 | s1 | provenance | | +| main.rs:86:10:86:26 | res | main.rs:86:18:86:25 | { ... } | provenance | | +| main.rs:86:18:86:25 | ...::format(...) | main.rs:86:10:86:26 | res | provenance | | +| main.rs:86:18:86:25 | ...::must_use(...) | main.rs:86:10:86:26 | MacroExpr | provenance | | +| main.rs:86:18:86:25 | MacroExpr | main.rs:86:18:86:25 | ...::format(...) | provenance | MaD:2 | +| main.rs:86:18:86:25 | { ... } | main.rs:86:18:86:25 | ...::must_use(...) | provenance | MaD:3 | +| main.rs:87:10:87:33 | res | main.rs:87:18:87:32 | { ... } | provenance | | +| main.rs:87:18:87:32 | ...::format(...) | main.rs:87:10:87:33 | res | provenance | | +| main.rs:87:18:87:32 | ...::must_use(...) | main.rs:87:10:87:33 | MacroExpr | provenance | | +| main.rs:87:18:87:32 | MacroExpr | main.rs:87:18:87:32 | ...::format(...) | provenance | MaD:2 | +| main.rs:87:18:87:32 | { ... } | main.rs:87:18:87:32 | ...::must_use(...) | provenance | MaD:3 | nodes | main.rs:26:9:26:9 | s | semmle.label | s | | main.rs:26:13:26:22 | source(...) | semmle.label | source(...) | @@ -58,6 +72,20 @@ nodes | main.rs:77:22:77:75 | ...::format(...) | semmle.label | ...::format(...) | | main.rs:77:34:77:74 | MacroExpr | semmle.label | MacroExpr | | main.rs:78:10:78:19 | formatted3 | semmle.label | formatted3 | +| main.rs:82:9:82:10 | s1 | semmle.label | s1 | +| main.rs:82:14:82:23 | source(...) | semmle.label | source(...) | +| main.rs:86:10:86:26 | MacroExpr | semmle.label | MacroExpr | +| main.rs:86:10:86:26 | res | semmle.label | res | +| main.rs:86:18:86:25 | ...::format(...) | semmle.label | ...::format(...) | +| main.rs:86:18:86:25 | ...::must_use(...) | semmle.label | ...::must_use(...) | +| main.rs:86:18:86:25 | MacroExpr | semmle.label | MacroExpr | +| main.rs:86:18:86:25 | { ... } | semmle.label | { ... } | +| main.rs:87:10:87:33 | MacroExpr | semmle.label | MacroExpr | +| main.rs:87:10:87:33 | res | semmle.label | res | +| main.rs:87:18:87:32 | ...::format(...) | semmle.label | ...::format(...) | +| main.rs:87:18:87:32 | ...::must_use(...) | semmle.label | ...::must_use(...) | +| main.rs:87:18:87:32 | MacroExpr | semmle.label | MacroExpr | +| main.rs:87:18:87:32 | { ... } | semmle.label | { ... } | subpaths testFailures #select @@ -67,3 +95,5 @@ testFailures | main.rs:71:10:71:19 | formatted1 | main.rs:68:13:68:22 | source(...) | main.rs:71:10:71:19 | formatted1 | $@ | main.rs:68:13:68:22 | source(...) | source(...) | | main.rs:74:10:74:19 | formatted2 | main.rs:68:13:68:22 | source(...) | main.rs:74:10:74:19 | formatted2 | $@ | main.rs:68:13:68:22 | source(...) | source(...) | | main.rs:78:10:78:19 | formatted3 | main.rs:76:17:76:32 | source_usize(...) | main.rs:78:10:78:19 | formatted3 | $@ | main.rs:76:17:76:32 | source_usize(...) | source_usize(...) | +| main.rs:86:10:86:26 | MacroExpr | main.rs:82:14:82:23 | source(...) | main.rs:86:10:86:26 | MacroExpr | $@ | main.rs:82:14:82:23 | source(...) | source(...) | +| main.rs:87:10:87:33 | MacroExpr | main.rs:82:14:82:23 | source(...) | main.rs:87:10:87:33 | MacroExpr | $@ | main.rs:82:14:82:23 | source(...) | source(...) | diff --git a/rust/ql/test/library-tests/dataflow/strings/main.rs b/rust/ql/test/library-tests/dataflow/strings/main.rs index 0194546250b9..975fc99ce130 100644 --- a/rust/ql/test/library-tests/dataflow/strings/main.rs +++ b/rust/ql/test/library-tests/dataflow/strings/main.rs @@ -83,8 +83,8 @@ fn format_macro() { let s2 = "2"; let s3 = "3"; - sink(format!("{}", s1)); // $ MISSING: hasTaintFlow=34 - sink(format!("{s1} and {s3}")); // $ MISSING: hasTaintFlow=34 + sink(format!("{}", s1)); // $ hasTaintFlow=34 + sink(format!("{s1} and {s3}")); // $ hasTaintFlow=34 sink(format!("{s2} and {s3}")); } diff --git a/rust/ql/test/library-tests/variables/Ssa.expected b/rust/ql/test/library-tests/variables/Ssa.expected index 6d3a2abe8863..8374634a8a77 100644 --- a/rust/ql/test/library-tests/variables/Ssa.expected +++ b/rust/ql/test/library-tests/variables/Ssa.expected @@ -146,6 +146,7 @@ definition | variables.rs:523:9:523:9 | z | variables.rs:523:9:523:9 | z | | variables.rs:532:10:532:18 | SelfParam | variables.rs:532:15:532:18 | self | | variables.rs:554:9:554:22 | var_from_macro | variables.rs:554:9:554:22 | var_from_macro | +| variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:9:555:25 | var_in_macro | read | variables.rs:3:14:3:14 | s | variables.rs:3:14:3:14 | s | variables.rs:4:20:4:20 | s | | variables.rs:7:14:7:14 | i | variables.rs:7:14:7:14 | i | variables.rs:8:20:8:20 | i | @@ -284,6 +285,7 @@ read | variables.rs:523:9:523:9 | z | variables.rs:523:9:523:9 | z | variables.rs:524:20:524:20 | z | | variables.rs:532:10:532:18 | SelfParam | variables.rs:532:15:532:18 | self | variables.rs:533:6:533:9 | self | | variables.rs:554:9:554:22 | var_from_macro | variables.rs:554:9:554:22 | var_from_macro | variables.rs:556:15:556:28 | var_from_macro | +| variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:9:555:25 | var_in_macro | firstRead | variables.rs:3:14:3:14 | s | variables.rs:3:14:3:14 | s | variables.rs:4:20:4:20 | s | | variables.rs:7:14:7:14 | i | variables.rs:7:14:7:14 | i | variables.rs:8:20:8:20 | i | @@ -395,6 +397,7 @@ firstRead | variables.rs:523:9:523:9 | z | variables.rs:523:9:523:9 | z | variables.rs:524:20:524:20 | z | | variables.rs:532:10:532:18 | SelfParam | variables.rs:532:15:532:18 | self | variables.rs:533:6:533:9 | self | | variables.rs:554:9:554:22 | var_from_macro | variables.rs:554:9:554:22 | var_from_macro | variables.rs:556:15:556:28 | var_from_macro | +| variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:9:555:25 | var_in_macro | lastRead | variables.rs:3:14:3:14 | s | variables.rs:3:14:3:14 | s | variables.rs:4:20:4:20 | s | | variables.rs:7:14:7:14 | i | variables.rs:7:14:7:14 | i | variables.rs:8:20:8:20 | i | @@ -507,6 +510,7 @@ lastRead | variables.rs:523:9:523:9 | z | variables.rs:523:9:523:9 | z | variables.rs:524:20:524:20 | z | | variables.rs:532:10:532:18 | SelfParam | variables.rs:532:15:532:18 | self | variables.rs:533:6:533:9 | self | | variables.rs:554:9:554:22 | var_from_macro | variables.rs:554:9:554:22 | var_from_macro | variables.rs:556:15:556:28 | var_from_macro | +| variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:9:555:25 | var_in_macro | adjacentReads | variables.rs:35:9:35:10 | x3 | variables.rs:35:9:35:10 | x3 | variables.rs:36:15:36:16 | x3 | variables.rs:38:9:38:10 | x3 | | variables.rs:43:9:43:10 | x4 | variables.rs:43:9:43:10 | x4 | variables.rs:44:15:44:16 | x4 | variables.rs:49:15:49:16 | x4 | @@ -659,3 +663,4 @@ assigns | variables.rs:519:9:519:9 | x | variables.rs:519:13:519:14 | 16 | | variables.rs:523:9:523:9 | z | variables.rs:523:13:523:14 | 17 | | variables.rs:554:9:554:22 | var_from_macro | variables.rs:555:9:555:25 | MacroExpr | +| variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:23:555:24 | 37 | diff --git a/rust/ql/test/library-tests/variables/variables.expected b/rust/ql/test/library-tests/variables/variables.expected index 7d1583aff773..35d79c3b5b2a 100644 --- a/rust/ql/test/library-tests/variables/variables.expected +++ b/rust/ql/test/library-tests/variables/variables.expected @@ -276,6 +276,7 @@ variableAccess | variables.rs:533:6:533:9 | self | variables.rs:532:15:532:18 | self | | variables.rs:539:3:539:3 | a | variables.rs:538:11:538:11 | a | | variables.rs:541:13:541:13 | a | variables.rs:538:11:538:11 | a | +| variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:9:555:25 | var_in_macro | | variables.rs:556:15:556:28 | var_from_macro | variables.rs:554:9:554:22 | var_from_macro | variableWriteAccess | variables.rs:23:5:23:6 | x2 | variables.rs:21:13:21:14 | x2 | @@ -436,6 +437,7 @@ variableReadAccess | variables.rs:533:6:533:9 | self | variables.rs:532:15:532:18 | self | | variables.rs:539:3:539:3 | a | variables.rs:538:11:538:11 | a | | variables.rs:541:13:541:13 | a | variables.rs:538:11:538:11 | a | +| variables.rs:555:9:555:25 | var_in_macro | variables.rs:555:9:555:25 | var_in_macro | | variables.rs:556:15:556:28 | var_from_macro | variables.rs:554:9:554:22 | var_from_macro | variableInitializer | variables.rs:16:9:16:10 | x1 | variables.rs:16:14:16:16 | "a" | diff --git a/rust/ql/test/library-tests/variables/variables.ql b/rust/ql/test/library-tests/variables/variables.ql index cbadfe53d346..f3ecd6f06851 100644 --- a/rust/ql/test/library-tests/variables/variables.ql +++ b/rust/ql/test/library-tests/variables/variables.ql @@ -18,8 +18,9 @@ query predicate capturedAccess(VariableAccess va) { va.isCapture() } module VariableAccessTest implements TestSig { string getARelevantTag() { result = ["", "write_", "read_"] + "access" } - private predicate declAt(Variable v, string filepath, int line) { - v.getLocation().hasLocationInfo(filepath, _, _, line, _) + private predicate declAt(Variable v, string filepath, int line, boolean inMacro) { + v.getLocation().hasLocationInfo(filepath, _, _, line, _) and + if v.getPat().isInMacroExpansion() then inMacro = true else inMacro = false } private predicate commmentAt(string text, string filepath, int line) { @@ -30,10 +31,15 @@ module VariableAccessTest implements TestSig { } private predicate decl(Variable v, string value) { - exists(string filepath, int line | declAt(v, filepath, line) | - commmentAt(value, filepath, line) + exists(string filepath, int line, boolean inMacro | declAt(v, filepath, line, inMacro) | + commmentAt(value, filepath, line) and + inMacro = false or - not commmentAt(_, filepath, line) and + ( + not commmentAt(_, filepath, line) + or + inMacro = true + ) and value = v.getName() ) } diff --git a/rust/ql/test/library-tests/variables/variables.rs b/rust/ql/test/library-tests/variables/variables.rs index b6a250fba87c..f64a9f2395e3 100644 --- a/rust/ql/test/library-tests/variables/variables.rs +++ b/rust/ql/test/library-tests/variables/variables.rs @@ -552,7 +552,7 @@ macro_rules! let_in_macro { fn macro_invocation() { let var_from_macro = - let_in_macro!(37); // $ MISSING: read_access=var_in_macro + let_in_macro!(37); // $ read_access=var_in_macro print_i64(var_from_macro); // $ read_access=var_from_macro }