Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data flow: Synthesize parameter return nodes #16394

Merged
merged 4 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ module ProductFlow {
Flow1::PathGraph::edges(pred1, succ1, _, _) and
exists(ReturnKindExt returnKind |
succ1.getNode() = returnKind.getAnOutNode(call) and
pred1.getNode().(ReturnNodeExt).getKind() = returnKind
paramReturnNode(_, pred1.asParameterReturnNode(), _, returnKind)
)
}

Expand Down Expand Up @@ -574,7 +574,7 @@ module ProductFlow {
Flow2::PathGraph::edges(pred2, succ2, _, _) and
exists(ReturnKindExt returnKind |
succ2.getNode() = returnKind.getAnOutNode(call) and
pred2.getNode().(ReturnNodeExt).getKind() = returnKind
paramReturnNode(_, pred2.asParameterReturnNode(), _, returnKind)
)
}

Expand Down
172 changes: 120 additions & 52 deletions cpp/ql/test/library-tests/dataflow/fields/ir-path-flow.expected

Large diffs are not rendered by default.

194 changes: 131 additions & 63 deletions cpp/ql/test/library-tests/dataflow/fields/path-flow.expected

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ edges
| test.cpp:187:18:187:25 | *filename | test.cpp:187:11:187:15 | strncat output argument | provenance | TaintFunction |
| test.cpp:188:11:188:17 | strncat output argument | test.cpp:186:19:186:25 | *command | provenance | |
| test.cpp:188:11:188:17 | strncat output argument | test.cpp:186:19:186:25 | *command | provenance | |
| test.cpp:188:11:188:17 | strncat output argument | test.cpp:186:19:186:25 | *command [Return] | provenance | |
| test.cpp:188:11:188:17 | strncat output argument | test.cpp:186:19:186:25 | *command [Return] | provenance | |
| test.cpp:188:20:188:24 | *flags | test.cpp:188:11:188:17 | strncat output argument | provenance | |
| test.cpp:188:20:188:24 | *flags | test.cpp:188:11:188:17 | strncat output argument | provenance | TaintFunction |
| test.cpp:194:9:194:16 | fread output argument | test.cpp:196:26:196:33 | *filename | provenance | |
Expand Down Expand Up @@ -125,6 +127,8 @@ nodes
| test.cpp:183:32:183:38 | *command | semmle.label | *command |
| test.cpp:186:19:186:25 | *command | semmle.label | *command |
| test.cpp:186:19:186:25 | *command | semmle.label | *command |
| test.cpp:186:19:186:25 | *command [Return] | semmle.label | *command [Return] |
| test.cpp:186:19:186:25 | *command [Return] | semmle.label | *command [Return] |
| test.cpp:186:47:186:54 | *filename | semmle.label | *filename |
| test.cpp:187:11:187:15 | strncat output argument | semmle.label | strncat output argument |
| test.cpp:187:11:187:15 | strncat output argument | semmle.label | strncat output argument |
Expand All @@ -151,8 +155,8 @@ nodes
subpaths
| test.cpp:196:26:196:33 | *filename | test.cpp:186:47:186:54 | *filename | test.cpp:186:19:186:25 | *command | test.cpp:196:10:196:16 | concat output argument |
| test.cpp:196:26:196:33 | *filename | test.cpp:186:47:186:54 | *filename | test.cpp:186:19:186:25 | *command | test.cpp:196:10:196:16 | concat output argument |
| test.cpp:196:26:196:33 | *filename | test.cpp:186:47:186:54 | *filename | test.cpp:188:11:188:17 | strncat output argument | test.cpp:196:10:196:16 | concat output argument |
| test.cpp:196:26:196:33 | *filename | test.cpp:186:47:186:54 | *filename | test.cpp:188:11:188:17 | strncat output argument | test.cpp:196:10:196:16 | concat output argument |
| test.cpp:196:26:196:33 | *filename | test.cpp:186:47:186:54 | *filename | test.cpp:186:19:186:25 | *command [Return] | test.cpp:196:10:196:16 | concat output argument |
| test.cpp:196:26:196:33 | *filename | test.cpp:186:47:186:54 | *filename | test.cpp:186:19:186:25 | *command [Return] | test.cpp:196:10:196:16 | concat output argument |
#select
| test.cpp:23:12:23:19 | command1 | test.cpp:15:27:15:30 | **argv | test.cpp:23:12:23:19 | *command1 | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string). | test.cpp:15:27:15:30 | **argv | user input (a command-line argument) | test.cpp:22:13:22:20 | sprintf output argument | sprintf output argument |
| test.cpp:51:10:51:16 | command | test.cpp:47:21:47:26 | *call to getenv | test.cpp:51:10:51:16 | *command | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string). | test.cpp:47:21:47:26 | *call to getenv | user input (an environment variable) | test.cpp:50:11:50:17 | sprintf output argument | sprintf output argument |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ edges
| test.cpp:228:27:228:54 | call to malloc | test.cpp:228:27:228:54 | call to malloc | provenance | |
| test.cpp:228:27:228:54 | call to malloc | test.cpp:232:10:232:15 | buffer | provenance | |
| test.cpp:235:40:235:45 | buffer | test.cpp:236:5:236:26 | ... = ... | provenance | |
| test.cpp:236:5:236:9 | *p_str [post update] [string] | test.cpp:235:27:235:31 | *p_str [Return] [string] | provenance | |
| test.cpp:236:5:236:9 | *p_str [post update] [string] | test.cpp:235:27:235:31 | *p_str [string] | provenance | |
| test.cpp:236:5:236:26 | ... = ... | test.cpp:236:5:236:9 | *p_str [post update] [string] | provenance | |
| test.cpp:241:20:241:38 | call to malloc | test.cpp:241:20:241:38 | call to malloc | provenance | |
Expand Down Expand Up @@ -128,6 +129,7 @@ nodes
| test.cpp:228:27:228:54 | call to malloc | semmle.label | call to malloc |
| test.cpp:228:27:228:54 | call to malloc | semmle.label | call to malloc |
| test.cpp:232:10:232:15 | buffer | semmle.label | buffer |
| test.cpp:235:27:235:31 | *p_str [Return] [string] | semmle.label | *p_str [Return] [string] |
| test.cpp:235:27:235:31 | *p_str [string] | semmle.label | *p_str [string] |
| test.cpp:235:40:235:45 | buffer | semmle.label | buffer |
| test.cpp:236:5:236:9 | *p_str [post update] [string] | semmle.label | *p_str [post update] [string] |
Expand All @@ -150,8 +152,8 @@ nodes
| test.cpp:264:13:264:30 | call to malloc | semmle.label | call to malloc |
| test.cpp:266:12:266:12 | p | semmle.label | p |
subpaths
| test.cpp:242:22:242:27 | buffer | test.cpp:235:40:235:45 | buffer | test.cpp:235:27:235:31 | *p_str [Return] [string] | test.cpp:242:16:242:19 | set_string output argument [string] |
| test.cpp:242:22:242:27 | buffer | test.cpp:235:40:235:45 | buffer | test.cpp:235:27:235:31 | *p_str [string] | test.cpp:242:16:242:19 | set_string output argument [string] |
| test.cpp:242:22:242:27 | buffer | test.cpp:235:40:235:45 | buffer | test.cpp:236:5:236:9 | *p_str [post update] [string] | test.cpp:242:16:242:19 | set_string output argument [string] |
#select
| test.cpp:42:5:42:11 | call to strncpy | test.cpp:18:19:18:24 | call to malloc | test.cpp:42:18:42:23 | string | This write may overflow $@ by 1 element. | test.cpp:42:18:42:23 | string | string |
| test.cpp:72:9:72:15 | call to strncpy | test.cpp:18:19:18:24 | call to malloc | test.cpp:72:22:72:27 | string | This write may overflow $@ by 1 element. | test.cpp:72:22:72:27 | string | string |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ edges
| tests2.cpp:111:14:111:15 | *c1 [*ptr] | tests2.cpp:111:14:111:19 | *ptr | provenance | |
| tests2.cpp:111:14:111:15 | *c1 [*ptr] | tests2.cpp:111:17:111:19 | *ptr | provenance | |
| tests2.cpp:111:17:111:19 | *ptr | tests2.cpp:111:14:111:19 | *ptr | provenance | |
| tests2.cpp:120:5:120:21 | [summary param] 1 indirection in zmq_msg_init_data | tests2.cpp:120:5:120:21 | [summary] to write: Argument[0 indirection] in zmq_msg_init_data | provenance | |
| tests2.cpp:120:5:120:21 | [summary param] 1 indirection in zmq_msg_init_data | tests2.cpp:120:5:120:21 | [summary param] 0 indirection in zmq_msg_init_data [Return] | provenance | |
| tests2.cpp:134:2:134:30 | *... = ... | tests2.cpp:138:23:138:34 | *message_data | provenance | |
| tests2.cpp:134:2:134:30 | *... = ... | tests2.cpp:143:34:143:45 | *message_data | provenance | |
| tests2.cpp:134:17:134:22 | *call to getenv | tests2.cpp:134:2:134:30 | *... = ... | provenance | |
Expand Down Expand Up @@ -52,8 +52,8 @@ nodes
| tests2.cpp:111:14:111:15 | *c1 [*ptr] | semmle.label | *c1 [*ptr] |
| tests2.cpp:111:14:111:19 | *ptr | semmle.label | *ptr |
| tests2.cpp:111:17:111:19 | *ptr | semmle.label | *ptr |
| tests2.cpp:120:5:120:21 | [summary param] 0 indirection in zmq_msg_init_data [Return] | semmle.label | [summary param] 0 indirection in zmq_msg_init_data [Return] |
| tests2.cpp:120:5:120:21 | [summary param] 1 indirection in zmq_msg_init_data | semmle.label | [summary param] 1 indirection in zmq_msg_init_data |
| tests2.cpp:120:5:120:21 | [summary] to write: Argument[0 indirection] in zmq_msg_init_data | semmle.label | [summary] to write: Argument[0 indirection] in zmq_msg_init_data |
| tests2.cpp:134:2:134:30 | *... = ... | semmle.label | *... = ... |
| tests2.cpp:134:17:134:22 | *call to getenv | semmle.label | *call to getenv |
| tests2.cpp:138:23:138:34 | *message_data | semmle.label | *message_data |
Expand All @@ -74,7 +74,7 @@ nodes
| tests_sysconf.cpp:36:21:36:27 | confstr output argument | semmle.label | confstr output argument |
| tests_sysconf.cpp:39:19:39:25 | *pathbuf | semmle.label | *pathbuf |
subpaths
| tests2.cpp:143:34:143:45 | *message_data | tests2.cpp:120:5:120:21 | [summary param] 1 indirection in zmq_msg_init_data | tests2.cpp:120:5:120:21 | [summary] to write: Argument[0 indirection] in zmq_msg_init_data | tests2.cpp:143:24:143:31 | zmq_msg_init_data output argument |
| tests2.cpp:143:34:143:45 | *message_data | tests2.cpp:120:5:120:21 | [summary param] 1 indirection in zmq_msg_init_data | tests2.cpp:120:5:120:21 | [summary param] 0 indirection in zmq_msg_init_data [Return] | tests2.cpp:143:24:143:31 | zmq_msg_init_data output argument |
#select
| tests2.cpp:63:13:63:26 | *call to getenv | tests2.cpp:63:13:63:26 | *call to getenv | tests2.cpp:63:13:63:26 | *call to getenv | This operation exposes system data from $@. | tests2.cpp:63:13:63:26 | *call to getenv | *call to getenv |
| tests2.cpp:64:13:64:26 | *call to getenv | tests2.cpp:64:13:64:26 | *call to getenv | tests2.cpp:64:13:64:26 | *call to getenv | This operation exposes system data from $@. | tests2.cpp:64:13:64:26 | *call to getenv | *call to getenv |
Expand Down
40 changes: 30 additions & 10 deletions csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,29 @@
private import CaptureModelsSpecific
private import CaptureModelsPrinting

/**
* A node from which flow can return to the caller. This is either a regular
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
*/
private class ReturnNodeExt extends DataFlow::Node {
private DataFlowImplCommon::ReturnKindExt kind;

ReturnNodeExt() {
kind = DataFlowImplCommon::getValueReturnPosition(this).getKind() or
kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind()
}

string getOutput() {
kind instanceof DataFlowImplCommon::ValueReturnKind and
result = "ReturnValue"
or
exists(ParameterPosition pos |
pos = kind.(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
result = paramReturnNodeAsOutput(returnNodeEnclosingCallable(this), pos)
)
}
}

class DataFlowTargetApi extends TargetApiSpecific {
DataFlowTargetApi() { not isUninterestingForDataFlowModels(this) }
}
Expand Down Expand Up @@ -65,7 +88,7 @@ string asInputArgument(DataFlow::Node source) { result = asInputArgumentSpecific
* Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`).
*/
string captureQualifierFlow(TargetApiSpecific api) {
exists(DataFlowImplCommon::ReturnNodeExt ret |
exists(ReturnNodeExt ret |
api = returnNodeEnclosingCallable(ret) and
isOwnInstanceAccessNode(ret)
) and
Expand Down Expand Up @@ -130,7 +153,7 @@ module ThroughFlowConfig implements DataFlow::StateConfigSig {
}

predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof DataFlowImplCommon::ReturnNodeExt and
sink instanceof ReturnNodeExt and
not isOwnInstanceAccessNode(sink) and
not exists(captureQualifierFlow(sink.asExpr().getEnclosingCallable())) and
(state instanceof TaintRead or state instanceof TaintStore)
Expand Down Expand Up @@ -171,14 +194,11 @@ private module ThroughFlow = TaintTracking::GlobalWithState<ThroughFlowConfig>;
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
string captureThroughFlow(DataFlowTargetApi api) {
exists(
DataFlow::ParameterNode p, DataFlowImplCommon::ReturnNodeExt returnNodeExt, string input,
string output
|
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output |
ThroughFlow::flow(p, returnNodeExt) and
returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and
input = parameterNodeAsInput(p) and
output = returnNodeAsOutput(returnNodeExt) and
output = returnNodeExt.getOutput() and
input != output and
result = ModelPrinting::asTaintModel(api, input, output)
)
Expand All @@ -196,7 +216,7 @@ module FromSourceConfig implements DataFlow::ConfigSig {

predicate isSink(DataFlow::Node sink) {
exists(DataFlowTargetApi c |
sink instanceof DataFlowImplCommon::ReturnNodeExt and
sink instanceof ReturnNodeExt and
sink.getEnclosingCallable() = c
)
}
Expand All @@ -214,12 +234,12 @@ private module FromSource = TaintTracking::Global<FromSourceConfig>;
* Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`.
*/
string captureSource(DataFlowTargetApi api) {
exists(DataFlow::Node source, DataFlow::Node sink, string kind |
exists(DataFlow::Node source, ReturnNodeExt sink, string kind |
FromSource::flow(source, sink) and
ExternalFlow::sourceNode(source, kind) and
api = sink.getEnclosingCallable() and
isRelevantSourceKind(kind) and
result = ModelPrinting::asSourceModel(api, returnNodeAsOutput(sink), kind)
result = ModelPrinting::asSourceModel(api, sink.getOutput(), kind)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ private import semmle.code.csharp.frameworks.system.linq.Expressions
import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow
import semmle.code.csharp.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
import semmle.code.csharp.dataflow.internal.DataFlowPrivate as DataFlowPrivate
import semmle.code.csharp.dataflow.internal.DataFlowDispatch as DataFlowDispatch

module DataFlow = CS::DataFlow;

Expand Down Expand Up @@ -133,32 +134,24 @@ string parameterAccess(CS::Parameter p) {

class InstanceParameterNode = DataFlowPrivate::InstanceParameterNode;

pragma[nomagic]
private CS::Parameter getParameter(DataFlowImplCommon::ReturnNodeExt node, ParameterPosition pos) {
result = node.(DataFlow::Node).getEnclosingCallable().getParameter(pos.getPosition())
}
class ParameterPosition = DataFlowDispatch::ParameterPosition;

/**
* Gets the MaD string representation of the the return node `node`.
* Gets the MaD string representation of return through parameter at position
* `pos` of callable `c`.
*/
string returnNodeAsOutput(DataFlowImplCommon::ReturnNodeExt node) {
if node.getKind() instanceof DataFlowImplCommon::ValueReturnKind
then result = "ReturnValue"
else
exists(ParameterPosition pos |
pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition()
|
result = parameterAccess(getParameter(node, pos))
or
pos.isThisParameter() and
result = qualifierString()
)
bindingset[c]
string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) {
result = parameterAccess(c.getParameter(pos.getPosition()))
or
pos.isThisParameter() and
result = qualifierString()
}

/**
* Gets the enclosing callable of `ret`.
*/
CS::Callable returnNodeEnclosingCallable(DataFlowImplCommon::ReturnNodeExt ret) {
CS::Callable returnNodeEnclosingCallable(DataFlow::Node ret) {
result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable()
}

Expand Down