Skip to content

Commit 154fbb7

Browse files
[jssrc2cpg] Optimize performance for AST node processing (#5922)
Replace several JVM-costly patterns in the per-node hot path:
- Reflection-based BabelAst.fromString with pre-built Map lookup
- Try/exception-based JSON access with ujson Option APIs
- SortedMap position lookups with array-based binary search
- Redundant createBabelNodeInfo calls with lightweight nodeTypeOf
- Unnecessary .arr.toList intermediate allocations
- Regex replaceAll with literal replace in EjsPreprocessor
- Option-allocating HashMap patterns with direct getOrElse/update
1 parent 52b3b9e commit 154fbb7

12 files changed

Lines changed: 412 additions & 162 deletions

joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/astcreation/AstCreator.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,8 @@ class AstCreator(val config: Config, val usedTypes: mutable.HashSet[String], val
5858

5959
// we track line and column numbers manually because astgen / @babel-parser sometimes
6060
// fails to deliver them at all - strange, but this even happens with its latest version
61-
protected val (positionToLineNumberMapping, positionToFirstPositionInLineMapping) =
62-
positionLookupTables(parserResult.fileContent)
61+
protected val (lineEndPositions, lineStartPositions) =
62+
buildPositionArrays(parserResult.fileContent)
6363

6464
override def createAst(): DiffGraphBuilder = {
6565
val fileContent = if (!config.disableFileContent) Option(parserResult.fileContent) else None

joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/astcreation/AstCreatorHelper.scala

Lines changed: 45 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -9,28 +9,28 @@ import io.shiftleft.codepropertygraph.generated.{EdgeTypes, PropertyDefaults, Pr
99
import io.shiftleft.codepropertygraph.generated.nodes.*
1010
import ujson.Value
1111

12-
import scala.collection.{SortedMap, mutable}
13-
import scala.util.Try
12+
import scala.collection.mutable
1413

1514
trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>
1615

1716
private val anonClassKeyPool = new IntervalKeyPool(first = 0, last = Long.MaxValue)
1817

18+
protected def nodeTypeOf(json: Value): BabelNode = fromString(json("type").str)
19+
1920
protected def createBabelNodeInfo(json: Value): BabelNodeInfo = {
2021
val c = code(json)
2122
val ln = line(json)
2223
val cn = column(json)
2324
val lnEnd = lineEnd(json)
2425
val cnEnd = columnEnd(json)
25-
val node = nodeType(json)
26+
val node = nodeTypeOf(json)
2627
BabelNodeInfo(node, json, c, ln, cn, lnEnd, cnEnd)
2728
}
2829

29-
private def nodeType(node: Value): BabelNode = fromString(node("type").str)
30-
3130
protected def line(node: Value): Option[Int] = start(node).map(getLineOfSource)
3231

33-
protected def start(node: Value): Option[Int] = Try(node("start").num.toInt).toOption
32+
protected def start(node: Value): Option[Int] =
33+
node.obj.get("start").flatMap(_.numOpt).map(_.toInt)
3434

3535
protected def range(node: Value): Option[String] = {
3636
for {
@@ -39,22 +39,28 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As
3939
} yield s"$nodeStart:$nodeEnd"
4040
}
4141

42-
// Returns the line number for a given position in the source.
42+
// Binary search: find first line whose end-position >= position
4343
private def getLineOfSource(position: Int): Int = {
44-
val (_, lineNumber) = positionToLineNumberMapping.minAfter(position).get
45-
lineNumber
44+
var lo = 0
45+
var hi = lineEndPositions.length - 1
46+
while (lo < hi) {
47+
val mid = (lo + hi) >>> 1
48+
if (lineEndPositions(mid) < position) lo = mid + 1
49+
else hi = mid
50+
}
51+
lo + 1 // 1-based line numbers
4652
}
4753

4854
protected def lineEnd(node: Value): Option[Int] = end(node).map(getLineOfSource)
4955

50-
protected def end(node: Value): Option[Int] = Try(node("end").num.toInt).toOption
56+
protected def end(node: Value): Option[Int] =
57+
node.obj.get("end").flatMap(_.numOpt).map(_.toInt)
5158

5259
protected def column(node: Value): Option[Int] = start(node).map(getColumnOfSource)
5360

54-
// Returns the column number for a given position in the source.
5561
private def getColumnOfSource(position: Int): Int = {
56-
val (_, firstPositionInLine) = positionToFirstPositionInLineMapping.minAfter(position).get
57-
position - firstPositionInLine
62+
val lineIdx = getLineOfSource(position) - 1
63+
position - lineStartPositions(lineIdx)
5864
}
5965

6066
protected def columnEnd(node: Value): Option[Int] = end(node).map(getColumnOfSource)
@@ -130,41 +136,39 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As
130136
usedVariableNames: mutable.HashMap[String, Int],
131137
variableName: String
132138
): String = {
133-
val counter = usedVariableNames.get(variableName).map(_ + 1).getOrElse(0)
139+
val counter = usedVariableNames.getOrElse(variableName, -1) + 1
134140
val currentVariableName = s"${variableName}_$counter"
135-
usedVariableNames.put(variableName, counter)
141+
usedVariableNames.update(variableName, counter)
136142
currentVariableName
137143
}
138144

139145
protected def safeBool(node: Value, key: String): Option[Boolean] =
140-
if (hasKey(node, key)) Try(node(key).bool).toOption else None
146+
node.obj.get(key).flatMap(_.boolOpt)
141147

142148
protected def safeObj(node: Value, key: String): Option[upickle.core.LinkedHashMap[String, Value]] =
143-
Try(node(key).obj).toOption.filter(_.nonEmpty)
144-
145-
protected def positionLookupTables(source: String): (SortedMap[Int, Int], SortedMap[Int, Int]) = {
146-
val positionToLineNumber, positionToFirstPositionInLine = mutable.TreeMap.empty[Int, Int]
147-
val data = source.toCharArray
148-
var lineNumber = 1
149-
var firstPositionInLine = 0
150-
var position = 0
149+
node.obj.get(key).flatMap(_.objOpt).filter(_.nonEmpty)
150+
151+
protected def buildPositionArrays(source: String): (Array[Int], Array[Int]) = {
152+
val ends = mutable.ArrayBuffer[Int]()
153+
val starts = mutable.ArrayBuffer[Int]()
154+
var firstPositionInLine = 0
155+
var position = 0
156+
val data = source.toCharArray
151157
while (position < data.length) {
152-
val isNewLine = data(position) == '\n'
153-
if (isNewLine) {
154-
positionToLineNumber.put(position, lineNumber)
155-
lineNumber += 1
156-
positionToFirstPositionInLine.put(position, firstPositionInLine)
158+
if (data(position) == '\n') {
159+
ends += position
160+
starts += firstPositionInLine
157161
firstPositionInLine = position + 1
158162
}
159163
position += 1
160164
}
161-
positionToLineNumber.put(position, lineNumber)
162-
positionToFirstPositionInLine.put(position, firstPositionInLine)
163-
164-
// for empty line at the end of each JS/TS file generated by BabelJsonParser:
165-
positionToLineNumber.put(position + 1, lineNumber + 1)
166-
positionToFirstPositionInLine.put(position + 1, 0)
167-
(positionToLineNumber, positionToFirstPositionInLine)
165+
// Final line (may not end with newline)
166+
ends += position
167+
starts += firstPositionInLine
168+
// Extra entry for empty line at end of file (matches current BabelJsonParser behavior)
169+
ends += (position + 1)
170+
starts += 0
171+
(ends.toArray, starts.toArray)
168172
}
169173

170174
protected def calcMethodNameAndFullName(func: BabelNodeInfo): (String, String) = {
@@ -216,7 +220,7 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As
216220
}
217221

218222
protected def safeStr(node: Value, key: String): Option[String] =
219-
if (hasKey(node, key)) Try(node(key).str).toOption else None
223+
node.obj.get(key).flatMap(_.strOpt)
220224

221225
private def isMethodOrGetSet(func: BabelNodeInfo): Boolean = {
222226
if (hasKey(func.json, "kind") && !func.json("kind").isNull) {
@@ -248,7 +252,7 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As
248252
(name, fullName)
249253
}
250254

251-
/** In JS it is possible to create anonymous classes. We have to handle this here.
255+
/** In JS, it is possible to create anonymous classes. We have to handle this here.
252256
*/
253257
private def calcTypeName(classNode: BabelNodeInfo): String =
254258
if (hasKey(classNode.json, "id") && !classNode.json("id").isNull) code(classNode.json("id"))
@@ -263,8 +267,9 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As
263267
}
264268
}
265269

266-
protected def hasKey(node: Value, key: String): Boolean = Try(node(key)).isSuccess
270+
protected def hasKey(node: Value, key: String): Boolean =
271+
node.objOpt.exists(_.contains(key))
267272

268-
protected def nextAnonClassName(): String = s"<anon-class>${anonClassKeyPool.next}"
273+
private def nextAnonClassName(): String = s"<anon-class>${anonClassKeyPool.next}"
269274

270275
}

joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/astcreation/AstForDeclarationsCreator.scala

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ import io.shiftleft.codepropertygraph.generated.EvaluationStrategies
1313
import io.shiftleft.codepropertygraph.generated.Operators
1414
import ujson.Value
1515

16-
import scala.util.Try
17-
1816
trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>
1917

2018
private val DefaultsKey = "default"
@@ -60,7 +58,7 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
6058

6159
private def decoratorElements(elem: BabelNodeInfo): List[BabelNodeInfo] = {
6260
if (hasKey(elem.json, "decorators") && !elem.json("decorators").isNull) {
63-
elem.json("decorators").arr.toList.map(createBabelNodeInfo)
61+
elem.json("decorators").arr.map(createBabelNodeInfo).toList
6462
} else List.empty
6563
}
6664

@@ -104,7 +102,6 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
104102
val specifiers = declaration
105103
.json("specifiers")
106104
.arr
107-
.toList
108105
.map { spec =>
109106
if (createBabelNodeInfo(spec).node == ExportNamespaceSpecifier) {
110107
val exported = createBabelNodeInfo(spec("exported"))
@@ -149,7 +146,7 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
149146
case _ => Ast()
150147
}
151148

152-
val asts = fromAst +: (specifierAsts ++ declAsts)
149+
val asts = (fromAst +: (specifierAsts ++ declAsts)).toList
153150
blockAst(blockNode(declaration, declaration.code, Defines.Any), asts)
154151
}
155152

@@ -308,7 +305,7 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
308305
val kind = declaration.json("kind").str
309306
val scopeType = if (kind == "let") { VariableScopeManager.ScopeType.BlockScope }
310307
else { VariableScopeManager.ScopeType.MethodScope }
311-
val declAsts = declaration.json("declarations").arr.toList.map(astForVariableDeclarator(_, scopeType, kind))
308+
val declAsts = declaration.json("declarations").arr.map(astForVariableDeclarator(_, scopeType, kind)).toList
312309
declAsts match {
313310
case Nil => Ast()
314311
case head :: Nil => head
@@ -346,7 +343,7 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
346343
diffGraph.addEdge(importNode, _dependencyNode, EdgeTypes.IMPORTS)
347344
assignment
348345
} else {
349-
val specs = impDecl.json("specifiers").arr.toList
346+
val specs = impDecl.json("specifiers").arr
350347
val requireCalls = specs.map { importSpecifier =>
351348
val isImportN = createBabelNodeInfo(importSpecifier).node match {
352349
case ImportSpecifier => true
@@ -368,7 +365,7 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
368365
} else if (requireCalls.sizeIs == 1) {
369366
requireCalls.head
370367
} else {
371-
blockAst(blockNode(impDecl, impDecl.code, Defines.Any), requireCalls)
368+
blockAst(blockNode(impDecl, impDecl.code, Defines.Any), requireCalls.toList)
372369
}
373370
}
374371
}
@@ -490,7 +487,7 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
490487

491488
val subTreeAsts = pattern.node match {
492489
case ObjectPattern =>
493-
pattern.json("properties").arr.toList.map { element =>
490+
pattern.json("properties").arr.map { element =>
494491
val nodeInfo = createBabelNodeInfo(element)
495492
nodeInfo.node match {
496493
case RestElement =>
@@ -512,7 +509,7 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
512509
}
513510
}
514511
case ArrayPattern =>
515-
pattern.json("elements").arr.toList.zipWithIndex.map {
512+
pattern.json("elements").arr.zipWithIndex.map {
516513
case (element, index) if !element.isNull =>
517514
val nodeInfo = createBabelNodeInfo(element)
518515
nodeInfo.node match {
@@ -538,7 +535,7 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
538535
scope.popScope()
539536
localAstParentStack.pop()
540537

541-
val blockChildren = assignmentTmpCallAst +: subTreeAsts :+ Ast(returnTmpNode)
538+
val blockChildren = (assignmentTmpCallAst +: subTreeAsts :+ Ast(returnTmpNode)).toList
542539
blockAst(blockNode_, blockChildren)
543540
}
544541

@@ -565,8 +562,8 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
565562
val groupId = rhsCode.substring(rhsCode.indexOf(s"$RequireKeyword(") + 9, rhsCode.indexOf(")") - 1)
566563
val nodeInfo = createBabelNodeInfo(lhs)
567564
val names = nodeInfo.node match {
568-
case ArrayPattern => nodeInfo.json("elements").arr.toList.map(code)
569-
case ObjectPattern => nodeInfo.json("properties").arr.toList.map(code)
565+
case ArrayPattern => nodeInfo.json("elements").arr.map(code).toList
566+
case ObjectPattern => nodeInfo.json("properties").arr.map(code).toList
570567
case _ => List(code(lhs))
571568
}
572569
names.foreach { name =>
@@ -584,7 +581,7 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) {
584581
): Ast = {
585582
val idNodeInfo = createBabelNodeInfo(declarator("id"))
586583
val declNodeInfo = createBabelNodeInfo(declarator)
587-
val initNodeInfo = Try(createBabelNodeInfo(declarator("init"))).toOption
584+
val initNodeInfo = declarator.obj.get("init").filter(!_.isNull).map(createBabelNodeInfo)
588585
val declaratorCode = s"$kind ${code(declarator)}"
589586
val tpe = typeFor(declNodeInfo)
590587
val typeFullName = if (Defines.isBuiltinType(tpe)) tpe else Defines.Any

joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/astcreation/AstForExpressionsCreator.scala

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ import io.joern.x2cpg.frontendspecific.jssrc2cpg.{Defines, GlobalBuiltins}
99
import io.shiftleft.codepropertygraph.generated.{DispatchTypes, EdgeTypes, EvaluationStrategies, Operators}
1010
import io.shiftleft.codepropertygraph.generated.nodes.{NewFieldIdentifier, NewIdentifier, NewMethodRef, NewNode}
1111

12-
import scala.util.Try
13-
1412
trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>
1513

1614
protected def astForExpressionStatement(exprStmt: BabelNodeInfo): Ast =
@@ -333,7 +331,7 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
333331

334332
protected def astForArrayExpression(arrExpr: BabelNodeInfo, elementsKey: String = "elements"): Ast = {
335333
val MAX_INITIALIZERS = 1000
336-
val elementsJsons = Try(arrExpr.json(elementsKey).arr).toOption.toList.flatten
334+
val elementsJsons = arrExpr.json.obj.get(elementsKey).flatMap(_.arrOpt).toList.flatten
337335
val elements = elementsJsons.slice(0, MAX_INITIALIZERS)
338336
if (elements.isEmpty) {
339337
Ast(
@@ -401,7 +399,7 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
401399
}
402400

403401
private def handleTemplateExpressionArgs(templateExpr: BabelNodeInfo, callExpressionInfo: CallExpressionInfo): Ast = {
404-
val expressionArgs = templateExpr.json("quasi")("expressions").arr.toList.map(astForNodeWithFunctionReference)
402+
val expressionArgs = templateExpr.json("quasi")("expressions").arr.map(astForNodeWithFunctionReference).toSeq
405403
val quasisArg = astForArrayExpression(createBabelNodeInfo(templateExpr.json("quasi")), "quasis")
406404
val callNode_ =
407405
callNode(templateExpr, templateExpr.code, callExpressionInfo.callName, DispatchTypes.DYNAMIC_DISPATCH)
@@ -440,7 +438,7 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
440438
val localTmpNode = localNode(objExpr, tmpName, tmpName, Defines.Any).order(0)
441439
diffGraph.addEdge(localAstParentStack.head, localTmpNode, EdgeTypes.AST)
442440

443-
val propertiesAsts = objExpr.json("properties").arr.toList.map { property =>
441+
val propertiesAsts = objExpr.json("properties").arr.map { property =>
444442
val nodeInfo = createBabelNodeInfo(property)
445443
nodeInfo.node match {
446444
case SpreadElement | RestElement =>
@@ -500,7 +498,7 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
500498
scope.popScope()
501499
localAstParentStack.pop()
502500

503-
val childrenAsts = propertiesAsts :+ Ast(tmpNode)
501+
val childrenAsts = (propertiesAsts :+ Ast(tmpNode)).toList
504502
blockAst(blockNode_, childrenAsts)
505503
}
506504

joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/astcreation/AstForFunctionsCreator.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import io.shiftleft.codepropertygraph.generated.{DispatchTypes, EdgeTypes, Evalu
1111
import ujson.Value
1212

1313
import scala.collection.mutable
14-
import scala.util.Try
1514

1615
trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>
1716

@@ -113,7 +112,7 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
113112
EvaluationStrategies.BY_VALUE,
114113
typeFullName
115114
).possibleTypes(possibleTypes)
116-
additionalBlockStatements.addAll(nodeInfo.json("elements").arr.toList.map {
115+
additionalBlockStatements.addAll(nodeInfo.json("elements").arr.map {
117116
case element if !element.isNull =>
118117
val elementNodeInfo = createBabelNodeInfo(element)
119118
elementNodeInfo.node match {
@@ -181,7 +180,7 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
181180
).possibleTypes(possibleTypes)
182181
scope.addVariable(paramName, param, typeFullName, VariableScopeManager.ScopeType.MethodScope)
183182

184-
additionalBlockStatements.addAll(nodeInfo.json("properties").arr.toList.map { element =>
183+
additionalBlockStatements.addAll(nodeInfo.json("properties").arr.map { element =>
185184
val elementNodeInfo = createBabelNodeInfo(element)
186185
elementNodeInfo.node match {
187186
case ObjectProperty =>
@@ -225,7 +224,10 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
225224
// Handle types declared as `credentials: { username: string; password: string; }`
226225
val tpe = typeFor(nodeInfo)
227226
var typeFullName = if (Defines.isBuiltinType(tpe)) tpe else Defines.Any
228-
val possibleType = Try(createBabelNodeInfo(nodeInfo.json("typeAnnotation")("typeAnnotation")))
227+
val possibleType = nodeInfo.json.obj
228+
.get("typeAnnotation")
229+
.flatMap(_.obj.get("typeAnnotation"))
230+
.map(createBabelNodeInfo)
229231
.map(x =>
230232
x.node match {
231233
case TSTypeLiteral =>

joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/astcreation/AstForPrimitivesCreator.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ trait AstForPrimitivesCreator(implicit withSchemaValidation: ValidationMode) { t
7272
Ast(literalNode(booleanLiteral, booleanLiteral.code, Option(Defines.Boolean)))
7373

7474
protected def astForTemplateLiteral(templateLiteral: BabelNodeInfo): Ast = {
75-
val expressions = templateLiteral.json("expressions").arr.toList
76-
val quasis = templateLiteral.json("quasis").arr.toList.filterNot(_("tail").bool)
77-
val quasisTail = templateLiteral.json("quasis").arr.toList.filter(_("tail").bool).head
75+
val expressions = templateLiteral.json("expressions").arr
76+
val quasis = templateLiteral.json("quasis").arr.filterNot(_("tail").bool)
77+
val quasisTail = templateLiteral.json("quasis").arr.filter(_("tail").bool).head
7878

7979
if (expressions.isEmpty && quasis.isEmpty) {
8080
astForTemplateElement(createBabelNodeInfo(quasisTail))
@@ -89,7 +89,7 @@ trait AstForPrimitivesCreator(implicit withSchemaValidation: ValidationMode) { t
8989
val argumentAsts = expressions.zip(quasis).flatMap { case (expression, quasi) =>
9090
List(astForNodeWithFunctionReference(quasi), astForNodeWithFunctionReference(expression))
9191
}
92-
val argAsts = argumentAsts :+ astForNodeWithFunctionReference(quasisTail)
92+
val argAsts = (argumentAsts :+ astForNodeWithFunctionReference(quasisTail)).toSeq
9393
callAst(templateCall, argAsts)
9494
}
9595
}

0 commit comments

Comments
 (0)