Skip to content

Commit 96f2c86

Browse files
committed
Add some minor optimisations. The main bottleneck is still the language parser: its cost is almost negligible on small inputs, but it dominates the running time on heavier data.
1 parent 8fb9fb6 commit 96f2c86

19 files changed

Lines changed: 703 additions & 336 deletions

File tree

.github/workflows/cmake.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ name: CMake
22

33
on:
44
push:
5-
branches: [ "v2.0" ]
5+
branches: [ "prochainefois" ]
66
pull_request:
7-
branches: [ "v2.0" ]
7+
branches: [ "prochainefois" ]
88

99
env:
1010
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,4 @@ __pycache__
5656
viz/parsers/__pycache__/
5757
/myeasylog.log
5858
/out.csv
59+
/cmake-build-debug-coverage/

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ add_library(yaucl_graph OBJECT
9494
)
9595
target_link_libraries(yaucl_graph yaucl_hashing roaring)
9696

97-
add_library(gsm_script ./antlr4/scriptv2/scriptBaseListener.cpp
97+
add_library(gsm_script OBJECT ./antlr4/scriptv2/scriptBaseListener.cpp
9898
./antlr4/scriptv2/scriptBaseVisitor.cpp
9999
./antlr4/scriptv2/scriptLexer.cpp
100100
./antlr4/scriptv2/scriptParser.cpp
@@ -103,13 +103,13 @@ add_library(gsm_script ./antlr4/scriptv2/scriptBaseListener.cpp
103103
./src/scriptv2/ScriptAST.cpp ./include/scriptv2/ScriptAST.h ./src/scriptv2/Funzione.cpp ./include/scriptv2/Funzione.h ./src/scriptv2/Javification.cpp ./include/scriptv2/Javification.h ./src/scriptv2/ScriptVisitor.cpp ./include/scriptv2/ScriptVisitor.h)
104104
target_link_libraries(gsm_script antlr4_static)
105105

106-
add_library(gsm2 antlr4/graph_grammar/simple_graph_grammarLexer.cpp antlr4/graph_grammar/simple_graph_grammarParser.cpp antlr4/graph_grammar/simple_graph_grammarListener.cpp antlr4/graph_grammar/simple_graph_grammarVisitor.cpp src/database/PhiTable.cpp include/database/PhiTable.h src/database/AttributeTable.cpp include/database/AttributeTable.h src/database/SimplifiedFuzzyStringMatching.cpp include/database/SimplifiedFuzzyStringMatching.h src/database/LinearGSM.cpp include/database/LinearGSM.h src/database/ActivityTable.cpp include/database/ActivityTable.h src/result.cpp include/result.h src/database/gsm_indices.cpp include/database/gsm_indices.h src/queries/DataQuery.cpp include/queries/DataQuery.h src/database/gsm_object_xi_content.cpp include/database/gsm_object_xi_content.h include/simple_pair_hash.h src/queries/DataPredicate.cpp include/queries/DataPredicate.h
106+
add_library(gsm2 OBJECT antlr4/graph_grammar/simple_graph_grammarLexer.cpp antlr4/graph_grammar/simple_graph_grammarParser.cpp antlr4/graph_grammar/simple_graph_grammarListener.cpp antlr4/graph_grammar/simple_graph_grammarVisitor.cpp src/database/PhiTable.cpp include/database/PhiTable.h src/database/AttributeTable.cpp include/database/AttributeTable.h src/database/SimplifiedFuzzyStringMatching.cpp include/database/SimplifiedFuzzyStringMatching.h src/database/LinearGSM.cpp include/database/LinearGSM.h src/database/ActivityTable.cpp include/database/ActivityTable.h src/result.cpp include/result.h src/database/gsm_indices.cpp include/database/gsm_indices.h src/queries/DataQuery.cpp include/queries/DataQuery.h src/database/gsm_object_xi_content.cpp include/database/gsm_object_xi_content.h include/simple_pair_hash.h src/queries/DataPredicate.cpp include/queries/DataPredicate.h
107107
#src/database/AtomizingPipeline2.cpp include/database/AtomizingPipeline2.h
108108
submodules/easylogging/src/easylogging++.cc submodules/easylogging/src/easylogging++.h
109109
src/database/GSMPatternVisitor.cpp include/database/GSMPatternVisitor.h src/database/GraphEdgeMatchTable.cpp include/database/GraphEdgeMatchTable.h src/database/SimpleTable.cpp include/database/SimpleTable.h include/database/gsm_object.h src/database/gsm_object.cpp include/database/gsm_inmemory_db.h src/database/gsm_inmemory_db.cpp include/database/utility.h src/queries/preserve_results.cpp include/queries/preserve_results.h src/queries/delta_updates.cpp include/queries/delta_updates.h src/queries/closure.cpp include/queries/closure.h src/database/GSMIso.cpp include/database/GSMIso.h include/parsing.h include/configuration/base.h src/configuration/Serialisation.cpp include/configuration/Serialisation.h src/configuration/Configuration.cpp include/configuration/Configuration.h src/configuration/Environment.cpp include/configuration/Environment.h)
110110
target_link_libraries(gsm2 antlr4_static yaucl_structures roaring yaucl_hashing yaucl_graph )
111111

112-
add_executable(gsm2_server main.cpp src/queries/NestedResultTable.cpp include/queries/NestedResultTable.h)
112+
add_executable(gsm2_server main.cpp src/queries/NestedResultTable.cpp include/queries/NestedResultTable.h src/database/SchemaIndexer.cpp include/database/SchemaIndexer.h)
113113
target_link_libraries(gsm2_server antlr4_static yaucl_structures roaring yaucl_hashing yaucl_graph gsm2 gsm_script)
114114

115115
#enable_testing ()

data/test/einstein/einstein_query.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ p3 = (V)--[∀n:nsubj]->(>>S)
5555
del T
5656
del M
5757

58-
(S);
58+
(S) ;
5959

6060
p4 = (V)--[∀l:dobj||ccomp||nmod]->(Z)
6161
(Z)--[? case]->(T)

data/test/simple/schema.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pos
2+
SizeTAtt

data/test/simple/simple.txt

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
id:0
2+
ell:
3+
VBP
4+
root
5+
.
6+
xi:
7+
play
8+
.
9+
properties:
10+
"pos" "4"
11+
.
12+
phi:
13+
"punct"
14+
1.0 1
15+
;"nsubj"
16+
1.0 2
17+
1.0 3
18+
;"dobj"
19+
1.0 4
20+
;.
21+
22+
id:2
23+
ell:
24+
noun
25+
.
26+
xi:
27+
alice
28+
.
29+
properties:
30+
"number" "singular"
31+
"pos" "1"
32+
"specification" "common"
33+
.
34+
phi:
35+
"cc"
36+
1.0 5
37+
;"conj"
38+
1.0 3
39+
;.
40+
41+
id:5
42+
ell:
43+
CC
44+
.
45+
xi:
46+
and
47+
.
48+
properties:
49+
"pos" "2"
50+
.
51+
phi:
52+
.
53+
54+
id:3
55+
ell:
56+
noun
57+
.
58+
xi:
59+
bob
60+
.
61+
properties:
62+
"number" "singular"
63+
"pos" "3"
64+
"specification" "common"
65+
.
66+
phi:
67+
.
68+
69+
id:4
70+
ell:
71+
noun
72+
.
73+
xi:
74+
cricket
75+
.
76+
properties:
77+
"number" "singular"
78+
"pos" "5"
79+
"specification" "common"
80+
.
81+
phi:
82+
.
83+
84+
id:1
85+
ell:
86+
<dot>
87+
.
88+
xi:
89+
<dot>
90+
.
91+
properties:
92+
"pos" "6"
93+
.
94+
phi:
95+
.
96+
97+

include/database/GSMPatternVisitor.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ struct test_pred {
6161
std::string nsoe;
6262
std::string pattern_matched, variable_matched;
6363
DPtr<std::unordered_map<std::string, DPtr<script::structures::ScriptAST>>> optGamma;
64-
void* ptrResult{nullptr};//scriptParser::ScriptContext
64+
DPtr<script::structures::ScriptAST> ptrResult{nullptr};//
6565
DEFAULT_CONSTRUCTORS(test_pred)
6666
};
6767

@@ -85,7 +85,7 @@ struct rewrite_expr {
8585
NODE_OR_EDGE,
8686
SCRIPT_CASE
8787
};
88-
void* ptrResult{nullptr};
88+
DPtr<script::structures::ScriptAST> ptrResult{nullptr};
8989
cases t; // Using enumeration instead of inheritance
9090
size_t id; // Potential id associated to the match
9191

include/database/SchemaIndexer.h

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
//
2+
// Created by giacomo on 16/01/24.
3+
//
4+
5+
#ifndef GSM2_SCHEMAINDEXER_H
6+
#define GSM2_SCHEMAINDEXER_H
7+
8+
#include <unordered_map>
9+
#include <set>
10+
#include <string>
11+
#include <vector>
12+
13+
#include "utility.h"
14+
#include "scriptv2/java_types.h"
15+
#include "scriptv2/ScriptAST.h"
16+
17+
struct SchemaIndexer {
18+
std::unordered_map<std::string, size_t> field_to_schema;
19+
std::unordered_map<std::string, std::unordered_map<std::string, size_t>> nested_schema;
20+
std::set<size_t> isNull, cow;
21+
std::vector<DPtr<script::structures::ScriptAST>> values;
22+
std::set<size_t> strings;
23+
const std::vector<value>* row;
24+
const std::vector<std::string>* schema;
25+
26+
SchemaIndexer(size_t n) : values(n, std::make_shared<script::structures::ScriptAST>()){
27+
fullyIndexed = false;
28+
row = nullptr;
29+
}
30+
void setUp(const std::vector<std::string>* schema) {
31+
this->schema = {schema};
32+
for (size_t i = 0, N = schema->size(); i<N; i++) {
33+
field_to_schema.emplace(schema->at(i), i);
34+
}
35+
36+
}
37+
38+
void initialize(const std::vector<value>* nestedRow) {
39+
if (row == nestedRow) return;
40+
row = nestedRow;
41+
isNull.clear();
42+
cow.clear();
43+
for (size_t i = 0, N = std::min(nestedRow->size(), field_to_schema.size()); i<N; i++) {
44+
const auto& cell = nestedRow->at(i);
45+
if (!cell.isNested) {
46+
if (std::holds_alternative<bool>(cell.val))
47+
isNull.insert(i);
48+
else if (!std::holds_alternative<size_t>(cell.val)) {
49+
strings.emplace(i);
50+
}
51+
} else {
52+
if (!fullyIndexed) {
53+
for (size_t j = 0, M = cell.table.Schema.size(); j<M; j++) {
54+
nested_schema[schema->at(i)].emplace(cell.table.Schema.at(j), j);
55+
}
56+
}
57+
// TODO: efficient nested values on demand (COW)
58+
}
59+
}
60+
fullyIndexed = true;
61+
}
62+
63+
bool hasKey(const std::string& key) const {
64+
return field_to_schema.contains(key);
65+
}
66+
67+
size_t getNativeInt(const std::string& key) const {
68+
auto it = field_to_schema.find(key);
69+
if ((!row) || it == field_to_schema.end() || (isNull.contains(it->second)))
70+
return -1;
71+
else
72+
return std::get<size_t>(row->at(it->second).val);
73+
}
74+
75+
DPtr<script::structures::ScriptAST> get(const std::string& key);
76+
77+
private:
78+
bool fullyIndexed;
79+
};
80+
81+
struct TemplateIndexer {
82+
std::unordered_map<size_t, SchemaIndexer> map;
83+
size_t current_template;
84+
85+
bool contains(const std::string& key) const {
86+
auto it = map.find(current_template);
87+
return it != map.end() && (it->second.hasKey(key));
88+
}
89+
90+
DPtr<script::structures::ScriptAST> get(const std::string&key) {
91+
auto it = map.find(current_template);
92+
if (it == map.end() )
93+
return script::structures::ScriptAST::null_();
94+
return it->second.get(key);
95+
}
96+
97+
size_t getNativeInt(const std::string&key) const {
98+
auto it = map.find(current_template);
99+
if (it == map.end() )
100+
return -1;
101+
return it->second.getNativeInt(key);
102+
}
103+
104+
SchemaIndexer& attemptInitialise(size_t i, const std::vector<std::string> *vector) {
105+
if (!map.contains(i)) {
106+
auto& ref = map.emplace(i, vector->size()).first->second;
107+
ref.setUp(vector);
108+
return ref;
109+
} else {
110+
return map.at(i);
111+
}
112+
}
113+
};
114+
115+
116+
117+
#endif //GSM2_SCHEMAINDEXER_H

0 commit comments

Comments
 (0)