Skip to content

Commit 3b9ac85

Browse files
committed
new JScript lexer (faster)
This lexer runs on the JScript scripting language via Microsoft's ActiveX Script Control COM server (MSScriptControl). To activate this lexer you must set the JScriptScanner property to .T.
1 parent 0e5ef7a commit 3b9ac85

8 files changed

Lines changed: 420 additions & 140 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ src/jsonstringify.ERR
44
*.zip
55
*.PJT
66
*.pjx
7+
scanner.js
78
src/arraytocursor.BAK
89
JSONFoxHelper/obj/x86/Release/JSONFoxHelper.pdb
910
JSONFoxHelper/obj/x86/Release/JSONFoxHelper.dll

JSONFox.PJT

7.8 KB
Binary file not shown.

JSONFox.pjx

130 Bytes
Binary file not shown.

jsonfox.app

7.42 KB
Binary file not shown.

src/jscriptscanner.prg

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
#include "JSONFox.h"

* JScriptScanner
* JSON lexer that delegates tokenization to the Microsoft JScript engine
* (MSScriptControl.ScriptControl COM server) for speed. Activated from
* JSONClass by setting its JScriptScanner property to .T.
define class JScriptScanner as custom
	Hidden source		&& JSON text being scanned
	hidden line			&& current line number (1-based)

	Hidden capacity		&& allocated size of the tokens array
	Hidden length		&& index of the next free slot in the tokens array

	Dimension tokens[1]
	oScript = .null.	&& MSScriptControl.ScriptControl COM instance

	* Store the source text, stripped of characters the scanner cannot
	* handle safely.
	function init(tcSource)
		With this
			.length = 1
			.capacity = 0
			&& IRODG 11/08/2023 Inicio
			* We remove possible invalid characters from the input source.
			* NOTE(review): stripping CHR(10)/CHR(13) here means the JScript
			* C_NEWLINE rule below can never match, so .line always stays
			* at 1 - confirm this is intended.
			tcSource = STRTRAN(tcSource, CHR(0))
			tcSource = STRTRAN(tcSource, CHR(10))
			tcSource = STRTRAN(tcSource, CHR(13))
			&& IRODG 11/08/2023 Fin
			.source = tcSource
			.line = 1
		endwith
	endfunc

	* Convert all JSON escape sequences in a string lexeme to their literal
	* characters. Called back from the embedded JScript code.
	function escapeCharacters(tcLexeme)
		* BUGFIX: the escaped backslash must be held in a placeholder until
		* the end; replacing '\\' with '\' first would let a following
		* letter be re-interpreted as a new escape (e.g. source '\\n'
		* wrongly became a newline). CHR(1) cannot occur in valid JSON text.
		tcLexeme = Strtran(tcLexeme, '\\', Chr(1))
		tcLexeme = Strtran(tcLexeme, '\/', '/')
		tcLexeme = Strtran(tcLexeme, '\n', Chr(10))
		tcLexeme = Strtran(tcLexeme, '\r', Chr(13))
		tcLexeme = Strtran(tcLexeme, '\t', Chr(9))
		tcLexeme = Strtran(tcLexeme, '\"', '"')
		tcLexeme = Strtran(tcLexeme, "\'", "'")
		tcLexeme = Strtran(tcLexeme, Chr(1), '\')
		return tcLexeme
	endfunc

	* Callback from JScript: advance the line counter on each newline.
	* BUGFIX: removed a leftover SET STEP ON that suspended execution in
	* the debugger on every newline.
	procedure increaseNewLine
		this.line = this.line + 1
	endproc

	function checkUnicodeFormat(tcLexeme)
		* Look for unicode (\uXXXX / \UXXXX) escapes and convert them in place.
		** This conversion is better (in performance) than Regular Expressions.
		&& IRODG 09/10/2023 Inicio
		local lcUnicode, lbReplace, lnPos
		lnPos = 1
		do while .T.
			lbReplace = .F.
			* AT()'s third parameter is the occurrence number; because each
			* match is replaced below, the first occurrence advances on
			* every pass until none remain and SUBSTR() yields < 6 chars.
			lcUnicode = substr(tcLexeme, at('\u', tcLexeme, lnPos), 6)
			if len(lcUnicode) == 6
				lbReplace = .T.
			else
				lcUnicode = substr(tcLexeme, at('\U', tcLexeme, lnPos), 6)
				if len(lcUnicode) == 6
					lbReplace = .T.
				endif
			endif
			if lbReplace
				* NOTE(review): STRCONV(...,16) decodes hexBinary; the
				* non-hex '\u' prefix appears to be skipped and the padding
				* CHR(0) is stripped - verify on non-Latin code points.
				tcLexeme = strtran(tcLexeme, lcUnicode, strtran(strconv(lcUnicode,16), chr(0)))
			else
				exit
			endif
		enddo
		&& IRODG 09/10/2023 Fin
		return tcLexeme
	endfunc

	* Run the embedded JScript scanner over .source and return the token
	* array by reference. Each token is an Empty object carrying
	* type/value/line properties.
	* BUGFIX: removed debug leftovers (_cliptext assignment and a
	* MESSAGEBOX that popped up the whole script on every scan).
	Function scanTokens
		With this
			Dimension .tokens[1]

			this.oScript = Createobject([MSScriptcontrol.scriptcontrol.1])
			this.oScript.Language = "JScript"
			local lcScript
			lcScript = this.loadScript()
			this.oScript.AddCode(lcScript)
			* Expose this object so the script can call AddToken(),
			* increaseNewLine(), escapeCharacters(), etc.
			this.oScript.AddObject("oScanner", this)
			this.oScript.Run("ScanTokens", this.source)
			.capacity = .length - 1
			* Shrink array. ScanTokens always emits an EOF token, so
			* .capacity >= 1; the guard keeps Dimension from receiving 0.
			If .capacity > 0
				Dimension .tokens[.capacity]
			EndIf
		endwith
		Return @this.tokens
	endfunc

	* Debug helper: echo to the screen and append to a trace log in the
	* current directory (previously a hard-coded developer path).
	function log(tcContent)
		? tcContent
		strtofile(tcContent + CRLF, 'trace.log', 1)
	endfunc

	* Callback from JScript: append one token to the tokens array, growing
	* it as needed.
	function addToken(tnTokenType, tcTokenValue)
		With this
			.checkCapacity()
			local loToken
			loToken = createobject("Empty")
			=addproperty(loToken, "type", tnTokenType)
			=addproperty(loToken, "value", tcTokenValue)
			=AddProperty(loToken, "line", .line)

			.tokens[.length] = loToken
			.length = .length + 1
		EndWith
	EndFunc

	* Doubling growth strategy for the tokens array (starts at 8 slots).
	Hidden function checkCapacity
		With this
			If .capacity < .length + 1
				If Empty(.capacity)
					.capacity = 8
				Else
					.capacity = .capacity * 2
				EndIf
				Dimension .tokens[.capacity]
			EndIf
		endwith
	endfunc

	* Callback from JScript: raise a VFP error for an unrecognized character.
	procedure showError(tcCharacter, tnCurrent)
		local lcMessage
		lcMessage = "Unknown character ['" + transform(tcCharacter) + "'], ascii: [" + TRANSFORM(ASC(tcCharacter)) + "]"
		* BUGFIX: added the missing space between the position and the message.
		error "SYNTAX ERROR: (" + TRANSFORM(this.line) + ":" + TRANSFORM(tnCurrent) + ") " + lcMessage
	endproc

	* Human-readable rendering of a token, for tracing/diagnostics.
	function tokenStr(toToken)
		local lcType, lcValue
		lcType = _screen.jsonUtils.tokenTypeToStr(toToken.type)
		lcValue = alltrim(transform(toToken.value))
		return "Token(" + lcType + ", '" + lcValue + "') at Line(" + Alltrim(Str(toToken.Line)) + ")"
	endfunc

	* Return the JScript scanner source that is injected into the
	* ScriptControl by scanTokens().
	function loadScript
		local lcScript
		text to lcScript noshow
			var C_LBRACE = 1
			var C_RBRACE = 2
			var C_LBRACKET = 3
			var C_RBRACKET = 4
			var C_COMMA = 5
			var C_COLON = 6
			var C_NULL = 9
			var C_NUMBER = 10
			var C_STRING = 12
			var C_EOF = 17
			var C_BOOLEAN = 18
			var C_NEWLINE = 19

			var Spec = [
				// --------------------------------------
				// Whitespace:
				[/^[ \t\r\f]/, null],

				// --------------------------------------
				// New line:
				[/^\n/, C_NEWLINE],

				// --------------------------------------
				// Keywords
				[/^\btrue\b/, C_BOOLEAN],
				[/^\bfalse\b/, C_BOOLEAN],
				[/^\bnull\b/, C_NULL],

				// --------------------------------------
				// Symbols
				[/^\{/, C_LBRACE],
				[/^\}/, C_RBRACE],
				[/^\[/, C_LBRACKET],
				[/^\]/, C_RBRACKET],
				[/^\:/, C_COLON],
				[/^\,/, C_COMMA],

				// --------------------------------------
				// Numbers. BUGFIX: the previous pattern accepted thousands
				// separators ((,\d{3})*), which swallowed the structural
				// comma in arrays like [1,234]. JSON numbers have no
				// grouping separators.
				[/^-?\d+(\.\d+)?([eE][-+]?\d+)?/, C_NUMBER],

				// --------------------------------------
				// Double quoted string:
				[/^"/, C_STRING]
			];

			var _scannerString;
			var _scannerCursor;

			// Tokenize the whole source, pushing each token back into the
			// VFP scanner object, and terminate with an EOF token.
			function ScanTokens(source) {
				_scannerString = source;
				_scannerCursor = 0; // track the position of each character

				while (_scannerCursor < _scannerString.length) {
					var token = _getNextToken();
					if (token == null) {
						break;
					}
					oScanner.AddToken(token.type, token.value);
				}
				oScanner.AddToken(C_EOF, '');
			}

			// Match the next token at the cursor, skipping whitespace and
			// newlines; raise a VFP error on an unrecognized character.
			function _getNextToken() {
				if (_scannerCursor >= _scannerString.length) {
					return null;
				}
				var string = _scannerString.slice(_scannerCursor);

				for (var i = 0; i < Spec.length; i++) {
					var regexp = Spec[i][0];
					var tokenType = Spec[i][1];
					var tokenValue = _matchRegEx(regexp, string);

					if (tokenValue == null) {
						continue;
					}

					if (tokenType == null) {
						return _getNextToken();
					}

					if (tokenType === C_NEWLINE) {
						oScanner.increaseNewLine();
						return _getNextToken();
					}
					var literal = tokenValue;
					if (tokenType === C_STRING) {
						literal = _parseString();
					}

					return {
						type: tokenType,
						value: literal
					};
				}

				oScanner.showError(string[0], _scannerCursor);
			}

			// Try one regex at the start of the remaining input; advance
			// the cursor past the match on success.
			function _matchRegEx(regexp, string) {
				var matched = regexp.exec(string);
				if (matched == null) {
					return null;
				}
				_scannerCursor += matched[0].length;
				return matched[0];
			}

			// Scan the body of a double-quoted string (opening quote is
			// already consumed), honoring backslash escapes, then let the
			// VFP side expand escapes and \uXXXX sequences.
			function _parseString() {
				var ch = '';
				var looping = true;
				var start = _scannerCursor-1;
				var pn = '';
				while (_scannerCursor < _scannerString.length) {
					ch = _scannerString.charAt(_scannerCursor);
					switch (ch) {
						case '\\':
							pn = (_scannerCursor+1 <= _scannerString.length) ? _scannerString.charAt(_scannerCursor+1) : '';
							if (pn === '\\' || pn === '/' || pn === 'n' || pn === 'r' || pn === 't' || pn === '"' || pn === "'") {
								_scannerCursor++;
							}
							break;
						case '"':
							looping = false;
							break;
						default:
							break;
					}
					_scannerCursor++;
					if (!looping) {
						break;
					}
				}
				var lexeme = _scannerString.slice(start+1, _scannerCursor-1);
				lexeme = oScanner.escapeCharacters(lexeme);
				lexeme = oScanner.checkUnicodeFormat(lexeme);
				return lexeme;
			}
		endtext
		return lcScript
	endfunc
enddefine

src/jsonclass.prg

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,18 @@ define class JSONClass as session
44
LastErrorText = ""
55
lError = .f.
66
lShowErrors = .t.
7-
version = "9.27"
7+
version = "10.1"
88
hidden lInternal
99
hidden lTablePrompt
1010
Dimension aCustomArray[1]
1111
&& >>>>>>> IRODG 07/01/21
1212
* Set this property to .T. if you want the lexer uses JSONFoxHelper.dll
1313
NETScanner = .f.
1414
&& <<<<<<< IRODG 07/01/21
15+
16+
&& >>>>>>> IRODG 02/27/24
17+
JScriptScanner = .f.
18+
&& <<<<<<< IRODG 02/27/24
1519

1620
*Function Init
1721
function init
@@ -32,11 +36,14 @@ define class JSONClass as session
3236
try
3337
this.ResetError()
3438
local lexer, parser
35-
if this.NETScanner
39+
do case
40+
case this.NETScanner
3641
lexer = createobject("NetScanner", tcJsonStr)
37-
else
42+
case this.JScriptScanner
43+
lexer = createobject("JScriptScanner", tcJsonStr)
44+
otherwise
3845
lexer = createobject("Tokenizer", tcJsonStr)
39-
endif
46+
endcase
4047
parser = createobject("Parser", lexer)
4148
loJSONObj = parser.Parse()
4249

@@ -64,11 +71,14 @@ define class JSONClass as session
6471
try
6572
this.ResetError()
6673
local lexer
67-
if this.NETScanner
74+
do case
75+
case this.NETScanner
6876
lexer = createobject("NetScanner", tcJsonStr)
69-
else
77+
case this.JScriptScanner
78+
lexer = createobject("JScriptScanner", tcJsonStr)
79+
otherwise
7080
lexer = createobject("Tokenizer", tcJsonStr)
71-
endif
81+
endcase
7282
Local laTokens
7383
laTokens = lexer.scanTokens()
7484
IF FILE(tcOutput)

0 commit comments

Comments
 (0)