1+ #include " JSONFox.h"
2+ * JScriptScanner
3+ define class JScriptScanner as custom
4+ Hidden source
5+ hidden line
6+
7+ Hidden capacity
8+ Hidden length
9+
10+ Dimension tokens[1 ]
11+ oScript = .null.
12+
function init(tcSource)
    * Constructor: stores a sanitized copy of the text to scan and resets
    * the token-buffer bookkeeping.
    *   tcSource - source text that scanTokens() will later tokenize.
    local lcClean

    * IRODG 11/08/2023: remove characters that are invalid in the input
    * source. NUL, LF and CR are dropped in one pass; CHRTRAN() deletes every
    * character of the search set that has no counterpart in the (empty)
    * replacement set.
    lcClean = Chrtran(tcSource, Chr(0) + Chr(10) + Chr(13), '')

    this.length = 1      && next free slot in tokens[]
    this.capacity = 0    && 0 makes checkCapacity() allocate on first use
    this.source = lcClean
    this.line = 1
endfunc
27+
function escapeCharacters(tcLexeme)
    * Converts the backslash escape sequences of a string lexeme into the
    * characters they stand for.
    *   tcLexeme - raw text found between the quotes of a string token.
    * Returns the lexeme with \\ \/ \n \r \t \b \f \" and \' decoded.
    *
    * An escaped backslash must not be allowed to combine with the character
    * that follows it (the text \\n is a backslash followed by "n", not a
    * line feed). Every \\ pair is therefore parked behind a marker first and
    * turned into a single backslash only after all other escapes are done.
    * Chr(0) is used as the marker because init() strips it from the source,
    * so it cannot occur in a lexeme.
    local lcMark
    lcMark = Chr(0)

    tcLexeme = Strtran(tcLexeme, '\\', lcMark)
    tcLexeme = Strtran(tcLexeme, '\/', '/')
    tcLexeme = Strtran(tcLexeme, '\n', Chr(10))
    tcLexeme = Strtran(tcLexeme, '\r', Chr(13))
    tcLexeme = Strtran(tcLexeme, '\t', Chr(9))
    tcLexeme = Strtran(tcLexeme, '\b', Chr(8))
    tcLexeme = Strtran(tcLexeme, '\f', Chr(12))
    tcLexeme = Strtran(tcLexeme, '\"', '"')
    tcLexeme = Strtran(tcLexeme, "\'", "'")

    * Finally restore the parked backslashes.
    return Strtran(tcLexeme, lcMark, '\')
endfunc
39+
procedure increaseNewLine
    * Advances the current line counter by one. The embedded JScript scanner
    * calls this each time it consumes a new-line character.
    * (The leftover SET STEP ON debugger breakpoint was removed.)
    this.line = this.line + 1
endproc
44+
function checkUnicodeFormat(tcLexeme)
    * Replaces \uXXXX / \UXXXX sequences in a lexeme with their decoded text.
    *   tcLexeme - string lexeme, already processed by escapeCharacters().
    * Returns the lexeme with every well-formed sequence decoded; malformed
    * or truncated sequences are left untouched.
    *
    * IRODG 09/10/2023: this conversion performs better than regular
    * expressions.
    *
    * The text is consumed in a single forward pass: decoded output is never
    * rescanned and every iteration shortens the remaining text, so the loop
    * always terminates.
    local lcResult, lcRest, lnLower, lnUpper, lnAt, lcEscape

    lcResult = ''
    lcRest = tcLexeme
    do while .T.
        * AT() is case-sensitive, so both spellings are searched and the
        * earliest hit wins.
        lnLower = At('\u', lcRest)
        lnUpper = At('\U', lcRest)
        do case
        case lnLower == 0
            lnAt = lnUpper
        case lnUpper == 0
            lnAt = lnLower
        otherwise
            lnAt = Min(lnLower, lnUpper)
        endcase

        lcEscape = Iif(lnAt > 0, Substr(lcRest, lnAt, 6), '')
        if Len(lcEscape) < 6
            * Nothing left to decode, or the sequence is cut off by the end
            * of the text (anything after it is necessarily too short too).
            exit
        endif

        if Len(Chrtran(Upper(Right(lcEscape, 4)), '0123456789ABCDEF', '')) == 0
            * Four hex digits: decode with the same STRCONV(..., 16) call the
            * scanner has always used, dropping the Chr(0) bytes it yields.
            lcResult = lcResult + Left(lcRest, lnAt - 1) + Strtran(Strconv(lcEscape, 16), Chr(0))
            lcRest = Substr(lcRest, lnAt + 6)
        else
            * Not a valid escape: copy the two marker characters verbatim
            * and keep scanning right after them.
            lcResult = lcResult + Left(lcRest, lnAt + 1)
            lcRest = Substr(lcRest, lnAt + 2)
        endif
    enddo

    return lcResult + lcRest
endfunc
71+
Function scanTokens
    * Tokenizes this.source with the embedded JScript scanner and returns
    * the resulting tokens[] array by reference.
    *
    * The script returned by loadScript() is run inside a Microsoft Script
    * Control; it calls back into this object (exposed to the script as
    * "oScanner") to add tokens, count lines and report errors.
    local lcScript
    With this
        * Start from an empty buffer so that a second call on the same
        * instance does not write past the freshly re-dimensioned array.
        Dimension .tokens[1]
        .length = 1
        .capacity = 0
        .line = 1

        lcScript = .loadScript()

        .oScript = Createobject([MSScriptcontrol.scriptcontrol.1])
        .oScript.Language = "JScript"
        .oScript.AddCode(lcScript)
        .oScript.AddObject("oScanner", this)
        try
            .oScript.Run("ScanTokens", .source)
        finally
            * The script control holds a reference back to this object;
            * dropping it here avoids a circular reference that would keep
            * the scanner alive after the caller releases it.
            .oScript = .null.
        endtry

        * Shrink the array to the number of tokens actually stored
        * (length always points at the next free slot).
        .capacity = .length - 1
        Dimension .tokens[.capacity]
    endwith
    Return @this.tokens
endfunc
92+
function log(tcContent, tcLogFile)
    * Debug helper: prints a value and appends it, followed by CRLF, to a
    * trace file.
    *   tcContent - value to trace; non-character values are converted with
    *               TRANSFORM() so the concatenation below cannot fail.
    *   tcLogFile - optional trace file. When omitted or empty, the original
    *               hard-coded location is used, so existing callers behave
    *               as before.
    local lcText
    lcText = Transform(tcContent)
    if Vartype(tcLogFile) != 'C' or Empty(tcLogFile)
        tcLogFile = 'f:\desarrollo\github\jsonfox\trace.log'
    endif
    ? lcText
    * Third argument 1 = append to the file instead of overwriting it.
    strtofile(lcText + CRLF, tcLogFile, 1)
endfunc
97+
function addToken(tnTokenType, tcTokenValue)
    * Appends one token to tokens[]. Called by the embedded JScript scanner.
    *   tnTokenType  - numeric token type.
    *   tcTokenValue - token value as produced by the scanner.
    * Each token is an Empty object with the properties type, value and
    * line (the scanner line current at the time of the call).
    local loEntry, lnSlot

    * Make sure the array has room before writing.
    this.checkCapacity()
    lnSlot = this.length

    loEntry = Createobject("Empty")
    AddProperty(loEntry, "type", tnTokenType)
    AddProperty(loEntry, "value", tcTokenValue)
    AddProperty(loEntry, "line", this.line)

    this.tokens[lnSlot] = loEntry
    this.length = lnSlot + 1
endfunc
111+
Hidden function checkCapacity
    * Grows tokens[] when there is no spare room after the next free slot.
    * The first allocation is 8 elements; after that the size doubles.
    local lnNeeded
    lnNeeded = this.length + 1
    if this.capacity >= lnNeeded
        * Enough room already.
        return
    endif

    this.capacity = Iif(Empty(this.capacity), 8, this.capacity * 2)
    Dimension this.tokens[this.capacity]
endfunc
124+
procedure showError(tcCharacter, tnCurrent)
    * Raises a "SYNTAX ERROR" for a character the scanner cannot match.
    *   tcCharacter - offending character, as passed by the script.
    *   tnCurrent   - scanner cursor position at the time of the failure.
    * Always raises an error through the ERROR command.
    local lcShown, lcCode, lcMessage

    if Vartype(tcCharacter) == 'C' and Len(tcCharacter) > 0
        lcShown = tcCharacter
        lcCode = Transform(Asc(tcCharacter))
    else
        * The script may hand over something that is not a character
        * (null, empty, another type). ASC() would then fail with an
        * unrelated error and hide the real problem, so report what was
        * received and mark the code as unknown.
        lcShown = Iif(Isnull(tcCharacter), 'null', Transform(tcCharacter))
        lcCode = '?'
    endif

    lcMessage = "Unknown character ['" + lcShown + "'], ascii: [" + lcCode + "]"
    error "SYNTAX ERROR: (" + Transform(this.line) + ":" + Transform(tnCurrent) + ")" + lcMessage
endproc
130+
function tokenStr(toToken)
    * Builds a printable description of a token.
    *   toToken - object exposing the properties type, value and line.
    * Returns text of the form: Token(<type>, '<value>') at Line(<n>)
    * The type name is obtained from _screen.jsonUtils.tokenTypeToStr().
    local lcKind, lcText, lcLine
    lcKind = _screen.jsonUtils.tokenTypeToStr(toToken.type)
    lcText = Alltrim(Transform(toToken.value))
    lcLine = Alltrim(Str(toToken.Line))
    return "Token(" + lcKind + ", '" + lcText + "') at Line(" + lcLine + ")"
endfunc
137+
function loadScript
    * Returns the JScript source of the tokenizer that scanTokens() loads
    * into the script control.
    *
    * The script defines ScanTokens(source). It walks the source text and
    * calls back into the host object, published to the script under the
    * name "oScanner":
    *   oScanner.AddToken(type, value)         - store a token
    *   oScanner.increaseNewLine()             - a new-line was consumed
    *   oScanner.escapeCharacters(lexeme)      - decode backslash escapes
    *   oScanner.checkUnicodeFormat(lexeme)    - decode \uXXXX sequences
    *   oScanner.showError(character, cursor)  - unknown character
    *
    * Everything between TEXT and ENDTEXT is copied verbatim into the
    * script, so only JScript comments may be used inside it.
    local lcScript
    text to lcScript noshow
        // Token type codes handed to oScanner.AddToken().
        // NOTE(review): presumably these mirror the token constants used by
        // the rest of the library - confirm before changing any value.
        var C_LBRACE = 1
        var C_RBRACE = 2
        var C_LBRACKET = 3
        var C_RBRACKET = 4
        var C_COMMA = 5
        var C_COLON = 6
        var C_NULL = 9
        var C_NUMBER = 10
        var C_STRING = 12
        var C_EOF = 17
        var C_BOOLEAN = 18
        var C_NEWLINE = 19

        // Ordered list of [pattern, token type]; the first pattern that
        // matches at the cursor wins. A null type means "skip".
        var Spec = [
            // --------------------------------------
            // Whitespace: a whole run is consumed per match, so skipping
            // blanks costs one nested call per run, not one per character.
            [/^[ \t\r\f]+/, null],

            // --------------------------------------
            // New line:
            [/^\n/, C_NEWLINE],

            // --------------------------------------
            // Keywords
            [/^\btrue\b/, C_BOOLEAN],
            [/^\bfalse\b/, C_BOOLEAN],
            [/^\bnull\b/, C_NULL],

            // --------------------------------------
            // Symbols
            [/^\{/, C_LBRACE],
            [/^\}/, C_RBRACE],
            [/^\[/, C_LBRACKET],
            [/^\]/, C_RBRACKET],
            [/^\:/, C_COLON],
            [/^\,/, C_COMMA],

            // --------------------------------------
            // Numbers: optional sign, digits, optional fraction and
            // exponent. Commas are NOT part of a number: they separate
            // values, so [1,200] must yield two numbers.
            [/^-?\d+(\.\d+)?([eE][-+]?\d+)?/, C_NUMBER],

            // --------------------------------------
            // Double quoted string (only the opening quote is matched here;
            // _parseString() consumes the rest):
            [/^"/, C_STRING]
        ];

        var _scannerString;
        var _scannerCursor;

        // Entry point: tokenizes source and always finishes with C_EOF.
        function ScanTokens(source) {
            _scannerString = source;
            _scannerCursor = 0; // track the position of each character

            while (_scannerCursor < _scannerString.length) {
                var token = _getNextToken();
                if (token == null) {
                    break;
                }
                oScanner.AddToken(token.type, token.value);
            }
            oScanner.AddToken(C_EOF, '');
        }

        // Returns the next token as {type, value}, or null when the input
        // is exhausted or an unknown character was reported.
        function _getNextToken() {
            if (_scannerCursor >= _scannerString.length) {
                return null;
            }
            var string = _scannerString.slice(_scannerCursor);

            for (var i = 0; i < Spec.length; i++) {
                var regexp = Spec[i][0];
                var tokenType = Spec[i][1];
                var tokenValue = _matchRegEx(regexp, string);

                if (tokenValue == null) {
                    continue;
                }

                // Skipped input (whitespace): go on with what follows.
                if (tokenType == null) {
                    return _getNextToken();
                }

                if (tokenType === C_NEWLINE) {
                    oScanner.increaseNewLine();
                    return _getNextToken();
                }

                var literal = tokenValue;
                if (tokenType === C_STRING) {
                    literal = _parseString();
                }

                return {
                    type: tokenType,
                    value: literal
                };
            }

            // charAt() instead of string[0]: bracket indexing of strings is
            // not available in older JScript engines and yields undefined.
            oScanner.showError(string.charAt(0), _scannerCursor);
            return null;
        }

        // Tries regexp at the start of string; on success advances the
        // cursor past the match and returns the matched text.
        function _matchRegEx(regexp, string) {
            var matched = regexp.exec(string);
            if (matched == null) {
                return null;
            }
            _scannerCursor += matched[0].length;
            return matched[0];
        }

        // Consumes a string body; the cursor is just past the opening quote.
        // Returns the decoded text between the quotes.
        function _parseString() {
            var ch = '';
            var looping = true;
            var start = _scannerCursor - 1; // index of the opening quote
            var pn = '';
            while (_scannerCursor < _scannerString.length) {
                ch = _scannerString.charAt(_scannerCursor);
                switch (ch) {
                    case '\\':
                        // Step over the escaped character so that an escaped
                        // quote does not terminate the string.
                        pn = (_scannerCursor + 1 <= _scannerString.length) ? _scannerString.charAt(_scannerCursor + 1) : '';
                        if (pn === '\\' || pn === '/' || pn === 'n' || pn === 'r' || pn === 't' || pn === '"' || pn === "'") {
                            _scannerCursor++;
                        }
                        break;
                    case '"':
                        looping = false;
                        break;
                    default:
                        break;
                }
                _scannerCursor++;
                if (!looping) {
                    break;
                }
            }
            // Text strictly between the opening and the closing quote.
            var lexeme = _scannerString.slice(start + 1, _scannerCursor - 1);
            lexeme = oScanner.escapeCharacters(lexeme);
            lexeme = oScanner.checkUnicodeFormat(lexeme);
            return lexeme;
        }
    endtext
    return lcScript
endfunc
283+ enddefine
0 commit comments