dparse.lexer coverage

      10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
930
940
950
960
970
980
990
1000
1010
1020
1030
1040
1050
1060
1070
1080
1090
1100
1110
1120
1130
1140
1150
1160
1170
1180
1190
1200
1210
1220
1230
1240
1250
1260
1270
1280
1290
1300
1310
1320
1330
1340
1350
1360
1370
1380
1390
1400
1410
1420
1430
1440
1450
1460
1470
1480
1490
1500
1510
1520
1530
1540
1550
1560
1570
1580
1590
1600
1610
1620
1630
1640
1650
1660
1670
1680
1690
1700
1710
1720
1730
1740
1750
1760
1770
1780
1790
1800
1810
1820
1830
1840
1850
1860
1870
1880
1890
1900
1910
1920
1930
1940
1950
1960
1970
1980
1990
2000
2010
2020
2030
2040
2050
2060
2070
2080
2090
2100
2110
2120
2130
2140
2150
2160
2170
2180
2190
2200
2210
2220
2230
2240
2250
2260
2270
2280
2290
2300
2310
2320
2330
2340
2350
2360
2370
2380
2390
2400
2410
2420
2430
2440
2450
2460
2470
2480
2490
2500
2510
2520
2530
2540
2550
2560
2570
2580
2590
2600
2610
2620
2630
2640
2650
2660
2670
2680
2690
2700
2710
2720
2730
2740
2750
2760
2770
2780
2790
2800
2810
2820
2830
2840
2850
2860
2870
2880
2890
2900
2910
2920
2930
2940
2950
2960
2970
2980
2990
3000
3010
3020
3030
3040
3050
3060
3070
3080
3090
3100
3110
3120
3130
3140
3150
3160
3170
3180
3190
3200
3210
3220
3230
3240
3250
3260
3270
3280
3290
3300
3310
3320
3330
3340
3350
3360
3370
3380
3390
3400
3410
3420
3430
3440
3450
3460
3470
3480
3490
3500
3510
3520
3530
3540
3550
3560
3570
3580
3590
3600
3610
3620
3630
3640
3650
3660
3670
3680
3690
3700
3710
3720
3730
3740
3750
3760
3770
3780
3790
3800
3810
3820
3830
3840
3850
3860
3870
3880
3890
3900
3910
3920
3930
3940
3950
3960
3970
3980
3990
4000
4010
4020
4030
4040
4050
4060
4070
4080
4090
4100
4110
4120
4130
4140
4150
4160
4170
4180
4190
4200
4210
4220
4230
4240
4250
4260
4270
4280
4290
4300
4310
4320
4330
4340
4350
4360
4370
4380
4390
4400
4410
4420
4430
4440
4450
4460
4470
4480
4490
4500
4510
4520
4530
4540
4550
4560
4570
4580
4590
4600
4610
4620
4630
4640
4650
4660
4670
4680
4690
4700
4710
4720
4730
4740
4750
4760
4770
4780
4790
4800
4810
4820
4830
4840
4850
4860
4870
4880
4890
4900
4910
4920
4930
4940
4950
4960
4970
4980
4990
5000
5010
5020
5030
5040
5050
5060
5070
5080
5090
5100
5110
5120
5130
5140
5150
5160
5170
5180
5190
5200
5210
5220
5230
5240
5250
5260
5270
5280
5290
5300
5310
5320
5330
5340
5350
5360
5370
5380
5390
5400
5410
5420
5430
5440
5450
5460
5470
5480
5490
5500
5510
5520
5530
5540
5550
5560
5570
5580
5590
5600
5610
5620
5630
5640
5650
5660
5670
5680
5690
5700
5710
5720
5730
5740
5750
5760
5770
5780
5790
5800
5810
5820
5830
5840
5850
5860
5870
5880
5890
5900
5910
5920
5930
5940
5950
5960
5970
5980
5990
6000
6010
6020
6030
6040
6050
6060
6070
6080
6090
6100
6110
6123135
6130
6140
6150
6160
6170
6180
6190
6200
6210
6220
6230
624649
625649
626649
627649
628258
629258
630258
6310
6320
6330
6340
6350
6360
6370
6380
6390
640258
641258
6420
6432228
6442228
6452228
6460
6470
6480
6490
6500
6510
6520
6530
6540
6550
656107226
6570
65853464
6590
6600
6610
66253613
6630
6640
6650
6660
6670
6680
6690
6700
6710
6720
67317633
67417633
67517633
67617633
67717633
678108
679108
680108
681108
682108
683108
6840
6850
6860
6870
6880
6890
6900
6910
6920
6930
6940
6950
6960
6970
69835872
69935872
7000
70117741
70235905
70335905
70435905
7050
7060
7070
7080
7090
7101746
7110
712873
713873
7140
7150
7160
7170
7180
7190
7200
7210
7220
7230
7240
7250
7260
727873
728873
729873
7300
7310
7320
7330
7340
7350
7360
7370
7380
7390
7400
7410
7420
7430
7440
7450
7460
7470
7480
7490
7500
7510
7520
7530
7540
7550
7560
7570
7580
7590
7600
7610
7620
7630
7640
7650
7660
7670
7680
7690
7700
7710
7720
7730
7740
7750
7760
7770
7780
7790
7800
7810
7820
7830
7840
7850
7860
7870
7880
7890
7900
7910
7920
7930
7940
7950
7960
7970
7980
7990
8000
8010
8020
8030
8040
8050
8060
8070
8080
8090
8100
8110
8120
8130
8140
8150
8160
8170
8180
8190
8200
8210
8220
8230
8240
8250
8260
8270
8280
8290
8300
8310
8320
8330
8340
8350
8360
8370
8380
8390
8400
8410
8420
8430
8440
8450
8460
8470
8480
8490
8500
8510
8520
8530
8540
8550
8560
8570
8580
8590
8600
8610
8620
8630
8640
8650
8660
8670
8680
8690
8700
8710
8720
8731079
8740
8750
8760
8770
8780
8791952
8801952
8811952
8820
8830
8840
8850
8860
8873904
8880
8893904
8900
89117493
8921952
8930
8940
8953904
8960
8971949
8981949
8991949
9000
9010
9023
9030
9040
9050
9061952
9070
9080
9090
9100
9110
9120
9130
9140
9150
9160
9170
9180
9190
9200
9210
9220
9230
9240
9250
9260
9270
9280
92945
930135
93121
9320
9330
9340
9350
9360
93724
9380
93924
94024
94148
94248
94348
94424
94548
9460
9470
9480
9490
95024
9510
9520
9530
9540
9550
9560
9570
9580
9590
9600
9611907
9621907
9630
9640
9651952
9660
9670
9680
9690
9700
9710
9720
9730
9740
9750
9760
9770
9780
9790
9800
9810
9820
9830
9840
9850
9860
9870
9880
9890
9900
9910
9920
9930
9940
9950
9960
9970
9980
9990
10000
10010
10020
10030
10040
10050
10060
10070
10080
10090
10100
10110
10120
10130
10140
10150
10160
10170
10180
10190
10200
10210
10220
10230
10240
10250
10260
10270
10280
10290
10300
10310
10320
10330
10340
10350
10360
10370
10380
10390
10400
10410
10420
10430
10440
10450
10460
10470
10480
10490
10500
10510
10520
10530
10540
10550
10560
10570
10580
10590
10600
10610
10620
10630
10640
10650
10660
10670
10680
10690
10700
10710
10720
10730
10740
10750
10760
10770
10780
10790
10800
10810
10820
10830
10840
10850
10860
10870
10880
10890
10900
10910
10920
10930
10940
10950
10960
10970
10980
10990
11000
11010
11020
11030
11040
11050
110627
110727
1108194
11090
11100
11110
1112388
1113194
11140
11150
1116194
11170
111847
111994
11200
112127
112227
11230
11240
11250
1126147
11270
112827
112927
11300
11310
11320
11330
11340
11350
11360
1137792
1138792
11392191
11400
11410
11420
11434382
11440
11452186
11460
11470
11480
11494382
1150792
11511399
11520
1153792
1154792
11550
11560
11570
11580
11590
11600
11610
116235
116335
116435
116535
11661559
11670
11680
11690
11701524
11710
1172762
11730
11740
11750
1176762
11770
117870
1179140
11800
118135
118235
11830
11840
1185692
11860
118751
1188102
11890
11900
11910
11920
11930
11940
1195641
11960
119735
119835
11990
12000
12010
12020
12030
12040
12050
12063357
12075325
12080
12095325
12100
12110
12120
12130
12140
12150
12160
121710650
12180
12195234
12200
12210
12220
12235325
12240
12253357
12263357
12270
12281968
12290
1230366
12310
12320
12330
12340
12350
12360
12371602
12380
12393357
12403357
12413357
12420
12430
12440
12450
12460
12470
1248506
1249506
1250506
12510
1252506
12531146
12540
12551146
12560
12570
12580
12590
12600
12610
12620
12632292
12640
12651143
12660
12670
12680
12691146
12700
1271506
1272506
12730
12740
1275640
12760
12770
12780
12790
12800
12810
12820
12830
12840
12850
12860
12870
12880
12890
12900
12910
12920
12930
12940
12950
12960
12970
12980
12990
13000
13010
13020
13030
13040
1305506
1306506
13070
13080
13090
13100
13110
13123863
13130
13140
13150
13160
13170
13180
13193863
13200
13210
13220
13230
132411589
13250
13260
13270
13280
13290
13300
13310
13320
13330
13340
13350
13360
13370
13380
13390
13400
13410
13420
13430
13440
13450
13460
13470
13480
13490
13500
13510
13520
13530
13540
13550
13560
13570
13580
13590
13600
13610
13620
13630
13640
13650
13660
13670
13680
13690
13700
13710
13720
13730
13740
13750
13760
13770
13780
13790
13800
13810
13820
13830
13840
13850
13860
13870
13880
13890
13900
13910
13920
13930
13940
13950
13960
13970
13980
13990
14000
14010
14020
14030
14040
14050
14060
14070
14080
14090
14100
14110
14120
14130
14140
14150
14160
14170
14180
14190
14200
14210
14220
14230
14240
14250
14260
14270
14280
14290
14300
14310
14320
14330
14340
14350
14360
14370
14380
14390
14400
14410
14420
14430
14440
14450
14460
14470
14480
14490
14500
14510
14520
14530
14540
14550
14560
14570
14580
14590
14600
14610
14620
14630
14640
14650
14660
14670
14680
14690
14700
14710
14720
14730
14740
14750
14760
14770
14780
14790
14800
14810
14820
14830
14840
14850
14860
14870
14880
14890
14900
14910
14920
14930
14940
14950
14960
14970
14980
14990
15000
15010
15020
15030
15040
15050
15060
15070
15080
15090
15100
15110
15120
15130
15140
15150
15160
15170
15180
15190
15200
15210
15220
15230
15240
15250
15260
15270
15280
15290
15300
15310
15320
15330
15340
15350
15360
15370
15380
15390
15400
15410
15420
15430
15440
15450
15460
15470
15480
15490
15500
15510
15520
1553378
1554378
15550
15560
15570
15580
1559378
15600
15610
15623
156330
156430
156536
156636
156736
156836
1569373
1570374
1571378
1572378
1573378
1574378
15750
15760
15770
15780
15790
15800
15810
15820
15830
15840
15850
15860
15870
15880
15890
15900
15910
15920
15930
15940
15950
15960
15970
15980
15990
16000
16010
16020
16030
16040
16050
16060
16070
16080
16090
16100
16110
16120
16130
16140
16150
16160
16170
16180
16190
16200
16210
16220
16230
16240
16250
16260
16270
16280
16290
16300
16310
16320
16330
16340
16350
16360
16370
16380
16390
16400
16410
16420
16430
16440
16450
16460
16470
16480
16490
16500
16510
16520
16530
16540
16550
16560
16570
16580
16590
16600
16610
16620
16630
16640
16650
16660
16670
16680
16690
16700
16710
16720
16730
16740
1675378
16760
16770
16780
16790
16800
1681117
1682117
16830
1684117
168512
1686105
16870
16880
16890
16900
16910
1692105
16930
16940
16950
16960
16970
1698105
16990
1700234
17010
1702117
1703117
17040
17050
17060
17070
17080
17090
17100
17110
17120
17130
17140
17150
17160
17170
171826504
17190
17200
17210
17220
17230
17240
172527600
17260
17270
17280
172955200
17300
173127489
17320
173327489
173427489
17350
17360
173727600
173826504
17390
17401096
17410
174226504
17430
17440
17450
17460
17470
17480
17499353
17500
17510
17520
17530
17540
17559353
17560
17570
17580
17590
17600
17610
17620
17630
17640
17650
17660
17670
17680
17690
17700
17719353
17729353
17739353
17749353
17750
17760
17770
17780
17790
17800
17810
17820
17830
17840
17850
17860
17870
17880
17890
17900
17910
17920
17930
17940
17950
17960
17970
17980
17990
18000
18010
18020
18030
18040
18050
180663326
18070
180863326
18090
18100
18110
18120
18130
18140
18150
18160
18170
18180
18190
18200
18210
18220
18230
18240
18250
18260
182763326
182863326
182929107
183034219
183134219
18320
18330
18340
18350
18360
18370
18380
18390
18400
18410
18420
18430
18440
18450
18460
18470
18480
18490
18500
18510
18520
18530
18540
18550
18560
18570
18580
18590
18600
18610
18620
18630
18640
18650
18660
18670
18680
18690
18700
18710
18720
18730
18740
18750
18760
18770
18780
18790
18800
18810
18820
18830
18840
18850
18860
18870
18880
18890
18900
18910
18920
18930
18940
18950
18960
18970
18980
18990
19000
19010
19020
19030
19040
19050
19060
19070
19080
19090
19100
19110
19120
19130
19140
19150
19160
19170
19180
19190
19200
19210
19220
19230
19240
19250
19260
19270
19280
19290
19300
19310
19320
19330
19340
19350
19360
19370
19380
19390
19400
19410
19420
19430
19440
19450
19460
194738
19480
19490
19500
19510
19520
195338
195438
1955114
19560
19570
19580
19590
19600
19610
19620
19630
19640
196538
19660
19670
19680
19690
197038
197176
19720
197338
197438
197538
19760
1977467058
19780
1979155648
1980163676
19810
19828028
19830
19848028
19858028
19868028
19870
19880
198938
199038
199138
19920
19930
19940
19950
19960
19970
19980
1999138390
20000
200169195
20020
20030
20040
20050
20060
20070
20080
20090
20100
20110
20120
20130
20140
20150
20160
20170
20180
20190
20200
202169195
202269195
202369195
202469195
202561167
20268028
20278028
20288028
20290
20300
20318028
20328028
20338028
20348028
20358028
20368028
20378028
20388028
20398028
20400
20410
20420
20430
20440
204569195
204669195
204770598
20480
2049123737
205061167
20511403
20520
20538028
20540
20550
20560
20570
20580
205969195
206069195
20610
20620
20630
206469195
206569195
206669195
2067139532
20680
206970337
20700
20710
20720
207370337
207470337
207570337
207670337
207770337
207870337
20790
208069195
20810
208211490
208311490
208411490
208521807
208621807
208721807
208859887
208959887
209059887
209159887
20929308
20939308
20940
209569195
209669195
209769195
209869195
20990
21000
21010
21020
21030
21048028
21050
21060
21078028
21088028
21090
21100
21110
21128028
21138028
211416056
21150
21167990
21177990
21187990
21197990
21200
21217990
212215980
21230
21240
21250
21260
212738
212838
212938
213038
213176
21320
21330
21340
21350
21360
21370
21380
21390
21400
21410
21420
21430
21440
21450
21460
21470
21480
21490
21500
21510
21520
21530
21540
21550
21560
21570
21580
21590
21600
21610
21620
21630
21640
21650
21660
21670
21680
21690
21700
21710
21720
21730
21740
21750
21760
21770
21780
21790
21800
21810
21820
21830
21840
21850
21860
21870
21880
21890
21900
21910
21920
21930
21940
21950
21960
21970
21980
21990
22000
22010
22020
22030
22040
22050
22060
22070
22080
22090
22100
22110
22120
22130
22140
22150
22160
22170
22180
22190
22200
22210
22220
22230
22240
22250
22260
22270
22280
22290
22300
22310
22320
22330
22340
22350
22360
22370
22380
22390
22400
22410
22420
22430
22440
22450
22460
22470
22480
22490
22500
22510
22520
22530
22540
22550
22560
22570
22580
22590
22600
22610
22620
22630
226462983
226562983
226662983
22670
22680
22690
22700
22710
22720
22730
22740
22750
22760
22770
22780
22790
22800
228129438
22820
22830
22840
22850
22860
22870
22880
22890
22900
22910
22920
22930
22940
22950
22960
22970
22980
22990
23000
23010
23020
23030
23040
23050
23060
23070
23080
23090
23100
23110
23120
23130
23140
23150
23160
23170
23180
23190
23200
23210
23220
23230
23240
23250
23260
23270
23280
23290
23300
23310
23320
23330
23340
23350
23360
23370
23380
23390
23400
23410
23420
23430
23440
23450
23460
23470
23480
23490
23500
23510
23520
23530
23540
23550
23560
23570
23580
23590
23600
23610
23620
23630
23640
23650
23660
23670
23680
23690
23700
23710
23720
23730
23740
23750
23760
23770
23780
23790
23800
23810
23820
23830
23840
23850
23860
23870
23880
23890
23900
23910
23920
23930
23940
23950
23960
23970
23980
23990
24000
24010
24020
24030
24040
24050
24060
24070
24080
24090
24100
24110
24120
24130
24140
24150
24160
24170
24180
24190
24200
24210
24220
24230
24240
24250
24260
24270
24280
24290
24300
24310
24320
24330
24340
24350
24360
24370
24380
24390
24400
24410
24420
24430
24440
24450
24460
24470
24480
24490
24500
24510
      module dparse.lexer;

import std.typecons;
import std.typetuple;
import std.array;
import std.algorithm;
import std.range;
import std.experimental.lexer;
import std.traits;
import core.cpuid : sse42;

public import dparse.trivia;

/**
 * Operators
 *
 * Spellings of the fixed-text operator and punctuation tokens. This list is
 * one of the three token lists used to build `IdType`, `str` and `tok`, and
 * it is also handed to the `Lexer` mixin inside `DLexer`.
 */
private enum operators = [
    ",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=",
    "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++",
    "+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=",
    "==", "=>", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "]", "^",
    "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~="
];

/**
 * Keywords
 *
 * Spellings of the keyword tokens, including the built-in type names and the
 * double-underscore special tokens. Like `operators`, this list feeds
 * `IdType`, `str`, `tok` and the `Lexer` mixin inside `DLexer`.
 */
private enum keywords = [
    "abstract", "alias", "align", "asm", "assert", "auto", "bool",
    "break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat",
    "char", "class", "const", "continue", "creal", "dchar", "debug", "default",
    "delegate", "delete", "deprecated", "do", "double", "else", "enum",
    "export", "extern", "false", "final", "finally", "float", "for", "foreach",
    "foreach_reverse", "function", "goto", "idouble", "if", "ifloat",
    "immutable", "import", "in", "inout", "int", "interface", "invariant",
    "ireal", "is", "lazy", "long", "macro", "mixin", "module", "new", "nothrow",
    "null", "out", "override", "package", "pragma", "private", "protected",
    "public", "pure", "real", "ref", "return", "scope", "shared", "short",
    "static", "struct", "super", "switch", "synchronized", "template", "this",
    "throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent",
    "uint", "ulong", "union", "unittest", "ushort", "version", "void",
    "wchar", "while", "with", "__DATE__", "__EOF__", "__FILE__",
    "__FILE_FULL_PATH__", "__FUNCTION__", "__gshared", "__LINE__", "__MODULE__",
    "__parameters", "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits",
    "__vector", "__VENDOR__", "__VERSION__"
];

/**
 * Other tokens
 *
 * Names of the token kinds that are identified by category rather than by a
 * single fixed spelling: identifiers, the various literal kinds, comments,
 * whitespace, the script line and the special token sequence. Together with
 * `operators` and `keywords` this list feeds `IdType`, `str`, `tok` and the
 * `Lexer` mixin inside `DLexer`.
 */
private enum dynamicTokens = [
    "specialTokenSequence", "comment", "identifier", "scriptLine",
    "whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral",
    "ifloatLiteral", "intLiteral", "longLiteral", "realLiteral",
    "irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral",
    "dstringLiteral", "stringLiteral", "wstringLiteral"
];

/**
 * Flat list of (prefix, handler name) pairs: every even element is a byte
 * sequence that can start a token, and the element after it is the name of
 * the lexing function associated with that prefix (e.g. `"\t"` is paired with
 * `"lexWhitespace"`, which is a private method of `DLexer`).
 *
 * The list is passed as the last argument of the `Lexer` mixin in `DLexer`.
 * NOTE(review): how the mixin dispatches on these pairs is defined in
 * `std.experimental.lexer`, outside this module - confirm there before
 * reordering or adding entries.
 */
private enum pseudoTokenHandlers = [
    "\"", "lexStringLiteral",
    "`", "lexWysiwygString",
    "//", "lexSlashSlashComment",
    "/*", "lexSlashStarComment",
    "/+", "lexSlashPlusComment",
    ".", "lexDot",
    "'", "lexCharacterLiteral",
    "0", "lexNumber",
    "1", "lexDecimal",
    "2", "lexDecimal",
    "3", "lexDecimal",
    "4", "lexDecimal",
    "5", "lexDecimal",
    "6", "lexDecimal",
    "7", "lexDecimal",
    "8", "lexDecimal",
    "9", "lexDecimal",
    "q\"", "lexDelimitedString",
    "q{", "lexTokenString",
    "r\"", "lexWysiwygString",
    "x\"", "lexHexString",
    " ", "lexWhitespace",
    "\t", "lexWhitespace",
    "\r", "lexWhitespace",
    "\n", "lexWhitespace",
    "\v", "lexWhitespace",
    "\f", "lexWhitespace",
    "\u2028", "lexLongNewline",
    "\u2029", "lexLongNewline",
    "#!", "lexScriptLine",
    "#line", "lexSpecialTokenSequence"
];

/// Token ID type for the D lexer.
public alias IdType = TokenIdType!(operators, dynamicTokens, keywords);

/**
 * Function used for converting an IdType to a string.
 *
 * Examples:
 * ---
 * IdType c = tok!"case";
 * assert (str(c) == "case");
 * ---
 */
public alias str = tokenStringRepresentation!(IdType, operators, dynamicTokens, keywords);

/**
 * Template used to refer to D token types.
 *
 * See the $(B operators), $(B keywords), and $(B dynamicTokens) enums for
 * values that can be passed to this template.
 * Example:
 * ---
 * import dparse.lexer;
 * IdType t = tok!"floatLiteral";
 * ---
 */
public template tok(string token)
{
    alias tok = TokenId!(IdType, operators, dynamicTokens, keywords, token);
}

/**
 * Extra members mixed into the lexer's token struct (see `extraFields` and
 * `Token`): trivia storage, lazily computed documentation comments and
 * ordering by source position.
 *
 * The template refers to an `index` member that it does not declare itself;
 * it has to be provided by the struct the template is mixed into.
 */
mixin template TokenTriviaFields()
{
    /**
     * Whitespace and comment tokens attached to this token.
     *
     * All trivia tokens must have the text property set to the text with
     * which they identify with. This means you can map all trivia tokens to
     * their .text property and join them together to get the source code back
     * without any loss of information.
     *
     * Trivia is only included when calling getTokensForParser. When iterating
     * over DLexer all tokens will be in their raw form and none will be
     * converted to trivia.
     *
     * Note: in the future you might need to explicitly pass
     * WhitespaceBehavior.include (or keep the default) as getTokensForParser
     * currently overrides it to include.
     *
     * Contains: `comment`, `whitespace`, `specialTokenSequence`
     */
    immutable(typeof(this))[] leadingTrivia;
    /// ditto
    immutable(typeof(this))[] trailingTrivia;

    // Caches for `comment` and `trailingComment`. `null` means "nothing
    // cached"; the properties below fill them in on first use.
    string memoizedLeadingComment = null;
    string memoizedTrailingComment = null;

    /// Legacy property to get documentation comments, with comment border
    /// stripped off, which is attached to this token.
    string comment() const pure nothrow @safe @property {
        import dparse.trivia : extractLeadingDdoc;
        if (memoizedLeadingComment !is null)
            return memoizedLeadingComment;
        // `cast()` strips the `const` that this method applies to the field so
        // the computed value can be stored. A `null` result cannot be told
        // apart from "not cached", so it is recomputed on the next call.
        return (cast()memoizedLeadingComment) = this.extractLeadingDdoc;
    }

    /// ditto
    string trailingComment() const pure nothrow @safe @property {
        import dparse.trivia : extractTrailingDdoc;
        if (memoizedTrailingComment !is null)
            return memoizedTrailingComment;
        // Same const-stripping store as in `comment` above.
        return (cast()memoizedTrailingComment) = this.extractTrailingDdoc;
    }

    /// Three-way comparison of this token's `index` with the position `i`:
    /// negative if the token lies before `i`, positive if after, 0 if equal.
    int opCmp(size_t i) const pure nothrow @safe @nogc {
        if (index < i) return -1;
        if (index > i) return 1;
        return 0;
    }

    /// Orders two tokens by their `index` members.
    int opCmp(ref const typeof(this) other) const pure nothrow @safe @nogc {
        return opCmp(other.index);
    }
}

// Mixed in from dparse.lexer to keep error messages a manageable size, as the
// entire string is dumped when there is a type mismatch.
private enum extraFields = "import dparse.lexer:TokenTriviaFields; mixin TokenTriviaFields;";

/// The token type in the D lexer
public alias Token = std.experimental.lexer.TokenStructure!(IdType, extraFields);

/**
 * Configure whitespace handling
 *
 * Read by `DLexer.popFront`, which keeps advancing past `whitespace` tokens
 * when the configured value is `skip`.
 */
public enum WhitespaceBehavior : ubyte
{
    /// Whitespace tokens are produced like any other token.
    include = 0b0000_0000,
    /// Whitespace tokens are dropped while iterating the lexer.
    skip = 0b0000_0001,
}

private enum stringBehaviorNotWorking = "Automatic string parsing is not "
    ~ "supported and was previously not working. To unescape strings use the "
    ~ "`dparse.strings : unescapeString` function on the token texts instead.";

/**
 * Configure string lexing behavior
 *
 * Only `source` is not deprecated; the other values carry the deprecation
 * message stored in `stringBehaviorNotWorking`.
 */
// was enum, but struct now for deprecations and support with old compilers
public struct StringBehavior
{
    /// Do not include quote characters, process escape sequences
    deprecated(stringBehaviorNotWorking) static immutable StringBehavior compiler = StringBehavior(0b0000_0000);
    /// Opening quotes, closing quotes, and string suffixes are included in
    /// the string token
    deprecated(stringBehaviorNotWorking) static immutable StringBehavior includeQuoteChars = StringBehavior(0b0000_0001);
    /// String escape sequences are not replaced
    deprecated(stringBehaviorNotWorking) static immutable StringBehavior notEscaped = StringBehavior(0b0000_0010);
    /// Not modified at all. Useful for formatters or highlighters
    static immutable StringBehavior source = StringBehavior(0b0000_0011);

    /// Raw flag bits; `source` is the bitwise combination of
    /// `includeQuoteChars` and `notEscaped`.
    ubyte behavior;
    // Lets a StringBehavior be used wherever its underlying ubyte is expected,
    // as was possible when this type was still an enum.
    alias behavior this;
}

/**
 * Comment handling flag stored in `LexerConfig.commentBehavior`.
 *
 * NOTE(review): the member names suggest this selects whether comment text is
 * interned in the string cache; the code that reads the flag is outside this
 * part of the module - confirm there.
 */
public enum CommentBehavior : bool
{
    /// Default value of `LexerConfig.commentBehavior`.
    intern = true,
    /// Value forced by `getTokensForParser`.
    noIntern = false
}
/**
 * Lexer configuration struct
 *
 * Passed to the `DLexer` constructor and to `getTokensForParser`.
 */
public struct LexerConfig
{
    /// Name of the file being lexed.
    /// NOTE(review): not read in the visible part of this module - presumably
    /// used for diagnostics; confirm against the users of this field.
    string fileName;
    /// String lexing mode, see `StringBehavior`.
    StringBehavior stringBehavior;
    /// Whether `DLexer.popFront` yields or skips whitespace tokens.
    /// `getTokensForParser` always overrides this to `include`.
    WhitespaceBehavior whitespaceBehavior;
    /// Comment handling flag, see `CommentBehavior`.
    /// `getTokensForParser` always overrides this to `noIntern`.
    CommentBehavior commentBehavior = CommentBehavior.intern;
}

/**
 * Basic type token types.
 */
public alias BasicTypes = AliasSeq!(tok!"int", tok!"bool", tok!"byte",
        tok!"cdouble", tok!"cent", tok!"cfloat", tok!"char", tok!"creal",
        tok!"dchar", tok!"double", tok!"float", tok!"idouble",
        tok!"ifloat", tok!"ireal", tok!"long", tok!"real", tok!"short",
        tok!"ubyte", tok!"ucent", tok!"uint", tok!"ulong", tok!"ushort",
        tok!"void", tok!"wchar");

/**
 * Tests whether a token ID denotes one of the built-in types listed in
 * $(B BasicTypes).
 *
 * Params:
 *     type = the token ID to classify
 * Returns: `true` if `type` is a basic type token, `false` otherwise.
 */
public bool isBasicType(IdType type) nothrow pure @safe @nogc
{
    switch (type)
    {
    // Unrolled at compile time: one `case` label per BasicTypes entry.
    foreach (basicType; BasicTypes)
    {
    case basicType:
        return true;
    }
    default:
        break;
    }
    return false;
}

/**
 * Number literal token types.
 */
public alias NumberLiterals = AliasSeq!(tok!"doubleLiteral",
        tok!"floatLiteral", tok!"idoubleLiteral", tok!"ifloatLiteral",
        tok!"intLiteral", tok!"longLiteral", tok!"realLiteral",
        tok!"irealLiteral", tok!"uintLiteral", tok!"ulongLiteral");

/**
 * Tests whether a token ID denotes one of the numeric literal kinds listed in
 * $(B NumberLiterals).
 *
 * Params:
 *     type = the token ID to classify
 * Returns: `true` if `type` is a number literal token, `false` otherwise.
 */
public bool isNumberLiteral(IdType type) nothrow pure @safe @nogc
{
    switch (type)
    {
    // Unrolled at compile time: one `case` label per NumberLiterals entry.
    foreach (numberLiteral; NumberLiterals)
    {
    case numberLiteral:
        return true;
    }
    default:
        break;
    }
    return false;
}

/**
 * Integer literal token types.
 */
public alias IntegerLiterals = AliasSeq!(tok!"intLiteral", tok!"longLiteral",
        tok!"uintLiteral", tok!"ulongLiteral");

/**
 * Tests whether a token ID denotes one of the integer literal kinds listed in
 * $(B IntegerLiterals).
 *
 * Params:
 *     type = the token ID to classify
 * Returns: `true` if `type` is an integer literal token, `false` otherwise.
 */
public bool isIntegerLiteral(IdType type) nothrow pure @safe @nogc
{
    switch (type)
    {
    // Unrolled at compile time: one `case` label per IntegerLiterals entry.
    foreach (integerLiteral; IntegerLiterals)
    {
    case integerLiteral:
        return true;
    }
    default:
        break;
    }
    return false;
}

/**
 * Operator token types.
 */
public alias Operators = AliasSeq!(tok!",", tok!".", tok!"..", tok!"...",
        tok!"/", tok!"/=", tok!"!", tok!"!<", tok!"!<=", tok!"!<>",
        tok!"!<>=", tok!"!=", tok!"!>", tok!"!>=", tok!"$", tok!"%",
        tok!"%=", tok!"&", tok!"&&", tok!"&=", tok!"(", tok!")",
        tok!"*", tok!"*=", tok!"+", tok!"++", tok!"+=", tok!"-",
        tok!"--", tok!"-=", tok!":", tok!";", tok!"<", tok!"<<",
        tok!"<<=", tok!"<=", tok!"<>", tok!"<>=", tok!"=", tok!"==",
        tok!"=>", tok!">", tok!">=", tok!">>", tok!">>=", tok!">>>",
        tok!">>>=", tok!"?", tok!"@", tok!"[", tok!"]", tok!"^",
        tok!"^=", tok!"^^", tok!"^^=", tok!"{", tok!"|", tok!"|=",
        tok!"||", tok!"}", tok!"~", tok!"~=");

/**
 * Tests whether a token ID denotes one of the operators listed in
 * $(B Operators).
 *
 * Params:
 *     type = the token ID to classify
 * Returns: `true` if `type` is an operator token, `false` otherwise.
 */
public bool isOperator(IdType type) nothrow pure @safe @nogc
{
    switch (type)
    {
    // Unrolled at compile time: one `case` label per Operators entry.
    foreach (operatorId; Operators)
    {
    case operatorId:
        return true;
    }
    default:
        break;
    }
    return false;
}

/**
 * Keyword token types.
 */
public alias Keywords = AliasSeq!(tok!"abstract", tok!"alias", tok!"align",
        tok!"asm", tok!"assert", tok!"auto", tok!"break",
        tok!"case", tok!"cast", tok!"catch", tok!"class", tok!"const",
        tok!"continue", tok!"debug", tok!"default", tok!"delegate",
        tok!"delete", tok!"deprecated", tok!"do", tok!"else", tok!"enum",
        tok!"export", tok!"extern", tok!"false", tok!"final", tok!"finally",
        tok!"for", tok!"foreach", tok!"foreach_reverse", tok!"function",
        tok!"goto", tok!"if", tok!"immutable", tok!"import", tok!"in",
        tok!"inout", tok!"interface", tok!"invariant", tok!"is",
        tok!"lazy", tok!"macro", tok!"mixin", tok!"module", tok!"new",
        tok!"nothrow", tok!"null", tok!"out", tok!"override", tok!"package",
        tok!"pragma", tok!"private", tok!"protected", tok!"public",
        tok!"pure", tok!"ref", tok!"return", tok!"scope", tok!"shared",
        tok!"static", tok!"struct", tok!"super", tok!"switch", tok!"synchronized",
        tok!"template", tok!"this", tok!"throw", tok!"true", tok!"try",
        tok!"typedef", tok!"typeid", tok!"typeof", tok!"union", tok!"unittest",
        tok!"version", tok!"while", tok!"with", tok!"__DATE__",
        tok!"__EOF__", tok!"__FILE__", tok!"__FILE_FULL_PATH__", tok!"__FUNCTION__",
        tok!"__gshared", tok!"__LINE__", tok!"__MODULE__", tok!"__parameters",
        tok!"__PRETTY_FUNCTION__", tok!"__TIME__", tok!"__TIMESTAMP__",
        tok!"__traits", tok!"__vector", tok!"__VENDOR__", tok!"__VERSION__");

/**
 * Tests whether a token ID denotes one of the keywords listed in
 * $(B Keywords).
 *
 * Params:
 *     type = the token ID to classify
 * Returns: `true` if `type` is a keyword token, `false` otherwise.
 */
public bool isKeyword(IdType type) pure nothrow @safe @nogc
{
    switch (type)
    {
    // Unrolled at compile time: one `case` label per Keywords entry.
    foreach (keywordId; Keywords)
    {
    case keywordId:
        return true;
    }
    default:
        break;
    }
    return false;
}

/**
 * String literal token types
 */
public alias StringLiterals = AliasSeq!(tok!"dstringLiteral",
        tok!"stringLiteral", tok!"wstringLiteral");

/**
 * Tests whether a token ID denotes one of the string literal kinds listed in
 * $(B StringLiterals).
 *
 * Params:
 *     type = the token ID to classify
 * Returns: `true` if `type` is a string literal token, `false` otherwise.
 */
public bool isStringLiteral(IdType type) pure nothrow @safe @nogc
{
    switch (type)
    {
    // Unrolled at compile time: one `case` label per StringLiterals entry.
    foreach (stringLiteral; StringLiterals)
    {
    case stringLiteral:
        return true;
    }
    default:
        break;
    }
    return false;
}

/**
 * Protection token types.
 */
public alias Protections = AliasSeq!(tok!"export", tok!"package",
        tok!"private", tok!"public", tok!"protected");

/**
 * Tests whether a token ID denotes one of the protection attributes listed in
 * $(B Protections).
 *
 * Params:
 *     type = the token ID to classify
 * Returns: `true` if `type` is a protection attribute token, `false`
 *     otherwise.
 */
public bool isProtection(IdType type) pure nothrow @safe @nogc
{
    switch (type)
    {
    // Unrolled at compile time: one `case` label per Protections entry.
    foreach (protection; Protections)
    {
    case protection:
        return true;
    }
    default:
        break;
    }
    return false;
}

/**
 * Special token types: the double-underscore tokens `__DATE__`, `__TIME__`,
 * `__TIMESTAMP__`, `__VENDOR__`, `__VERSION__`, `__FILE__`,
 * `__FILE_FULL_PATH__`, `__LINE__`, `__MODULE__`, `__FUNCTION__` and
 * `__PRETTY_FUNCTION__`.
 */
public alias SpecialTokens = AliasSeq!(tok!"__DATE__", tok!"__TIME__",
    tok!"__TIMESTAMP__", tok!"__VENDOR__", tok!"__VERSION__", tok!"__FILE__",
    tok!"__FILE_FULL_PATH__", tok!"__LINE__", tok!"__MODULE__",
    tok!"__FUNCTION__", tok!"__PRETTY_FUNCTION__");

/**
 * Tests whether a token ID denotes one of the tokens listed in
 * $(B SpecialTokens).
 *
 * Params:
 *     type = the token ID to classify
 * Returns: `true` if `type` is a special token, `false` otherwise.
 */
public bool isSpecialToken(IdType type) pure nothrow @safe @nogc
{
    switch (type)
    {
    // Unrolled at compile time: one `case` label per SpecialTokens entry.
    foreach (specialToken; SpecialTokens)
    {
    case specialToken:
        return true;
    }
    default:
        break;
    }
    return false;
}

/**
 * Literal token types: all of `StringLiterals`, `NumberLiterals` and
 * `SpecialTokens`, plus `characterLiteral`, `true`, `false`, `null` and `$`.
 */
public alias Literals = AliasSeq!(StringLiterals, NumberLiterals, tok!"characterLiteral",
        SpecialTokens, tok!"true", tok!"false", tok!"null", tok!"$");

/**
 * Tests whether a token ID denotes one of the tokens listed in
 * $(B Literals).
 *
 * Params:
 *     type = the token ID to classify
 * Returns: `true` if `type` is a literal token, `false` otherwise.
 */
public bool isLiteral(IdType type) pure nothrow @safe @nogc
{
    switch (type)
    {
    // Unrolled at compile time: one `case` label per Literals entry.
    foreach (literalId; Literals)
    {
    case literalId:
        return true;
    }
    default:
        break;
    }
    return false;
}

/**
 * Lexes the given source code and returns its non-trivia tokens. All
 * whitespace, comment and specialTokenSequence tokens (trivia) are attached
 * to the token nearest to them instead of appearing in the result.
 *
 * Trivia is put on the last token as `trailingTrivia` if it is on the same
 * line as the trivia, otherwise it will be attached to the next token in the
 * `leadingTrivia` until there is the EOF, where it will be attached as
 * `trailingTrivia` again.
 *
 * If the source yields no non-trivia token at all, the collected trivia is
 * not attached to anything and an empty array is returned.
 *
 * Params:
 *     sourceCode = the bytes of the source code to lex
 *     config = the lexer configuration; `whitespaceBehavior` and
 *         `commentBehavior` are overridden to `include` and `noIntern`
 *     cache = the string cache handed on to `DLexer`
 * Returns: the lexed tokens, without trivia tokens and without the
 *     `__EOF__` token.
 */
const(Token)[] getTokensForParser(R)(R sourceCode, LexerConfig config, StringCache* cache)
if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
{
    // `config` is a by-value copy, so these overrides do not affect the caller.
    config.whitespaceBehavior = WhitespaceBehavior.include;
    config.commentBehavior = CommentBehavior.noIntern;

    // Trivia collected since the last real token: `leading` is destined for
    // the next token, `trailing` for the previous one.
    auto leadingTriviaAppender = appender!(Token[])();
    leadingTriviaAppender.reserve(128);
    auto trailingTriviaAppender = appender!(Token[])();
    trailingTriviaAppender.reserve(128);

    auto output = appender!(typeof(return))();
    auto lexer = DLexer(sourceCode, config, cache);
    loop: while (!lexer.empty) switch (lexer.front.type)
    {
    case tok!"specialTokenSequence":
    case tok!"whitespace":
    case tok!"comment":
        // Trivia with the same `line` value as the previously emitted token
        // trails that token; everything else leads the next token.
        if (!output.data.empty && lexer.front.line == output.data[$ - 1].line)
            trailingTriviaAppender.put(lexer.front);
        else
            leadingTriviaAppender.put(lexer.front);
        lexer.popFront();
        break;
    case tok!"__EOF__":
        break loop;
    default:
        Token t = lexer.front;
        lexer.popFront();

        // The output elements are const; `cast()` strips that so the trailing
        // trivia of the already emitted previous token can still be filled in.
        if (!output.data.empty && !trailingTriviaAppender.data.empty)
            (cast() output.data[$ - 1].trailingTrivia) = trailingTriviaAppender.data.idup;
        t.leadingTrivia = leadingTriviaAppender.data.idup;
        leadingTriviaAppender.clear();
        trailingTriviaAppender.clear();

        output.put(t);
        break;
    }

    // End of input: whatever trivia is still pending (trailing first, then
    // leading) becomes the trailing trivia of the last token.
    if (!output.data.empty)
    {
        trailingTriviaAppender.put(leadingTriviaAppender.data);
        (cast() output.data[$ - 1].trailingTrivia) = trailingTriviaAppender.data.idup;
    }

    return output.data;
}

/**
 * The D lexer struct.
 */
public struct DLexer
{
    mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
        keywords, pseudoTokenHandlers);

    ///
    @disable this();

    /**
     * Creates a lexer over the given source bytes and lexes the first token.
     *
     * A UTF-8 byte order mark at the very start of the input is skipped.
     * Input whose elements are not immutable is duplicated; immutable input
     * is used as is.
     *
     * Params:
     *     range = the bytes that compose the source code that will be lexed.
     *     config = the lexer configuration to use.
     *     cache = the string interning cache for de-duplicating identifiers and
     *         other token text.
     *     haveSSE42 = whether code paths guarded by SSE 4.2 availability may
     *         be used; defaults to the result of `core.cpuid.sse42`.
     */
    this(R)(R range, const LexerConfig config, StringCache* cache,
        bool haveSSE42 = sse42()) pure nothrow @safe
    if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
    {
        this.haveSSE42 = haveSSE42;
        this.config = config;
        this.cache = cache;

        // EF BB BF is the UTF-8 byte order mark; it is not part of the source.
        immutable bool startsWithBom = range.length >= 3
            && range[0] == 0xef && range[1] == 0xbb && range[2] == 0xbf;
        auto source = startsWithBom ? range[3 .. $] : range;

        static if (is(ElementEncodingType!R == immutable))
            this.range = LexerRange(cast(const(ubyte)[]) source);
        else
            this.range = LexerRange(cast(const(ubyte)[]) source.idup);

        // Prime `front` with the first token.
        popFront();
    }

    /**
     * Advances the lexer to the next token. When the configured whitespace
     * behavior is `WhitespaceBehavior.skip`, whitespace tokens are passed over
     * so that the new front is never a whitespace token.
     */
    public void popFront()() pure nothrow @safe
    {
        for (;;)
        {
            _popFront();
            if (config.whitespaceBehavior != WhitespaceBehavior.skip
                || _front.type != tok!"whitespace")
            {
                break;
            }
        }
    }

    /**
     * Lexer error/warning message.
     */
    static struct Message
    {
        /// 1-based line number
        size_t line;
        /// 1-based byte offset
        size_t column;
        /// Text of the message
        string message;
        /// `true` for an error, `false` for a warning
        bool isError;
    }

    /**
     * Returns: An array of all of the warnings and errors generated so far
     *     during lexing. It may make sense to only check this when `empty`
     *     returns `true`. The lexer's internal message array is returned as
     *     a const view; it is not copied.
     */
    const(Message[]) messages() const @property
    {
        return _messages;
    }

private pure nothrow @safe:

    /**
     * Tests whether a whitespace character starts at the current position.
     *
     * Recognised are space, `\r`, `\n`, `\t`, `\v`, `\f` and the UTF-8
     * encodings of U+2028 and U+2029 (`E2 80 A8` / `E2 80 A9`).
     * The range must not be empty when this is called.
     *
     * Returns: `true` if the current byte begins a whitespace character.
     */
    bool isWhitespace()
    {
        switch (range.bytes[range.index])
        {
        case ' ':
        case '\r':
        case '\n':
        case '\t':
        case '\v':
        case '\f':
            return true;
        case 0xe2:
            // Inspect the two continuation bytes by direct indexing, the same
            // way lexWhitespace does, so the check does not depend on where a
            // peeked slice starts or how long it is.
            if (range.index + 2 >= range.bytes.length)
                return false;
            immutable ubyte second = range.bytes[range.index + 1];
            immutable ubyte third = range.bytes[range.index + 2];
            return second == 0x80 && (third == 0xa8 || third == 0xa9);
        default:
            return false;
        }
    }

    /**
     * Consumes one character and keeps the line counter up to date.
     *
     * `\r`, `\n`, `\r\n` and the UTF-8 encodings of U+2028 / U+2029
     * (`E2 80 A8` / `E2 80 A9`) each count as one line break; any other byte
     * is consumed with a plain `popFront`. The range must not be empty when
     * this is called.
     */
    void popFrontWhitespaceAware()
    {
        switch (range.bytes[range.index])
        {
        case '\r':
            range.popFront();
            // Treat "\r\n" as a single line break.
            if (range.index < range.bytes.length && range.bytes[range.index] == '\n')
                range.popFront();
            range.incrementLine();
            return;
        case '\n':
            range.popFront();
            range.incrementLine();
            return;
        case 0xe2:
            // Check the continuation bytes by direct indexing so the line
            // separator is recognised wherever it occurs in the input, not
            // only when it happens to be the last three bytes.
            if (range.index + 2 < range.bytes.length
                && range.bytes[range.index + 1] == 0x80
                && (range.bytes[range.index + 2] == 0xa8
                    || range.bytes[range.index + 2] == 0xa9))
            {
                range.index += 3;
                range.column += 3;
                range.incrementLine();
                return;
            }
            range.popFront();
            return;
        default:
            range.popFront();
            return;
        }
    }

    /**
     * Lexes a run of whitespace into a single `whitespace` token.
     *
     * Line breaks (`\r`, `\n`, `\r\n`, U+2028, U+2029) reset the column to 1
     * and advance the line counter. The token text is only interned when the
     * configuration asks for whitespace to be included; otherwise it is empty.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexWhitespace(ref Token token) @trusted
    {
        mixin (tokenStart);
        scan: do
        {
            version (X86_64)
            {
                if (haveSSE42 && range.index + 16 < range.bytes.length)
                {
                    skip!(true, '\t', ' ', '\v', '\f')(range.bytes.ptr + range.index,
                        &range.index, &range.column);
                }
            }
            immutable ubyte current = range.bytes[range.index];
            if (current == ' ' || current == '\t' || current == '\v' || current == '\f')
            {
                range.popFront();
            }
            else if (current == '\r' || current == '\n')
            {
                range.popFront();
                // "\r\n" counts as one line break.
                if (current == '\r' && range.index < range.bytes.length
                    && range.bytes[range.index] == '\n')
                {
                    range.popFront();
                }
                range.column = 1;
                range.line += 1;
            }
            else if (current == 0xe2)
            {
                // Only E2 80 A8 and E2 80 A9 (U+2028 / U+2029) are whitespace.
                if (range.index + 2 >= range.bytes.length
                    || range.bytes[range.index + 1] != 0x80
                    || (range.bytes[range.index + 2] != 0xa8
                        && range.bytes[range.index + 2] != 0xa9))
                {
                    break scan;
                }
                range.index += 3;
                range.column = 1;
                range.line += 1;
            }
            else
                break scan;
        } while (range.index < range.bytes.length);

        string text = "";
        if (config.whitespaceBehavior == WhitespaceBehavior.include)
            text = cache.intern(range.slice(mark));
        token = Token(tok!"whitespace", text, line, column, index);
    }

    /**
     * Lexes a numeric literal, dispatching on its prefix: `0x`/`0X` goes to
     * the hexadecimal lexer, `0b`/`0B` to the binary lexer, and everything
     * else to the decimal lexer. The two prefix bytes are consumed here.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexNumber(ref Token token)
    {
        mixin (tokenStart);
        if (range.bytes[range.index] == '0' && range.index + 1 < range.bytes.length)
        {
            immutable next = range.bytes[range.index + 1];
            if (next == 'x' || next == 'X')
            {
                range.index += 2;
                range.column += 2;
                lexHex(token, mark, line, column, index);
                return;
            }
            if (next == 'b' || next == 'B')
            {
                range.index += 2;
                range.column += 2;
                lexBinary(token, mark, line, column, index);
                return;
            }
        }
        lexDecimal(token, mark, line, column, index);
    }

    /// Lexes a hexadecimal literal, using the current position as the start
    /// of the token.
    void lexHex(ref Token token)
    {
        mixin (tokenStart);
        lexHex(token, mark, line, column, index);
    }

    /**
     * Lexes the digits, optional fraction, exponent and suffix of a
     * hexadecimal literal, starting at the current position.
     *
     * Params:
     *     token = receives the lexed token.
     *     mark = start position used to slice the token text.
     *     line = line recorded in the token.
     *     column = column recorded in the token.
     *     index = index recorded in the token.
     */
    void lexHex(ref Token token, size_t mark, size_t line, size_t column,
        size_t index) @trusted
    {
        IdType type = tok!"intLiteral";
        bool foundDot;
        hexLoop: while (!(range.index >= range.bytes.length))
        {
            switch (range.bytes[range.index])
            {
            case 'a': .. case 'f':
            case 'A': .. case 'F':
            case '0': .. case '9':
            case '_':
                version (X86_64)
                {
                    // Accelerated path, taken only when more than 16 bytes
                    // remain after the current position.
                    if (haveSSE42 && range.index + 16 < range.bytes.length)
                    {
                        immutable ulong i = rangeMatch!(false, '0', '9', 'a', 'f', 'A', 'F', '_', '_')
                            (range.bytes.ptr + range.index);
                        range.column += i;
                        range.index += i;
                    }
                    else
                        range.popFront();
                }
                else
                    range.popFront();
                break;
            case 'u':
            case 'U':
                lexIntSuffix(type);
                break hexLoop;
            case 'i':
                // Without a preceding dot the 'i' is left unconsumed.
                if (foundDot)
                    lexFloatSuffix(type);
                break hexLoop;
            case 'L':
                if (foundDot)
                    lexFloatSuffix(type);
                else
                    lexIntSuffix(type);
                break hexLoop;
            case 'p':
            case 'P':
                lexExponent(type);
                break hexLoop;
            case '.':
                // A second dot, a dot at the end of input, or ".." ends the
                // literal without consuming the dot.
                if (foundDot || !(range.index + 1 < range.bytes.length) || range.peekAt(1) == '.')
                    break hexLoop;
                else
                {
                    // The following bit of silliness tries to tell the
                    // difference between "int dot identifier" and
                    // "double identifier".
                    if (range.index + 1 < range.bytes.length)
                    {
                        switch (range.peekAt(1))
                        {
                        case '0': .. case '9':
                        case 'A': .. case 'F':
                        case 'a': .. case 'f':
                            goto doubleLiteral;
                        default:
                            break hexLoop;
                        }
                    }
                    else
                    {
                    // Reached only through the goto above: the outer check
                    // already guarantees that a byte follows the dot.
                    doubleLiteral:
                        range.popFront();
                        foundDot = true;
                        type = tok!"doubleLiteral";
                    }
                }
                break;
            default:
                break hexLoop;
            }
        }
        token = Token(type, cache.intern(range.slice(mark)), line, column,
            index);
    }

    /// Lexes a binary literal, using the current position as the start of
    /// the token.
    void lexBinary(ref Token token)
    {
        mixin (tokenStart);
        lexBinary(token, mark, line, column, index);
    }

    /**
     * Lexes the digits and optional integer suffix of a binary literal,
     * starting at the current position.
     *
     * Params:
     *     token = receives the lexed token.
     *     mark = start position used to slice the token text.
     *     line = line recorded in the token.
     *     column = column recorded in the token.
     *     index = index recorded in the token.
     */
    void lexBinary(ref Token token, size_t mark, size_t line, size_t column,
        size_t index) @trusted
    {
        IdType type = tok!"intLiteral";
        while (range.index < range.bytes.length)
        {
            immutable ubyte current = range.bytes[range.index];
            if (current == '0' || current == '1' || current == '_')
            {
                version (X86_64)
                {
                    if (haveSSE42 && range.index + 16 < range.bytes.length)
                    {
                        immutable ulong matched = rangeMatch!(false, '0', '1', '_', '_')(
                            range.bytes.ptr + range.index);
                        range.column += matched;
                        range.index += matched;
                    }
                    else
                        range.popFront();
                }
                else
                    range.popFront();
                continue;
            }
            // Anything that is not a digit ends the literal; an integer
            // suffix is consumed first if one is present.
            if (current == 'u' || current == 'U' || current == 'L')
                lexIntSuffix(type);
            break;
        }
        token = Token(type, cache.intern(range.slice(mark)), line, column,
            index);
    }

    /// Lexes a decimal literal, using the current position as the start of
    /// the token.
    void lexDecimal(ref Token token)
    {
        mixin (tokenStart);
        lexDecimal(token, mark, line, column, index);
    }

    /**
     * Lexes a decimal integer or floating point literal, including an
     * optional fraction, exponent and suffix, starting at the current
     * position. The literal may begin with a dot.
     *
     * Params:
     *     token = receives the lexed token.
     *     mark = start position used to slice the token text.
     *     line = line recorded in the token.
     *     column = column recorded in the token.
     *     index = index recorded in the token.
     */
    void lexDecimal(ref Token token, size_t mark, size_t line, size_t column,
        size_t index) @trusted
    {
        // A leading dot means the literal is already known to be floating point.
        bool foundDot = range.bytes[range.index] == '.';
        IdType type = tok!"intLiteral";
        if (foundDot)
        {
            range.popFront();
            type = tok!"doubleLiteral";
        }

        decimalLoop: while (!(range.index >= range.bytes.length))
        {
            switch (range.bytes[range.index])
            {
            case '0': .. case '9':
            case '_':
                version (X86_64)
                {
                    // Accelerated path, taken only when more than 16 bytes
                    // remain after the current position.
                    if (haveSSE42 && range.index + 16 < range.bytes.length)
                    {
                        immutable ulong i = rangeMatch!(false, '0', '9', '_', '_')(range.bytes.ptr + range.index);
                        range.column += i;
                        range.index += i;
                    }
                    else
                        range.popFront();
                }
                else
                    range.popFront();
                break;
            case 'u':
            case 'U':
                // After a dot the 'u' is left unconsumed.
                if (!foundDot)
                    lexIntSuffix(type);
                break decimalLoop;
            case 'i':
                lexFloatSuffix(type);
                break decimalLoop;
            case 'L':
                if (foundDot)
                    lexFloatSuffix(type);
                else
                    lexIntSuffix(type);
                break decimalLoop;
            case 'f':
            case 'F':
                lexFloatSuffix(type);
                break decimalLoop;
            case 'e':
            case 'E':
                lexExponent(type);
                break decimalLoop;
            case '.':
                // A second dot, a dot at the end of input, or ".." ends the
                // literal without consuming the dot.
                if (foundDot || !(range.index + 1 < range.bytes.length) || range.peekAt(1) == '.')
                    break decimalLoop;
                else
                {
                    // The following bit of silliness tries to tell the
                    // difference between "int dot identifier" and
                    // "double identifier".
                    if (range.index + 1 < range.bytes.length)
                    {
                        immutable ch = range.peekAt(1);
                        // The dot belongs to the literal when the byte after
                        // it is an ASCII character other than a letter; an
                        // ASCII letter or any byte above '~' ends the literal.
                        if (ch <= 0x2f
                            || (ch >= '0' && ch <= '9')
                            || (ch >= ':' && ch <= '@')
                            || (ch >= '[' && ch <= '^')
                            || (ch >= '{' && ch <= '~')
                            || ch == '`' || ch == '_')
                        {
                            goto doubleLiteral;
                        }
                        else
                            break decimalLoop;
                    }
                    else
                    {
                    // Reached only through the goto above: the outer check
                    // already guarantees that a byte follows the dot.
                    doubleLiteral:
                        range.popFront();
                        foundDot = true;
                        type = tok!"doubleLiteral";
                    }
                }
                break;
            default:
                break decimalLoop;
            }
        }
        token = Token(type, cache.intern(range.slice(mark)), line, column,
            index);
    }

    /**
     * Consumes an integer suffix at the current position and updates `type`
     * to match.
     *
     * Accepted are `u`/`U` and `L`/`l` in either order, optionally followed
     * by `i`, which produces a deprecation warning and an imaginary type.
     * The range must not be empty when this is called.
     *
     * Params:
     *     type = the literal type so far; overwritten with the suffixed type.
     */
    void lexIntSuffix(ref IdType type) pure nothrow @safe
    {
        // Set when the 'U' part is entered from the 'L' part, so that the
        // 'U' part returns instead of looking for a further 'L'.
        bool secondPass;
        if (range.bytes[range.index] == 'u' || range.bytes[range.index] == 'U')
        {
    U:
            if (type == tok!"intLiteral")
                type = tok!"uintLiteral";
            else
                type = tok!"ulongLiteral";
            range.popFront();
            if (secondPass)
                return;
            if (range.index < range.bytes.length
                    && (range.bytes[range.index] == 'L' || range.bytes[range.index] == 'l'))
                goto L;
            goto I;
        }
        if (range.bytes[range.index] == 'L' || range.bytes[range.index] == 'l')
        {
    L:
            if (type == tok!"uintLiteral")
                type = tok!"ulongLiteral";
            else
                type = tok!"longLiteral";
            range.popFront();
            if (range.index < range.bytes.length
                    && (range.bytes[range.index] == 'U' || range.bytes[range.index] == 'u'))
            {
                secondPass = true;
                goto U;
            }
            goto I;
        }
    I:
        // Trailing 'i': long and ulong literals become idouble, all other
        // types become ifloat.
        if (range.index < range.bytes.length && range.bytes[range.index] == 'i')
        {
            warning("Complex number literals are deprecated");
            range.popFront();
            if (type == tok!"longLiteral" || type == tok!"ulongLiteral")
                type = tok!"idoubleLiteral";
            else
                type = tok!"ifloatLiteral";
        }
    }

    /**
     * Consumes a floating point suffix at the current position and updates
     * `type` to match.
     *
     * `L` selects `doubleLiteral` and `f`/`F` selects `floatLiteral`. A
     * following `i` produces a deprecation warning and turns the type into
     * its imaginary counterpart. The range must not be empty when this is
     * called.
     *
     * Params:
     *     type = the literal type so far; overwritten with the suffixed type.
     */
    void lexFloatSuffix(ref IdType type) pure nothrow @safe
    {
        immutable ubyte suffix = range.bytes[range.index];
        if (suffix == 'L')
        {
            range.popFront();
            type = tok!"doubleLiteral";
        }
        else if (suffix == 'f' || suffix == 'F')
        {
            range.popFront();
            type = tok!"floatLiteral";
        }

        // Nothing more to do unless an imaginary marker follows.
        if (range.index >= range.bytes.length || range.bytes[range.index] != 'i')
            return;

        warning("Complex number literals are deprecated");
        range.popFront();
        if (type == tok!"floatLiteral")
            type = tok!"ifloatLiteral";
        else
            type = tok!"idoubleLiteral";
    }

    /**
     * Consumes the exponent part of a floating point literal: the exponent
     * marker at the current position, an optional sign, the digits and an
     * optional float suffix.
     *
     * A literal with an exponent is always floating point, so `type` becomes
     * `doubleLiteral` unless a suffix selects another type. An error is
     * reported when no exponent digit is present, including when the input
     * ends right after the marker or the sign.
     *
     * Params:
     *     type = the literal type so far; overwritten with the resulting type.
     */
    void lexExponent(ref IdType type) pure nothrow @safe
    {
        // Skip the exponent marker itself.
        range.popFront();
        type = tok!"doubleLiteral";
        bool foundSign = false;
        bool foundDigit = false;
        while (range.index < range.bytes.length)
        {
            switch (range.bytes[range.index])
            {
            case '-':
            case '+':
                // A sign only belongs to the exponent directly after the
                // marker. Once digits were seen it starts the next token,
                // e.g. the `+` in `1e5+3`.
                if (foundDigit)
                    return;
                if (foundSign)
                {
                    error("Expected an exponent");
                    return;
                }
                foundSign = true;
                range.popFront();
                break;
            case '0': .. case '9':
            case '_':
                foundDigit = true;
                range.popFront();
                break;
            case 'L':
            case 'f':
            case 'F':
            case 'i':
                lexFloatSuffix(type);
                return;
            default:
                if (!foundDigit)
                    error("Expected an exponent");
                return;
            }
        }
        // The input ended inside the exponent.
        if (!foundDigit)
            error("Expected an exponent");
    }

    /**
     * Lexes a `scriptLine` token: everything from the current position up to,
     * but not including, the next newline or the end of input.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexScriptLine(ref Token token)
    {
        mixin (tokenStart);
        for (;;)
        {
            if (range.index >= range.bytes.length || isNewline)
                break;
            range.popFront();
        }
        token = Token(tok!"scriptLine", cache.intern(range.slice(mark)),
            line, column, index);
    }

    /**
     * Lexes a `specialTokenSequence` token: everything from the current
     * position up to, but not including, the next newline or the end of
     * input.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexSpecialTokenSequence(ref Token token)
    {
        mixin (tokenStart);
        for (;;)
        {
            if (range.index >= range.bytes.length || isNewline)
                break;
            range.popFront();
        }
        token = Token(tok!"specialTokenSequence", cache.intern(range.slice(mark)),
            line, column, index);
    }

    /**
     * Lexes a block comment. The two opening bytes are skipped and scanning
     * stops after the first `*` that is directly followed by `/`, or at the
     * end of input. The comment text is interned only when the configuration
     * asks for it.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexSlashStarComment(ref Token token) @trusted
    {
        mixin (tokenStart);
        range.popFrontN(2);
        while (range.index < range.bytes.length)
        {
            version (X86_64)
            {
                if (haveSSE42 && range.index + 16 < range.bytes.length)
                    skip!(false, '\r', '\n', '/', '*', 0xe2)(range.bytes.ptr + range.index,
                        &range.index, &range.column);
            }
            if (range.bytes[range.index] != '*')
            {
                popFrontWhitespaceAware();
                continue;
            }
            range.popFront();
            if (range.index < range.bytes.length && range.bytes[range.index] == '/')
            {
                range.popFront();
                break;
            }
        }
        immutable string text = config.commentBehavior == CommentBehavior.intern
            ? cache.intern(range.slice(mark))
            : cast(string) range.slice(mark);
        token = Token(tok!"comment", text, line, column, index);
    }

    /**
     * Lexes a line comment. The two opening bytes are skipped and scanning
     * stops in front of the next `\r` or `\n`, or at the end of input. The
     * comment text is interned only when the configuration asks for it.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexSlashSlashComment(ref Token token) @trusted
    {
        mixin (tokenStart);
        range.popFrontN(2);
        while (range.index < range.bytes.length)
        {
            version (X86_64)
            {
                if (haveSSE42 && range.index + 16 < range.bytes.length)
                {
                    skip!(false, '\r', '\n', 0xe2)(range.bytes.ptr + range.index,
                        &range.index, &range.column);
                }
            }
            immutable ubyte current = range.bytes[range.index];
            if (current == '\r' || current == '\n')
                break;
            range.popFront();
        }
        immutable string text = config.commentBehavior == CommentBehavior.intern
            ? cache.intern(range.slice(mark))
            : cast(string) range.slice(mark);
        token = Token(tok!"comment", text, line, column, index);
    }

    /**
     * Lexes a nesting comment. The two opening bytes are skipped; each `/+`
     * opens another nesting level and each `+/` closes one. Scanning stops
     * when the outermost level is closed or the input ends. The comment text
     * is interned only when the configuration asks for it.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexSlashPlusComment(ref Token token) @trusted
    {
        mixin (tokenStart);
        range.index += 2;
        range.column += 2;
        int depth = 1;
        while (depth > 0 && range.index < range.bytes.length)
        {
            version (X86_64)
            {
                if (haveSSE42 && range.index + 16 < range.bytes.length)
                {
                    skip!(false, '+', '/', '\\', '\r', '\n', 0xe2)(range.bytes.ptr + range.index,
                        &range.index, &range.column);
                }
            }
            immutable ubyte first = range.bytes[range.index];
            if (first != '+' && first != '/')
            {
                popFrontWhitespaceAware();
                continue;
            }
            // '+' can start a closing "+/", '/' can start an opening "/+".
            immutable bool closing = first == '+';
            range.popFront();
            if (range.index < range.bytes.length
                && range.bytes[range.index] == (closing ? '/' : '+'))
            {
                range.popFront();
                if (closing)
                    depth--;
                else
                    depth++;
            }
        }
        immutable string text = config.commentBehavior == CommentBehavior.intern
            ? cache.intern(range.slice(mark))
            : cast(string) range.slice(mark);
        token = Token(tok!"comment", text, line, column, index);
    }

    /**
     * Lexes a double-quoted string literal with escape sequences and an
     * optional `c`/`w`/`d` suffix.
     *
     * If the input ends before the closing quote an error is reported and
     * `token` is set to an error token; if an escape sequence is invalid
     * `token` is reset to `Token.init`.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexStringLiteral(ref Token token) @trusted
    {
        mixin (tokenStart);
        // Skip the opening quote.
        range.popFront();
        for (;;)
        {
            if (range.index >= range.bytes.length)
            {
                error("Error: unterminated string literal");
                token = Token(tok!"");
                return;
            }
            version (X86_64)
            {
                if (haveSSE42 && range.index + 16 < range.bytes.length)
                {
                    skip!(false, '"', '\\', '\r', '\n', 0xe2)(range.bytes.ptr + range.index,
                        &range.index, &range.column);
                }
            }
            immutable ubyte current = range.bytes[range.index];
            if (current == '"')
            {
                range.popFront();
                break;
            }
            if (current != '\\')
            {
                popFrontWhitespaceAware();
                continue;
            }
            if (!lexEscapeSequence())
            {
                token = Token.init;
                return;
            }
        }
        IdType type = tok!"stringLiteral";
        lexStringSuffix(type);
        token = Token(type, cache.intern(range.slice(mark)), line, column,
            index);
    }

    /**
     * Lexes a string literal without escape processing. A literal that
     * starts with a backtick runs to the next backtick; any other literal
     * has a one-byte prefix followed by an opening byte and runs to the next
     * `"`. An optional `c`/`w`/`d` suffix is consumed afterwards.
     *
     * If the input ends before the terminator an error is reported and
     * `token` is set to an error token.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexWysiwygString(ref Token token) @trusted
    {
        mixin (tokenStart);
        IdType type = tok!"stringLiteral";
        immutable bool backtick = range.bytes[range.index] == '`';

        // Skip the backtick, or the one-byte prefix of the quoted form.
        range.popFront();
        if (!backtick)
        {
            if (range.index >= range.bytes.length)
            {
                error("Error: unterminated string literal");
                token = Token(tok!"");
                return;
            }
            // Skip the byte that follows the prefix.
            range.popFront();
        }

        immutable char terminator = backtick ? '`' : '"';
        for (;;)
        {
            if (range.index >= range.bytes.length)
            {
                error("Error: unterminated string literal");
                token = Token(tok!"");
                return;
            }
            version (X86_64)
            {
                // The accelerated scan is only used for the backtick form.
                if (backtick && haveSSE42 && range.index + 16 < range.bytes.length)
                {
                    skip!(false, '\r', '\n', 0xe2, '`')(range.bytes.ptr + range.index,
                        &range.index, &range.column);
                }
            }
            if (range.bytes[range.index] == terminator)
            {
                range.popFront();
                break;
            }
            popFrontWhitespaceAware();
        }
        lexStringSuffix(type);
        token = Token(type, cache.intern(range.slice(mark)), line, column,
            index);
    }

    /**
     * Consumes an optional string suffix at the current position.
     *
     * Params:
     *     type = set to `wstringLiteral` for `w`, `dstringLiteral` for `d`
     *         and `stringLiteral` otherwise.
     *
     * Returns: the suffix byte that was consumed (`'w'`, `'d'` or `'c'`), or
     *     0 if there was none.
     */
    private ubyte lexStringSuffix(ref IdType type) pure nothrow @safe
    {
        type = tok!"stringLiteral";
        if (range.index >= range.bytes.length)
            return 0;

        immutable ubyte candidate = range.bytes[range.index];
        if (candidate == 'w')
            type = tok!"wstringLiteral";
        else if (candidate == 'd')
            type = tok!"dstringLiteral";
        else if (candidate != 'c')
            return 0;

        range.popFront();
        return candidate;
    }

    /**
     * Lexes a delimited string literal. The two-byte opener is skipped; a
     * following `<`, `{`, `[` or `(` selects a bracket-delimited string with
     * the matching closing bracket, and anything else is handled as a
     * heredoc string.
     *
     * If the input ends right after the opener an error is reported and
     * `token` is set to an error token instead of reading past the end of
     * the input.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexDelimitedString(ref Token token)
    {
        mixin (tokenStart);
        range.index += 2;
        range.column += 2;
        if (range.index >= range.bytes.length)
        {
            error("Error: unterminated string literal");
            token = Token(tok!"");
            return;
        }

        ubyte open;
        ubyte close;
        switch (range.bytes[range.index])
        {
        case '<':
            open = '<';
            close = '>';
            break;
        case '{':
            open = '{';
            close = '}';
            break;
        case '[':
            open = '[';
            close = ']';
            break;
        case '(':
            open = '(';
            close = ')';
            break;
        default:
            lexHeredocString(token, mark, line, column, index);
            return;
        }
        // Skip the opening bracket before scanning the body.
        range.popFront();
        lexNormalDelimitedString(token, mark, line, column, index, open, close);
    }

    /**
     * Lexes the body of a bracket-delimited string, starting just after the
     * opening bracket. Nested `open`/`close` pairs are balanced, and the
     * bracket that closes the outermost level must be followed directly by
     * `"`. An optional `c`/`w`/`d` suffix is consumed afterwards.
     *
     * An error is reported and `token` is set to an error token when the
     * closing `"` is missing or when the input ends before the outermost
     * bracket is closed.
     *
     * Params:
     *     token = receives the lexed token.
     *     mark = start position used to slice the token text.
     *     line = line recorded in the token.
     *     column = column recorded in the token.
     *     index = index recorded in the token.
     *     open = the opening bracket byte.
     *     close = the closing bracket byte.
     */
    void lexNormalDelimitedString(ref Token token, size_t mark, size_t line, size_t column,
        size_t index, ubyte open, ubyte close)
    {
        int depth = 1;
        while (range.index < range.bytes.length && depth > 0)
        {
            if (range.bytes[range.index] == open)
            {
                depth++;
                range.popFront();
            }
            else if (range.bytes[range.index] == close)
            {
                depth--;
                range.popFront();
                if (depth <= 0)
                {
                    // The closing bracket may be the last byte of the input,
                    // so check the bounds before looking for the quote.
                    if (range.index < range.bytes.length
                        && range.bytes[range.index] == '"')
                    {
                        range.popFront();
                    }
                    else
                    {
                        error("Error: `\"` expected to end delimited string literal");
                        token = Token(tok!"");
                        return;
                    }
                }
            }
            else
                popFrontWhitespaceAware();
        }
        if (depth > 0)
        {
            // The input ended while brackets were still open.
            error("Error: unterminated string literal");
            token = Token(tok!"");
            return;
        }
        IdType type = tok!"stringLiteral";
        lexStringSuffix(type);
        token = Token(type, cache.intern(range.slice(mark)), line, column, index);
    }

    /**
     * Lexes a heredoc-style delimited string: an identifier, a newline, the
     * text, and then the same identifier at the start of a line followed by
     * `"`. Errors are reported for a missing newline, a missing closing
     * identifier and a missing closing quote, but a string literal token is
     * produced in every case.
     *
     * Params:
     *     token = receives the lexed token.
     *     mark = start position used to slice the token text.
     *     line = line recorded in the token.
     *     column = column recorded in the token.
     *     index = index recorded in the token.
     */
    void lexHeredocString(ref Token token, size_t mark, size_t line, size_t column, size_t index)
    {
        Token ident;
        lexIdentifier(ident);
        if (!isNewline())
            error("Newline expected");
        else
            popFrontWhitespaceAware();

        while (range.index < range.bytes.length)
        {
            if (!isNewline())
            {
                range.popFront();
                continue;
            }
            // At a line break: see whether the next line starts with the
            // closing identifier.
            popFrontWhitespaceAware();
            if (!range.canPeek(ident.text.length))
            {
                error(ident.text ~ " expected");
                break;
            }
            if (range.peek(ident.text.length - 1) == ident.text)
            {
                range.popFrontN(ident.text.length);
                break;
            }
        }

        if (range.index < range.bytes.length && range.bytes[range.index] == '"')
            range.popFront();
        else
            error("`\"` expected");

        IdType type = tok!"stringLiteral";
        lexStringSuffix(type);
        token = Token(type, cache.intern(range.slice(mark)), line, column, index);
    }

    /**
     * Lexes a token string (`q{ ... }`). The body is lexed token by token
     * with whitespace included and strings kept in source form, and the text
     * of each token is appended to rebuild the literal. Braces are balanced,
     * and an optional `c`/`w`/`d` suffix is consumed and appended afterwards.
     * The previous whitespace and string settings are restored on exit.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexTokenString(ref Token token)
    {
        mixin (tokenStart);
        assert (range.bytes[range.index] == 'q');
        range.popFront();
        assert (range.bytes[range.index] == '{');
        range.popFront();
        auto app = appender!string();
        app.put("q{");
        int depth = 1;

        // Temporarily lex with whitespace included and strings as written.
        immutable WhitespaceBehavior oldWhitespace = config.whitespaceBehavior;
        immutable StringBehavior oldString = config.stringBehavior;
        config.whitespaceBehavior = WhitespaceBehavior.include;
        config.stringBehavior = StringBehavior.source;
        scope (exit)
        {
            config.whitespaceBehavior = oldWhitespace;
            config.stringBehavior = oldString;
        }

        advance(_front);
        while (depth > 0 && !empty)
        {
            auto current = front();
            if (current.text !is null)
                app.put(current.text);
            else
                app.put(str(current.type));

            immutable bool closing = current.type == tok!"}";
            if (closing)
                depth--;
            else if (current.type == tok!"{")
                depth++;

            // The brace that closes the outermost level is not stepped past.
            if (!closing || depth > 0)
                popFront();
        }
        IdType type = tok!"stringLiteral";
        auto b = lexStringSuffix(type);
        if (b != 0)
            app.put(b);
        token = Token(type, cache.intern(cast(const(ubyte)[]) app.data), line,
            column, index);
    }

    /**
     * Lexes a hex string literal. The two-byte opener is skipped; the body
     * may contain hexadecimal digits and whitespace only and ends at the
     * next `"`. An optional `c`/`w`/`d` suffix is consumed afterwards.
     *
     * An error is reported and `token` is set to an error token when the
     * input ends before the closing quote or another character is found.
     *
     * Params:
     *     token = receives the lexed token.
     */
    void lexHexString(ref Token token)
    {
        mixin (tokenStart);
        range.index += 2;
        range.column += 2;

        for (;;)
        {
            if (range.index >= range.bytes.length)
            {
                error("Error: unterminated hex string literal");
                token = Token(tok!"");
                return;
            }
            if (isWhitespace())
            {
                popFrontWhitespaceAware();
                continue;
            }
            immutable ubyte current = range.bytes[range.index];
            if (current == '"')
            {
                range.popFront();
                break;
            }
            immutable bool hexDigit = (current >= '0' && current <= '9')
                || (current >= 'A' && current <= 'F')
                || (current >= 'a' && current <= 'f');
            if (!hexDigit)
            {
                error("Error: invalid character in hex string");
                token = Token(tok!"");
                return;
            }
            range.popFront();
        }

        IdType type = tok!"stringLiteral";
        lexStringSuffix(type);
        token = Token(type, cache.intern(range.slice(mark)), line, column,
            index);
    }

    /**
     * Consumes a named character entity of the form `&name;`, starting at
     * the `&`.
     *
     * Returns: `true` if an identifier followed by `;` was consumed;
     *     otherwise an error is reported and `false` is returned.
     */
    bool lexNamedEntity()
    in { assert (range.bytes[range.index] == '&'); }
    do
    {
        // Skip the ampersand.
        range.popFront();
        Token entity;
        lexIdentifier(entity, true);

        immutable bool wellFormed = entity.type == tok!"identifier"
            && !range.empty && range.bytes[range.index] == ';';
        if (wellFormed)
        {
            range.popFront();
            return true;
        }
        error("Error: invalid named character entity");
        return false;
    }

    /**
     * Consumes one escape sequence, starting at the backslash.
     *
     * Handled are named entities (`\&name;`), the single-character escapes,
     * `\x` with 2 hex digits, `\u` with 4, `\U` with 8, and octal escapes of
     * up to three digits. For any other character an error is reported and
     * input is skipped up to and including the next `;`.
     *
     * Returns: `true` if a valid escape sequence was consumed, `false` after
     *     an error has been reported.
     */
    bool lexEscapeSequence()
    {
        // Skip the backslash.
        range.popFront();
        if (range.index >= range.bytes.length)
        {
            error("Error: non-terminated character escape sequence.");
            return false;
        }
        immutable ubyte kind = range.bytes[range.index];
        switch (kind)
        {
        case '&':
            return lexNamedEntity();
        case '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v':
            range.popFront();
            break;
        case 'x', 'u', 'U':
            // The three hex forms differ only in digit count and message.
            size_t digits = 8;
            string complaint = "Error: at least 8 hex digits expected.";
            if (kind == 'x')
            {
                digits = 2;
                complaint = "Error: 2 hex digits expected.";
            }
            else if (kind == 'u')
            {
                digits = 4;
                complaint = "Error: at least 4 hex digits expected.";
            }
            range.popFront();
            foreach (position; 0 .. digits)
            {
                if (range.index >= range.bytes.length)
                {
                    error(complaint);
                    return false;
                }
                immutable ubyte digit = range.bytes[range.index];
                immutable bool hexDigit = (digit >= '0' && digit <= '9')
                    || (digit >= 'a' && digit <= 'f')
                    || (digit >= 'A' && digit <= 'F');
                if (!hexDigit)
                {
                    error(complaint);
                    return false;
                }
                range.popFront();
            }
            break;
        case '0':
            // A '0' at the end of input or directly before a single quote
            // is consumed on its own; otherwise it starts an octal escape.
            if (range.index + 1 >= range.bytes.length || range.peekAt(1) == '\'')
            {
                range.popFront();
                break;
            }
            goto case;
        case '1': .. case '7':
            size_t consumed = 0;
            while (consumed < 3 && range.index < range.bytes.length
                && range.bytes[range.index] >= '0' && range.bytes[range.index] <= '7')
            {
                range.popFront();
                consumed++;
            }
            break;
        default:
            error("Invalid escape sequence");
            for (;;)
            {
                if (range.index >= range.bytes.length)
                {
                    error("Error: non-terminated character escape sequence.");
                    break;
                }
                immutable bool terminator = range.bytes[range.index] == ';';
                range.popFront();
                if (terminator)
                    break;
            }
            return false;
        }
        return true;
    }

    /**
     * Lexes a character literal starting at the opening single quote.
     *
     * On success `token` becomes a `characterLiteral` whose text is the
     * interned slice from the opening to the closing quote. If the closing
     * quote is missing, an error is reported and `token` is set to `tok!""`.
     *
     * Params:
     *     token = receives the lexed token
     */
    void lexCharacterLiteral(ref Token token)
    {
        mixin (tokenStart);
        range.popFront(); // opening quote
        if (range.empty)
            goto err;
        if (range.bytes[range.index] == '\\')
            lexEscapeSequence(); // failures are reported by lexEscapeSequence itself
        else if (range.bytes[range.index] == '\'')
        {
            // Quote directly after the opening quote. The token stored here is
            // replaced by the closing-quote check below in every case.
            range.popFront();
            token = Token(tok!"characterLiteral", cache.intern(range.slice(mark)),
                line, column, index);
        }
        else if (range.bytes[range.index] & 0x80)
        {
            // Skip every byte with the high bit set. The bounds check matters:
            // without it a non-ASCII character at the very end of the input
            // would index one past the buffer.
            while (range.index < range.bytes.length
                    && (range.bytes[range.index] & 0x80))
                range.popFront();
        }
        else
            popFrontWhitespaceAware();

        if (range.index < range.bytes.length && range.bytes[range.index] == '\'')
        {
            range.popFront();
            token = Token(tok!"characterLiteral", cache.intern(range.slice(mark)),
                line, column, index);
        }
        else
        {
    err:
            error("Error: Expected `'` to end character literal");
            token = Token(tok!"");
        }
    }

    /**
     * Lexes an identifier: consumes bytes until isSeparating(0) reports a
     * separator, then stores the interned text in `token`.
     *
     * Params:
     *     token = receives the identifier token
     *     silent = when true and the current position is already a separator,
     *         return immediately without reporting an error and without
     *         touching `token`
     */
    void lexIdentifier(ref Token token, const bool silent = false) @trusted
    {
        mixin (tokenStart);

        immutable bool startsOnSeparator = isSeparating(0);
        if (startsOnSeparator && silent)
            return;
        if (startsOnSeparator)
        {
            error("Invalid identifier");
            range.popFront();
        }

        for (;;)
        {
            version (X86_64)
            {
                // Fast path: let the SSE4.2 helper count a run of ASCII
                // letters/underscores, but only while more than 16 bytes
                // remain after the current index.
                immutable bool fastPath = haveSSE42
                    && range.index + 16 < range.bytes.length;
                if (fastPath)
                {
                    immutable ulong matched = rangeMatch!(false, 'a', 'z', 'A', 'Z', '_', '_')
                        (range.bytes.ptr + range.index);
                    range.column += matched;
                    range.index += matched;
                }
            }
            if (isSeparating(0))
                break;
            range.popFront();
        }

        token = Token(tok!"identifier", cache.intern(range.slice(mark)), line,
            column, index);
    }

    /**
     * Lexes a token that begins with '.': one of `.`, `..`, `...`, or a
     * number with a leading dot (delegated to lexNumber).
     *
     * Params:
     *     token = receives the lexed token
     */
    void lexDot(ref Token token)
    {
        mixin (tokenStart);

        immutable bool hasLookahead = range.index + 1 < range.bytes.length;
        if (hasLookahead)
        {
            const next = range.peekAt(1);
            if (next >= '0' && next <= '9')
            {
                lexNumber(token);
                return;
            }
            if (next == '.')
            {
                range.popFront();
                range.popFront();
                immutable bool thirdDot = range.index < range.bytes.length
                    && range.bytes[range.index] == '.';
                if (thirdDot)
                {
                    range.popFront();
                    token = Token(tok!"...", null, line, column, index);
                }
                else
                {
                    token = Token(tok!"..", null, line, column, index);
                }
                return;
            }
        }

        // Last byte of the input, or followed by anything else: a plain dot.
        range.popFront();
        token = Token(tok!".", null, line, column, index);
    }

    /**
     * Consumes three bytes, counts them as one line break and produces a
     * whitespace token. (Presumably the three bytes are the UTF-8 encoding of
     * U+2028 or U+2029, the sequences isNewline tests for.)
     *
     * The token text is the interned source slice only when the configuration
     * asks for whitespace to be included; otherwise it is empty.
     *
     * Params:
     *     token = receives the whitespace token
     */
    void lexLongNewline(ref Token token) @nogc
    {
        mixin (tokenStart);
        foreach (_; 0 .. 3)
            range.popFront();
        range.incrementLine();

        string text = "";
        if (config.whitespaceBehavior == WhitespaceBehavior.include)
            text = cache.intern(range.slice(mark));

        token = Token(tok!"whitespace", text, line, column, index);
    }

    /**
     * Returns: true if the current byte is '\n' or '\r', or if it has its
     *     high bit set, more than two bytes follow the current index, and the
     *     three bytes at the current position equal U+2028 or U+2029.
     */
    bool isNewline() @nogc
    {
        const current = range.bytes[range.index];
        if (current == '\n' || current == '\r')
            return true;
        if (!(current & 0x80))
            return false;
        if (range.index + 2 >= range.bytes.length)
            return false;
        return range.peek(2) == "\u2028" || range.peek(2) == "\u2029";
    }

    /**
     * Tells whether the byte `offset` positions ahead of the current index
     * ends an identifier-like token.
     *
     * The end of input always separates. ASCII bytes are classified by a
     * lookup table: digits, letters and '_' do not separate, every other
     * ASCII byte does. Bytes with the high bit set separate only when they
     * start U+2028 or U+2029.
     *
     * This function does not modify the lexer's range.
     *
     * Params:
     *     offset = number of bytes ahead of the current index to inspect
     *
     * Returns: true if the inspected position is a separator
     */
    bool isSeparating(size_t offset) @nogc
    {
        enum : ubyte
        {
            n, y, m // no, yes, maybe
        }

        if (range.index + offset >= range.bytes.length)
            return true;
        auto c = range.bytes[range.index + offset];
        // One entry per byte value, 16 per row (row k covers 0x{k}0 .. 0x{k}F).
        static immutable ubyte[256] LOOKUP_TABLE = [
            y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y,
            y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y,
            y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y,
            n, n, n, n, n, n, n, n, n, n, y, y, y, y, y, y,
            y, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n,
            n, n, n, n, n, n, n, n, n, n, n, y, y, y, y, n,
            y, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n,
            n, n, n, n, n, n, n, n, n, n, n, y, y, y, y, y,
            m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
            m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
            m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
            m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
            m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
            m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
            m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
            m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m
        ];
        immutable ubyte result = LOOKUP_TABLE[c];
        if (result == n)
            return false;
        if (result == y)
            return true;
        if (result == m)
        {
            // Look at the three bytes starting at `offset` on a *copy* of the
            // range. Advancing the live range here would consume input from
            // inside a predicate and test the wrong position.
            auto r = range;
            r.popFrontN(offset);
            return (r.canPeek(2) && (r.peek(2) == "\u2028"
                || r.peek(2) == "\u2029"));
        }
        assert (false);
    }



    // Source text mixed in at the top of the lex* functions. It snapshots the
    // range's current index, column and line and takes a mark, so that the
    // function can later slice the consumed bytes (range.slice(mark)) and stamp
    // the resulting Token with the position where it started.
    enum tokenStart = q{
        size_t index = range.index;
        size_t column = range.column;
        size_t line = range.line;
        auto mark = range.mark();
    };

    /// Appends `message` to the collected messages as an error located at the
    /// range's current line and column.
    void error(string message)
    {
        enum isError = true;
        _messages ~= Message(range.line, range.column, message, isError);
    }

    /// Appends `message` to the collected messages as a non-error (warning)
    /// located at the range's current line and column.
    void warning(string message)
    {
        enum isError = false;
        _messages ~= Message(range.line, range.column, message, isError);
        assert (_messages.length > 0);
    }

    // Diagnostics collected so far; error() and warning() append to this.
    Message[] _messages;
    // String interner used for the text stored in produced tokens.
    StringCache* cache;
    // Lexer options (e.g. whitespaceBehavior, consulted by lexLongNewline).
    LexerConfig config;
    // When true, lexIdentifier may take the rangeMatch fast path on X86_64.
    bool haveSSE42;
}

/**
 * Creates a token range over the given source code, using a
 * default-initialized lexer configuration and a newly allocated, GC-managed
 * string cache whose bucket count is derived from the input length.
 */
public auto byToken(R)(R range)
if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
{
    LexerConfig config;
    auto cache = new StringCache(optimalBucketCount(range.length));
    return DLexer(range, config, cache);
}

/**
 * Creates a token range over the given source code, using a
 * default-initialized lexer configuration and the caller's string cache.
 */
public auto byToken(R)(R range, StringCache* cache)
if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
{
    LexerConfig defaults;
    // Same construction as the three-argument overload, with default options.
    return byToken(range, defaults, cache);
}

/**
 * Creates a token range from the given source code. Uses the provided lexer
 * configuration and string cache.
 *
 * Params:
 *     range = the source code; a dynamic array whose element type converts
 *         to `ubyte`
 *     config = the lexer configuration to use
 *     cache = the string cache handed to the lexer
 *
 * Returns: a `DLexer` constructed from the three arguments
 */
public auto byToken(R)(R range, const LexerConfig config, StringCache* cache)
if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
{
    return DLexer(range, config, cache);
}

/**
 * Helper function used to avoid too many allocations while lexing.
 *
 * The result is always a power of two between 1 and 2^30, so it can be passed
 * directly to the `StringCache` constructor, including for empty input.
 *
 * Params:
 *      size = The length in bytes of the source file.
 *
 * Returns:
 *      The optimal initial bucket count a `StringCache` should have.
 */
size_t optimalBucketCount(size_t size)
{
    import std.math : nextPow2;

    // ceil(size / 32), written so that it cannot wrap for sizes close to
    // size_t.max (unlike `(size + 31) / 32`).
    immutable size_t chunks = size / 32U + (size % 32U != 0 ? 1 : 0);
    // nextPow2(0) is 0, which is not a valid bucket count; use the smallest
    // power of two for empty input instead.
    if (chunks == 0)
        return 1;
    return nextPow2(chunks).min(1U << 30U);
}
///
unittest
{
    assert(optimalBucketCount(0) == 1);
    assert(optimalBucketCount(1) == 2);
    assert(optimalBucketCount(9000 * 32) == 16384);
    static if (size_t.sizeof == ulong.sizeof)
        assert(optimalBucketCount(100_000_000_000UL) == 1 << 30);
}

/**
 * The string cache is used for string interning.
 *
 * It will only store a single copy of any string that it is asked to hold.
 * Interned strings can be compared for equality by comparing their $(B .ptr)
 * field.
 *
 * Default and postblit constructors are disabled. When a StringCache goes out
 * of scope, the memory held by it is freed.
 *
 * See_also: $(LINK http://en.wikipedia.org/wiki/String_interning)
 */
struct StringCache
{
public pure nothrow @nogc:

    @disable this();
    @disable this(this);

    /**
     * Params: bucketCount = the initial number of buckets. Must be a
     * power of two
     */
    this(size_t bucketCount) nothrow @trusted @nogc
    in
    {
        import core.bitop : popcnt;
        static if (size_t.sizeof == 8)
        {
            immutable low = popcnt(cast(uint) bucketCount);
            immutable high = popcnt(cast(uint) (bucketCount >> 32));
            assert ((low == 0 && high == 1) || (low == 1 && high == 0));
        }
        else
        {
            static assert (size_t.sizeof == 4);
            assert (popcnt(cast(uint) bucketCount) == 1);
        }
    }
    do
    {
        import core.exception : onOutOfMemoryError;

        // Zero-initialized so that every bucket starts as an empty chain.
        auto table = cast(Node**) calloc((Node*).sizeof, bucketCount);
        if (table is null)
            onOutOfMemoryError();
        buckets = table[0 .. bucketCount];
    }

    /// Frees every block, every node (and its string when it was allocated
    /// individually) and the bucket table.
    ~this()
    {
        Block* current = rootBlock;
        while (current !is null)
        {
            Block* prev = current;
            current = current.next;
            free(cast(void*) prev);
        }
        foreach (nodePointer; buckets)
        {
            Node* currentNode = nodePointer;
            while (currentNode !is null)
            {
                // Small strings live inside a Block and were released above;
                // only big strings own their storage.
                if (currentNode.mallocated)
                    free(currentNode.str.ptr);
                Node* prev = currentNode;
                currentNode = currentNode.next;
                free(prev);
            }
        }
        rootBlock = null;
        free(buckets.ptr);
        buckets = null;
    }

    /**
     * Caches a string.
     *
     * Params: str = the bytes to intern
     * Returns: the cached copy of `str`, or `""` when `str` is null or empty
     */
    string intern(const(ubyte)[] str) @safe
    {
        if (str is null || str.length == 0)
            return "";
        return _intern(str);
    }

    /**
     * ditto
     */
    string intern(string str) @trusted
    {
        return intern(cast(ubyte[]) str);
    }

    /**
     * The default bucket count for the string cache.
     */
    static enum defaultBucketCount = 4096;

private:

    // Returns the cached copy of `bytes`, creating it (and a node at the head
    // of its bucket chain) when it is not stored yet.
    string _intern(const(ubyte)[] bytes) @trusted
    {
        import core.exception : onOutOfMemoryError;

        immutable uint hash = hashBytes(bytes);
        // buckets.length is a power of two, so masking selects the bucket.
        immutable size_t index = hash & (buckets.length - 1);
        Node* s = find(bytes, hash);
        if (s !is null)
            return cast(string) s.str;
        ubyte[] mem = void;
        bool mallocated = bytes.length > BIG_STRING;
        if (mallocated)
        {
            auto raw = cast(ubyte*) malloc(bytes.length);
            if (raw is null)
                onOutOfMemoryError();
            mem = raw[0 .. bytes.length];
        }
        else
            mem = allocate(bytes.length);
        mem[] = bytes[];
        Node* node = cast(Node*) malloc(Node.sizeof);
        if (node is null)
            onOutOfMemoryError();
        node.str = mem;
        node.hash = hash;
        node.next = buckets[index];
        node.mallocated = mallocated;
        buckets[index] = node;
        return cast(string) mem;
    }

    // Walks the bucket chain selected by `hash` and returns the node whose
    // hash and contents both match `bytes`, or null when there is none.
    Node* find(const(ubyte)[] bytes, uint hash) @trusted
    {
        immutable size_t index = hash & (buckets.length - 1);
        Node* node = buckets[index];
        while (node !is null)
        {
            if (node.hash == hash && bytes == cast(ubyte[]) node.str)
                return node;
            node = node.next;
        }
        return node;
    }

    // 32-bit hash of `data`. The constants and the mixing steps appear to
    // follow MurmurHash2, with the input length as the initial state.
    static uint hashBytes(const(ubyte)[] data) pure nothrow @trusted @nogc
    in
    {
        assert (data !is null);
        assert (data.length > 0);
    }
    do
    {
        immutable uint m = 0x5bd1e995;
        immutable int r = 24;
        uint h = cast(uint) data.length;
        // Mix four bytes at a time, assembled least-significant byte first.
        while (data.length >= 4)
        {
            uint k = (cast(ubyte) data[3]) << 24
                | (cast(ubyte) data[2]) << 16
                | (cast(ubyte) data[1]) << 8
                | (cast(ubyte) data[0]);
            k *= m;
            k ^= k >> r;
            k *= m;
            h *= m;
            h ^= k;
            data = data[4 .. $];
        }
        // Fold in the remaining 0-3 bytes.
        switch (data.length & 3)
        {
        case 3:
            h ^= data[2] << 16;
            goto case;
        case 2:
            h ^= data[1] << 8;
            goto case;
        case 1:
            h ^= data[0];
            h *= m;
            break;
        default:
            break;
        }
        h ^= h >> 13;
        h *= m;
        h ^= h >> 15;
        return h;
    }

    // Hands out `numBytes` bytes of block storage. Only the first four blocks
    // of the list are searched for free space; if none of them fits, a new
    // block is allocated and put at the head of the list.
    ubyte[] allocate(size_t numBytes) pure nothrow @trusted @nogc
    in
    {
        assert (numBytes != 0);
    }
    out (result)
    {
        assert (result.length == numBytes);
    }
    do
    {
        import core.exception : onOutOfMemoryError;

        Block* r = rootBlock;
        size_t i = 0;
        while  (i <= 3 && r !is null)
        {
            immutable size_t available = r.bytes.length;
            immutable size_t oldUsed = r.used;
            immutable size_t newUsed = oldUsed + numBytes;
            if (newUsed <= available)
            {
                r.used = newUsed;
                return r.bytes[oldUsed .. newUsed];
            }
            i++;
            r = r.next;
        }
        Block* b = cast(Block*) calloc(Block.sizeof, 1);
        if (b is null)
            onOutOfMemoryError();
        b.used = numBytes;
        b.next = rootBlock;
        rootBlock = b;
        return b.bytes[0 .. numBytes];
    }

    // One interned string: an entry in a bucket's singly linked chain.
    static struct Node
    {
        ubyte[] str = void;
        Node* next = void;
        uint hash = void;
        // true when `str` was malloc'ed on its own rather than carved out of
        // a Block.
        bool mallocated = void;
    }

    // Fixed-size chunk of storage shared by small strings.
    static struct Block
    {
        Block* next;
        size_t used;
        enum BLOCK_CAPACITY = BLOCK_SIZE - size_t.sizeof - (void*).sizeof;
        ubyte[BLOCK_CAPACITY] bytes;
    }

    static assert (BLOCK_SIZE == Block.sizeof);

    enum BLOCK_SIZE = 1024 * 16;

    // If a string would take up more than 1/4 of a block, allocate it outside
    // of the block.
    enum BIG_STRING = BLOCK_SIZE / 4;

    // Hash table: one chain head per bucket; the length is a power of two.
    Node*[] buckets;
    // Head of the list of storage blocks (most recently allocated first).
    Block* rootBlock;
}

// Local declarations of the C allocation functions. They are declared `pure
// nothrow @nogc` here, which is what allows StringCache's members (all
// `pure nothrow @nogc`) to call them.
private extern(C) void* calloc(size_t, size_t) nothrow pure @nogc @trusted;
private extern(C) void* malloc(size_t) nothrow pure @nogc @trusted;
private extern(C) void free(void*) nothrow pure @nogc @trusted;

// A simple import declaration must come back from getTokensForParser as
// exactly five tokens (no whitespace tokens among them).
unittest
{
    auto source = cast(ubyte[]) q{ import std.stdio;}c;
    auto tokens = getTokensForParser(source, LexerConfig(),
        new StringCache(StringCache.defaultBucketCount));
    assert (tokens.map!"a.type"().equal([tok!"import", tok!"identifier", tok!".",
        tok!"identifier", tok!";"]));
}

/// Test \x char sequence
unittest
{
    auto toks = (string s) => byToken(cast(ubyte[])s);

    // valid: every combination of two hex digits, in both letter cases, must
    // lex as a character literal and nothing else
    immutable hex = ['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','A','B','C','D','E','F'];
    auto source = "";
    foreach (h1; hex)
        foreach (h2; hex)
            source ~= "'\\x" ~ h1 ~ h2 ~ "'";
    assert (toks(source).filter!(t => t.type != tok!"characterLiteral").empty);

    // invalid: the first message is the hex-digit error, located at the column
    // of the first byte that is not a hex digit
    assert (toks(`'\x'`).messages[0] == DLexer.Message(1,4,"Error: 2 hex digits expected.",true));
    assert (toks(`'\x_'`).messages[0] == DLexer.Message(1,4,"Error: 2 hex digits expected.",true));
    assert (toks(`'\xA'`).messages[0] == DLexer.Message(1,5,"Error: 2 hex digits expected.",true));
    assert (toks(`'\xAY'`).messages[0] == DLexer.Message(1,5,"Error: 2 hex digits expected.",true));
    assert (toks(`'\xXX'`).messages[0] == DLexer.Message(1,4,"Error: 2 hex digits expected.",true));
}

version (X86_64)
{
    // Selects which inline-assembly dialect pcmpestri is written in:
    // DMD-style (true) or GDC/GCC-style (false).
    version (DigitalMars)
        private enum useDMDStyle = true;
    else version (LDC)
        private enum useDMDStyle = (__VERSION__ < 2092); // GDC-style supported since v1.22
    else
        private enum useDMDStyle = false; // not supported by GDC

    /**
     * Wrapper around the PCMPESTRI instruction.
     *
     * Loads 16 bytes starting at `bytes` into XMM1 and the (at most 8)
     * `chars`, packed into one 64-bit constant, into XMM2, then executes
     * PCMPESTRI with `flags` as the immediate and explicit lengths
     * `chars.length` (EAX) and 16 (EDX).
     *
     * Because a full 16 bytes are read, the caller has to make sure that many
     * bytes are readable at `bytes`.
     *
     * Returns: the index the instruction leaves in RCX
     */
    private ulong pcmpestri(ubyte flags, chars...)(const ubyte* bytes) pure nothrow
        @trusted @nogc if (chars.length <= 8)
    {
        enum constant = ByteCombine!chars;
        enum charsLength = chars.length;

        static if (useDMDStyle)
        {
            // Naked function: no prologue/epilogue is generated, so the
            // argument is read straight from its register and the function
            // returns through the explicit `ret` below.
            asm pure nothrow @nogc
            {
                naked;
            }
            version (Windows) // `bytes` in RCX
                asm pure nothrow @nogc { movdqu XMM1, [RCX]; }
            else // `bytes` in RDI
                asm pure nothrow @nogc { movdqu XMM1, [RDI]; }
            asm pure nothrow @nogc
            {
                mov R10, constant;
                movq XMM2, R10;
                mov RAX, charsLength;
                mov RDX, 16;
                pcmpestri XMM2, XMM1, flags;
                mov RAX, RCX;
                ret;
            }
        }
        else // GDC-style inline asm (GCC basically)
        {
            ulong result;
            asm pure nothrow @nogc
            {
                `movdqu    %1, %%xmm1
                 movq      %3, %%xmm2
                 pcmpestri %5, %%xmm1, %%xmm2`
                : "=c" (result)   // %0: pcmpestri result in RCX, to be stored into `result`
                : "m" (*bytes),   // %1: address of `bytes` string
                  "d" (16),       // %2: length of `bytes` head in XMM1, as pcmpestri input in EDX
                  "r" (constant), // %3: max 8 `chars` to load into GP register, then XMM2
                  "a" (charsLength), // %4: length in XMM2, as pcmpestri input in EAX
                  "i" (flags)     // %5: `flags` immediate
                : "xmm1", "xmm2"; // clobbered registers
            }
            return result;
        }
    }

    /**
     * Advances `*pindex` and `*pcolumn` by the value pcmpestri returns for
     * the 16 bytes at `bytes`: between 0 and 16 bytes that match (or, when
     * $(B matching) is false, do not match) one of the given $(B chars).
     */
    void skip(bool matching, chars...)(const ubyte* bytes, ulong* pindex, ulong* pcolumn) pure nothrow
        @trusted @nogc if (chars.length <= 8)
    {
        // PCMPESTRI immediate, selected by `matching`.
        enum flags = matching ? 0b0001_0000 : 0b0000_0000;

        const advance = pcmpestri!(flags, chars)(bytes);
        *pindex += advance;
        *pcolumn += advance;
    }

    /**
     * $(B chars) is a list of (low, high) byte pairs, each describing one
     * inclusive range.
     *
     * Returns: the number of bytes starting at the given location that match
     *     (or do not match if $(B invert) is true) the byte ranges in $(B chars).
     */
    ulong rangeMatch(bool invert, chars...)(const ubyte* bytes) pure nothrow @trusted @nogc
    {
        static assert(chars.length % 2 == 0);
        // PCMPESTRI immediate, selected by `invert`.
        enum rangeMatchFlags = invert ? 0b0000_0100 : 0b0001_0100;

        return pcmpestri!(rangeMatchFlags, chars)(bytes);
    }

    /// Packs up to eight byte values into one `ulong`, with the first
    /// argument in the least significant byte.
    template ByteCombine(c...)
    {
        static assert (c.length <= 8);
        static if (c.length <= 1)
            enum ulong ByteCombine = c[0];
        else
            enum ulong ByteCombine = (ByteCombine!(c[1..$]) << 8) | c[0];
    }
}

// Input that consists only of a comment opener ("/++" or "/**") must not make
// the lexer throw a RangeError.
unittest
{
    import core.exception : RangeError;
    import std.exception : assertNotThrown;

    static immutable src1 = "/++";
    static immutable src2 = "/**";

    LexerConfig cf;
    StringCache ca = StringCache(16);

    assertNotThrown!RangeError(getTokensForParser(src1, cf, &ca));
    assertNotThrown!RangeError(getTokensForParser(src2, cf, &ca));
}

// A string literal containing the invalid escape `\e` must produce an error
// token (type tok!"") and at least one message.
unittest
{
    static immutable src = `"\eeee"`;

    LexerConfig cf;
    StringCache ca = StringCache(16);

    auto l = DLexer(src, cf, &ca);
    assert(l.front().type == tok!"");
    assert(!l.messages.empty);
}

// Named character entities (`\&name;`) inside string literals: well-formed
// ones lex silently, malformed or unterminated ones produce an error token and
// a message at the column where lexing stopped.
//
// The inputs must contain the entity references spelled out (`\&copy;`,
// `\&trade;`, `\&urcorn;`); the asserted columns only add up for that
// spelling, not for the decoded characters.
unittest
{
    alias Msg = DLexer.Message;
    LexerConfig cf;
    StringCache ca = StringCache(16);

    {
        auto l = DLexer(`"\&copy;"`, cf, &ca);
        assert(l.front().type == tok!"stringLiteral");
        assert(l.messages == []);
    }
    {
        auto l = DLexer(`"\&trade;\&urcorn;"`, cf, &ca);
        assert(l.front().type == tok!"stringLiteral");
        assert(l.messages == []);
    }
    {
        // missing ';' before the closing quote
        auto l = DLexer(`"\&trade"`, cf, &ca);
        assert(l.front().type == tok!"");
        assert(l.messages == [ Msg(1, 9, "Error: invalid named character entity", true) ]);
    }
    {
        // first entity fine, second one lacks its ';'
        auto l = DLexer(`"\&trade;\&urcorn"`, cf, &ca);
        assert(l.front().type == tok!"");
        assert(l.messages == [ Msg(1, 18, "Error: invalid named character entity", true) ]);
    }
    {
        // empty entity name
        auto l = DLexer(`"\&"`, cf, &ca);
        assert(l.front().type == tok!"");
        assert(l.messages == [ Msg(1, 4, "Error: invalid named character entity", true) ]);
    }
    {
        auto l = DLexer(`"\&0"`, cf, &ca);
        assert(l.front().type == tok!"");
        assert(l.messages == [ Msg(1, 5, "Error: invalid named character entity", true) ]);
    }
    {
        // input ends inside the entity
        auto l = DLexer(`"\&copy`, cf, &ca);
        assert(l.front().type == tok!"");
        assert(l.messages == [ Msg(1, 8, "Error: invalid named character entity", true) ]);
    }
    {
        // entity complete, but the string literal is never closed
        auto l = DLexer(`"\&copy;`, cf, &ca);
        assert(l.front().type == tok!"");
        assert(l.messages == [ Msg(1, 9, "Error: unterminated string literal", true) ]);
    }
}

// legacy code using compatibility comment and trailingComment
//
// Lexes a small module with getTokensForParser and checks, token by token, the
// token type together with the `comment` and `trailingComment` strings attached
// to it.
unittest
{
    import std.conv : to;
    import std.exception : enforce;

    static immutable src = `/// this is a module.
// mixed
/// it can do stuff
module foo.bar;

// hello

/**
 * some doc
 * hello
 */
int x; /// very nice

// TODO: do stuff
void main() {
    #line 40
    /// could be better
    writeln(":)");
}

/// end of file`;

    LexerConfig cf;
    StringCache ca = StringCache(16);

    const tokens = getTokensForParser(src, cf, &ca);

    // Throws (via enforce) with a descriptive message, attributed to the
    // caller's file and line, when `a` and `b` differ.
    void assertEquals(T)(T a, T b, string what, string file = __FILE__, size_t line = __LINE__)
    {
        enforce(a == b, "Failed " ~ what ~ " '" ~ a.to!string ~ "' == '" ~ b.to!string ~ "'", file, line);
    }

    // Checks type, comment and trailingComment of tokens[index].
    void test(size_t index, IdType type, string comment, string trailingComment,
            string file = __FILE__, size_t line = __LINE__)
    {
        assertEquals(tokens[index].type, type, "type", file, line);
        assertEquals(tokens[index].comment, comment, "comment", file, line);
        assertEquals(tokens[index].trailingComment, trailingComment, "trailingComment", file, line);
    }

    test(0, tok!"module", "this is a module.\nit can do stuff", "");
    test(1, tok!"identifier", "", "");
    test(2, tok!".", "", "");
    test(3, tok!"identifier", "", "");
    test(4, tok!";", "", "");
    test(5, tok!"int", "some doc\nhello", "");
    test(6, tok!"identifier", "", "");
    test(7, tok!";", "", "very nice");
    test(8, tok!"void", "", "");
    test(9, tok!"identifier", "", "");
    test(10, tok!"(", "", "");
    test(11, tok!")", "", "");
    test(12, tok!"{", "", "");
    test(13, tok!"identifier", "could be better", "");
    test(14, tok!"(", "", "");
    test(15, tok!"stringLiteral", "", "");
    test(16, tok!")", "", "");
    test(17, tok!";", "", "");
    test(18, tok!"}", "", "");
}

// dlang-community/D-Scanner#805
//
// Compile-time check: a Token field read from a mutable, a const and an
// immutable class instance can each be used to initialize an immutable Token.
unittest
{
    final class SomeExpr
    {
        Token tok;
    }

    auto e1 = new SomeExpr();
    const e2 = new SomeExpr();
    immutable e3 = new immutable SomeExpr();

    immutable t1 = e1.tok;
    immutable t2 = e2.tok;
    immutable t3 = e3.tok;
}