testTokenizer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "base/CharacterSet.h"
11 #include "compat/cppunit.h"
12 #include "parser/Tokenizer.h"
13 #include "unitTestMain.h"
14 
15 class TestTokenizer : public CPPUNIT_NS::TestFixture
16 {
24 
25 protected:
26  void testTokenizerPrefix();
27  void testTokenizerSuffix();
28  void testTokenizerSkip();
29  void testTokenizerToken();
30  void testTokenizerInt64();
31 };
33 
34 SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
35  "Host: resource.com\r\n"
36  "Cookie: laijkpk3422r j1noin \r\n"
37  "\r\n");
38 const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
39 const CharacterSet whitespace("whitespace"," \r\n");
40 const CharacterSet crlf("crlf","\r\n");
41 const CharacterSet tab("tab","\t");
42 const CharacterSet numbers("numbers","0123456789");
43 
44 void
46 {
47  const SBuf canary("This text should not be changed.");
48 
50  SBuf s;
51 
53  all += alpha;
54  all += crlf;
55  all += numbers;
56  all.add(':').add('.').add('/');
57 
58  // an empty prefix should return false (the full output buffer case)
59  s = canary;
60  const SBuf before = t.remaining();
61  CPPUNIT_ASSERT(!t.prefix(s, all, 0));
62  // ... and a false return value means no parameter changes
63  CPPUNIT_ASSERT_EQUAL(canary, s);
64  // ... and a false return value means no input buffer changes
65  CPPUNIT_ASSERT_EQUAL(before, t.remaining());
66 
67  // successful prefix tokenization
68  CPPUNIT_ASSERT(t.prefix(s,alpha));
69  CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
70  CPPUNIT_ASSERT(t.prefix(s,whitespace));
71  CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
72 
73  //no match (first char is not in the prefix set)
74  CPPUNIT_ASSERT(!t.prefix(s,whitespace));
75  CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
76 
77  // one more match to set S to something meaningful
78  CPPUNIT_ASSERT(t.prefix(s,alpha));
79  CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
80 
81  //no match (no characters from the character set in the prefix)
82  CPPUNIT_ASSERT(!t.prefix(s,tab));
83  CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
84 
85  // match until the end of the sample
86  CPPUNIT_ASSERT(t.prefix(s,all));
87  CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining());
88 
89  // empty prefix should return false (the empty input buffer case)
90  s = canary;
91  CPPUNIT_ASSERT(!t.prefix(s, all));
92  // ... and a false return value means no parameter changes
93  CPPUNIT_ASSERT_EQUAL(canary, s);
94 }
95 
96 void
98 {
100  SBuf s;
101 
102  // first scenario: patterns match
103  // prep for test
104  CPPUNIT_ASSERT(t.prefix(s,alpha));
105  CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
106 
107  // test skipping one character from a character set
108  CPPUNIT_ASSERT(t.skipOne(whitespace));
109  // check that skip was right
110  CPPUNIT_ASSERT(t.prefix(s,alpha));
111  CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
112 
113  //check skip prefix
114  CPPUNIT_ASSERT(t.skip(SBuf("://")));
115  // verify
116  CPPUNIT_ASSERT(t.prefix(s,alpha));
117  CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s);
118 
119  // no skip
120  CPPUNIT_ASSERT(!t.skipOne(alpha));
121  CPPUNIT_ASSERT(!t.skip(SBuf("://")));
122  CPPUNIT_ASSERT(!t.skip('a'));
123 
124  // test skipping all characters from a character set while looking at .com
125  CPPUNIT_ASSERT(t.skip('.'));
126  CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha));
127  CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path")));
128 }
129 
130 void
132 {
134  SBuf s;
135 
136  // first scenario: patterns match
137  CPPUNIT_ASSERT(t.token(s,whitespace));
138  CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
139  CPPUNIT_ASSERT(t.token(s,whitespace));
140  CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s);
141  CPPUNIT_ASSERT(t.token(s,whitespace));
142  CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s);
143  CPPUNIT_ASSERT(t.token(s,whitespace));
144  CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s);
145 
146 }
147 
148 void
150 {
151  const SBuf canary("This text should not be changed.");
152 
154  SBuf s;
155 
157  all += alpha;
158  all += crlf;
159  all += numbers;
160  all.add(':').add('.').add('/');
161 
162  // an empty suffix should return false (the full output buffer case)
163  s = canary;
164  const SBuf before = t.remaining();
165  CPPUNIT_ASSERT(!t.suffix(s, all, 0));
166  // ... and a false return value means no parameter changes
167  CPPUNIT_ASSERT_EQUAL(canary, s);
168  // ... and a false return value means no input buffer changes
169  CPPUNIT_ASSERT_EQUAL(before, t.remaining());
170 
171  // consume suffix until the last CRLF, including that last CRLF
172  SBuf::size_type remaining = t.remaining().length();
173  while (t.remaining().findLastOf(crlf) != SBuf::npos) {
174  CPPUNIT_ASSERT(t.remaining().length() > 0);
175  CPPUNIT_ASSERT(t.skipOneTrailing(all));
176  // ensure steady progress
177  CPPUNIT_ASSERT_EQUAL(remaining, t.remaining().length() + 1);
178  --remaining;
179  }
180 
181  // no match (last char is not in the suffix set)
182  CPPUNIT_ASSERT(!t.suffix(s, crlf));
183  CPPUNIT_ASSERT(!t.suffix(s, whitespace));
184 
185  // successful suffix tokenization
186  CPPUNIT_ASSERT(t.suffix(s, numbers));
187  CPPUNIT_ASSERT_EQUAL(SBuf("1"), s);
188  CPPUNIT_ASSERT(t.skipSuffix(SBuf("1.")));
189  CPPUNIT_ASSERT(t.skipSuffix(SBuf("/")));
190  CPPUNIT_ASSERT(t.suffix(s, alpha));
191  CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s);
192  CPPUNIT_ASSERT(t.suffix(s, whitespace));
193  CPPUNIT_ASSERT_EQUAL(SBuf(" "), s);
194 
195  // match until the end of the sample
196  CPPUNIT_ASSERT(t.suffix(s, all));
197  CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining());
198 
199  // an empty buffer does not end with a token
200  s = canary;
201  CPPUNIT_ASSERT(!t.suffix(s, all));
202  CPPUNIT_ASSERT_EQUAL(canary, s); // no parameter changes
203 
204  // we cannot skip an empty suffix, even in an empty buffer
205  CPPUNIT_ASSERT(!t.skipSuffix(SBuf()));
206 }
207 
208 void
210 {
211  // successful parse in base 10
212  {
213  int64_t rv;
214  Parser::Tokenizer t(SBuf("1234"));
215  const int64_t benchmark = 1234;
216  CPPUNIT_ASSERT(t.int64(rv, 10));
217  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
218  CPPUNIT_ASSERT(t.buf().isEmpty());
219  }
220 
221  // successful parse, autodetect base
222  {
223  int64_t rv;
224  Parser::Tokenizer t(SBuf("1234"));
225  const int64_t benchmark = 1234;
226  CPPUNIT_ASSERT(t.int64(rv));
227  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
228  CPPUNIT_ASSERT(t.buf().isEmpty());
229  }
230 
231  // successful parse, autodetect base
232  {
233  int64_t rv;
234  Parser::Tokenizer t(SBuf("01234"));
235  const int64_t benchmark = 01234;
236  CPPUNIT_ASSERT(t.int64(rv));
237  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
238  CPPUNIT_ASSERT(t.buf().isEmpty());
239  }
240 
241  // successful parse, autodetect base
242  {
243  int64_t rv;
244  Parser::Tokenizer t(SBuf("0x12f4"));
245  const int64_t benchmark = 0x12f4;
246  CPPUNIT_ASSERT(t.int64(rv));
247  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
248  CPPUNIT_ASSERT(t.buf().isEmpty());
249  }
250 
251  // When interpreting octal numbers, standard strtol() and Tokenizer::int64()
252  // treat leading zero as a part of sequence of digits rather than a
253  // character used _exclusively_ as base indicator. Thus, it is not possible
254  // to create an invalid octal number with an explicit octal base -- the
255  // first invalid character after the base will be successfully ignored. This
256  // treatment also makes it difficult to define "shortest valid octal input".
257  // Here, we are just enumerating interesting "short input" octal cases in
258  // four dimensions:
259  // 1. int64(base) argument: forced or auto-detected;
260  // 2. base character ("0") in input: absent or present;
261  // 3. post-base digits in input: absent, valid, or invalid;
262  // 4. input length limits via int64(length) argument: unlimited or limited.
263 
264  // forced base; input: no base, no post-base digits, unlimited
265  {
266  int64_t rv;
267  Parser::Tokenizer t(SBuf(""));
268  CPPUNIT_ASSERT(!t.int64(rv, 8));
269  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
270  }
271 
272  // forced base; input: no base, no post-base digits, limited
273  {
274  int64_t rv;
275  Parser::Tokenizer t(SBuf("7"));
276  CPPUNIT_ASSERT(!t.int64(rv, 8, false, 0));
277  CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());
278  }
279 
280  // forced base; input: no base, one valid post-base digit, unlimited
281  {
282  int64_t rv;
283  Parser::Tokenizer t(SBuf("4"));
284  const int64_t benchmark = 04;
285  CPPUNIT_ASSERT(t.int64(rv, 8));
286  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
287  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
288  }
289 
290  // forced base; input: no base, one valid post-base digit, limited
291  {
292  int64_t rv;
293  Parser::Tokenizer t(SBuf("46"));
294  const int64_t benchmark = 04;
295  CPPUNIT_ASSERT(t.int64(rv, 8, false, 1));
296  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
297  CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());
298  }
299 
300  // forced base; input: no base, one invalid post-base digit, unlimited
301  {
302  int64_t rv;
303  Parser::Tokenizer t(SBuf("8"));
304  CPPUNIT_ASSERT(!t.int64(rv, 8));
305  CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
306  }
307 
308  // forced base; input: no base, one invalid post-base digit, limited
309  {
310  int64_t rv;
311  Parser::Tokenizer t(SBuf("80"));
312  CPPUNIT_ASSERT(!t.int64(rv, 8, false, 1));
313  CPPUNIT_ASSERT_EQUAL(SBuf("80"), t.buf());
314  }
315 
316  // repeat the above six octal cases, but now with base character in input
317 
318  // forced base; input: base, no post-base digits, unlimited
319  {
320  int64_t rv;
321  Parser::Tokenizer t(SBuf("0"));
322  const int64_t benchmark = 0;
323  CPPUNIT_ASSERT(t.int64(rv, 8));
324  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
325  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
326  }
327 
328  // forced base; input: base, no post-base digits, limited
329  {
330  int64_t rv;
331  Parser::Tokenizer t(SBuf("07"));
332  const int64_t benchmark = 0;
333  CPPUNIT_ASSERT(t.int64(rv, 8, false, 1));
334  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
335  CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());
336  }
337 
338  // forced base; input: base, one valid post-base digit, unlimited
339  {
340  int64_t rv;
341  Parser::Tokenizer t(SBuf("04"));
342  const int64_t benchmark = 04;
343  CPPUNIT_ASSERT(t.int64(rv, 8));
344  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
345  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
346  }
347 
348  // forced base; input: base, one valid post-base digit, limited
349  {
350  int64_t rv;
351  Parser::Tokenizer t(SBuf("046"));
352  const int64_t benchmark = 04;
353  CPPUNIT_ASSERT(t.int64(rv, 8, false, 2));
354  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
355  CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());
356  }
357 
358  // forced base; input: base, one invalid post-base digit, unlimited
359  {
360  int64_t rv;
361  Parser::Tokenizer t(SBuf("08"));
362  const int64_t benchmark = 00;
363  CPPUNIT_ASSERT(t.int64(rv, 8));
364  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
365  CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
366  }
367 
368  // forced base; input: base, one invalid post-base digit, limited
369  {
370  int64_t rv;
371  Parser::Tokenizer t(SBuf("08"));
372  const int64_t benchmark = 00;
373  CPPUNIT_ASSERT(t.int64(rv, 8, false, 2));
374  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
375  CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
376  }
377 
378  // And now repeat six "with base character in input" octal cases but with
379  // auto-detected base. When octal cases below say "auto-detected base", they
380  // describe int64() base=0 parameter value. Current int64() implementation
381  // does auto-detect base as octal in all of these cases, but that might
382  // change, and some of these cases (e.g., "0") can also be viewed as a
383  // non-octal input case as well. These cases do not attempt to test base
384  // detection. They focus on other potential problems.
385 
386  // auto-detected base; input: base, no post-base digits, unlimited
387  {
388  int64_t rv;
389  Parser::Tokenizer t(SBuf("0"));
390  const int64_t benchmark = 00;
391  CPPUNIT_ASSERT(t.int64(rv, 0));
392  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
393  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
394  }
395 
396  // auto-detected base; input: base, no post-base digits, limited
397  {
398  int64_t rv;
399  Parser::Tokenizer t(SBuf("07"));
400  const int64_t benchmark = 0;
401  CPPUNIT_ASSERT(t.int64(rv, 0, false, 1));
402  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
403  CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());
404  }
405 
406  // auto-detected base; input: base, one valid post-base digit, unlimited
407  {
408  int64_t rv;
409  Parser::Tokenizer t(SBuf("04"));
410  const int64_t benchmark = 04;
411  CPPUNIT_ASSERT(t.int64(rv, 0));
412  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
413  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
414  }
415 
416  // auto-detected base; input: base, one valid post-base digit, limited
417  {
418  int64_t rv;
419  Parser::Tokenizer t(SBuf("046"));
420  const int64_t benchmark = 04;
421  CPPUNIT_ASSERT(t.int64(rv, 0, false, 2));
422  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
423  CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());
424  }
425 
426  // auto-detected base; input: base, one invalid post-base digit, unlimited
427  {
428  int64_t rv;
429  Parser::Tokenizer t(SBuf("08"));
430  const int64_t benchmark = 00;
431  CPPUNIT_ASSERT(t.int64(rv, 0));
432  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
433  CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
434  }
435 
436  // auto-detected base; input: base, one invalid post-base digit, limited
437  {
438  int64_t rv;
439  Parser::Tokenizer t(SBuf("08"));
440  const int64_t benchmark = 00;
441  CPPUNIT_ASSERT(t.int64(rv, 0, false, 2));
442  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
443  CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
444  }
445 
446  // this ends four-dimensional enumeration of octal cases described earlier
447 
448  // check octal base auto-detection
449  {
450  int64_t rv;
451  Parser::Tokenizer t(SBuf("0128"));
452  const int64_t benchmark = 012;
453  CPPUNIT_ASSERT(t.int64(rv, 0));
454  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
455  CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
456  }
457 
458  // check that octal base auto-detection is not confused by repeated zeros
459  {
460  int64_t rv;
461  Parser::Tokenizer t(SBuf("00000000071"));
462  const int64_t benchmark = 00000000071;
463  CPPUNIT_ASSERT(t.int64(rv));
464  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
465  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
466  }
467 
468  // check that forced octal base is not confused by hex prefix
469  {
470  int64_t rv;
471  Parser::Tokenizer t(SBuf("0x5"));
472  const int64_t benchmark = 0;
473  CPPUNIT_ASSERT(t.int64(rv, 8));
474  CPPUNIT_ASSERT_EQUAL(benchmark, rv);
475  CPPUNIT_ASSERT_EQUAL(SBuf("x5"), t.buf());
476  }
477 
478  // autodetect decimal base in shortest valid input
479  {
480  int64_t rv;
481  Parser::Tokenizer t(SBuf("1"));
482  const int64_t benchmark = 1;
483  CPPUNIT_ASSERT(t.int64(rv));
484  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
485  CPPUNIT_ASSERT(t.buf().isEmpty());
486  }
487 
488  // autodetect hex base in shortest valid input
489  {
490  int64_t rv;
491  Parser::Tokenizer t(SBuf("0X1"));
492  const int64_t benchmark = 0X1;
493  CPPUNIT_ASSERT(t.int64(rv));
494  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
495  CPPUNIT_ASSERT(t.buf().isEmpty());
496  }
497 
498  // invalid (when autodetecting base) input matching hex base
499  {
500  int64_t rv;
501  Parser::Tokenizer t(SBuf("0x"));
502  CPPUNIT_ASSERT(!t.int64(rv));
503  CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf());
504  }
505 
506  // invalid (when forcing hex base) input matching hex base
507  {
508  int64_t rv;
509  Parser::Tokenizer t(SBuf("0x"));
510  CPPUNIT_ASSERT(!t.int64(rv, 16));
511  CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf());
512  }
513 
514  // invalid (when autodetecting base and limiting) input matching hex base
515  {
516  int64_t rv;
517  Parser::Tokenizer t(SBuf("0x2"));
518  CPPUNIT_ASSERT(!t.int64(rv, 0, true, 2));
519  CPPUNIT_ASSERT_EQUAL(SBuf("0x2"), t.buf());
520  }
521 
522  // invalid (when forcing hex base and limiting) input matching hex base
523  {
524  int64_t rv;
525  Parser::Tokenizer t(SBuf("0x3"));
526  CPPUNIT_ASSERT(!t.int64(rv, 16, false, 2));
527  CPPUNIT_ASSERT_EQUAL(SBuf("0x3"), t.buf());
528  }
529 
530  // API mismatch: don't eat leading space
531  {
532  int64_t rv;
533  Parser::Tokenizer t(SBuf(" 1234"));
534  CPPUNIT_ASSERT(!t.int64(rv));
535  CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
536  }
537 
538  // API mismatch: don't eat multiple leading spaces
539  {
540  int64_t rv;
541  Parser::Tokenizer t(SBuf(" 1234"));
542  CPPUNIT_ASSERT(!t.int64(rv));
543  CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
544  }
545 
546  // zero corner case: repeated zeros
547  {
548  int64_t rv;
549  Parser::Tokenizer t(SBuf("00"));
550  const int64_t benchmark = 00;
551  CPPUNIT_ASSERT(t.int64(rv));
552  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
553  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
554  }
555 
556  // zero corner case: "positive" zero
557  {
558  int64_t rv;
559  Parser::Tokenizer t(SBuf("+0"));
560  const int64_t benchmark = +0;
561  CPPUNIT_ASSERT(t.int64(rv));
562  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
563  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
564  }
565 
566  // zero corner case: "negative" zero
567  {
568  int64_t rv;
569  Parser::Tokenizer t(SBuf("-0"));
570  const int64_t benchmark = -0;
571  CPPUNIT_ASSERT(t.int64(rv));
572  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
573  CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
574  }
575 
576  // trailing spaces
577  {
578  int64_t rv;
579  Parser::Tokenizer t(SBuf("1234 foo"));
580  const int64_t benchmark = 1234;
581  CPPUNIT_ASSERT(t.int64(rv));
582  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
583  CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf());
584  }
585 
586  // trailing nonspaces
587  {
588  int64_t rv;
589  Parser::Tokenizer t(SBuf("1234foo"));
590  const int64_t benchmark = 1234;
591  CPPUNIT_ASSERT(t.int64(rv));
592  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
593  CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf());
594  }
595 
596  // trailing nonspaces
597  {
598  int64_t rv;
599  Parser::Tokenizer t(SBuf("0x1234foo"));
600  const int64_t benchmark = 0x1234f;
601  CPPUNIT_ASSERT(t.int64(rv));
602  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
603  CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf());
604  }
605 
606  // overflow
607  {
608  int64_t rv;
609  Parser::Tokenizer t(SBuf("1029397752385698678762234"));
610  CPPUNIT_ASSERT(!t.int64(rv));
611  CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t.buf());
612  }
613 
614  // buffered sub-string parsing
615  {
616  int64_t rv;
617  SBuf base("1029397752385698678762234");
618  const int64_t benchmark = 22;
619  Parser::Tokenizer t(base.substr(base.length()-4,2));
620  CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf());
621  CPPUNIT_ASSERT(t.int64(rv));
622  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
623  CPPUNIT_ASSERT(t.buf().isEmpty());
624  }
625 
626  // base-16, prefix
627  {
628  int64_t rv;
629  SBuf base("deadbeefrow");
630  const int64_t benchmark=0xdeadbeef;
631  Parser::Tokenizer t(base);
632  CPPUNIT_ASSERT(t.int64(rv,16));
633  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
634  CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf());
635 
636  }
637 }
638 
639 int
640 main(int argc, char *argv[])
641 {
642  return TestProgram().run(argc, argv);
643 }
644 
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:79
void testTokenizerSkip()
SBuf::size_type skipAll(const CharacterSet &discardables)
Definition: Tokenizer.cc:137
bool skipOneTrailing(const CharacterSet &discardables)
Definition: Tokenizer.cc:211
void testTokenizerToken()
bool isEmpty() const
Definition: SBuf.h:435
SBuf buf() const
yet unparsed data
Definition: Tokenizer.h:35
bool token(SBuf &returnedToken, const CharacterSet &delimiters)
Definition: Tokenizer.cc:61
CPPUNIT_TEST_SUITE(TestTokenizer)
implements test program's main() function while enabling customization
Definition: unitTestMain.h:25
Definition: SBuf.h:93
bool skip(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:189
bool startsWith(const SBuf &S, const SBufCaseSensitive isCaseSensitive=caseSensitive) const
Definition: SBuf.cc:442
CPPUNIT_TEST(testTokenizerPrefix)
CharacterSet & add(const unsigned char c)
add a given character to the character set
Definition: CharacterSet.cc:47
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:576
int run(int argc, char *argv[])
Definition: unitTestMain.h:44
void testTokenizerSuffix()
CPPUNIT_TEST_SUITE_REGISTRATION(TestTokenizer)
void testTokenizerPrefix()
const SBuf & remaining() const
the remaining unprocessed section of buffer
Definition: Tokenizer.h:44
MemBlob::size_type size_type
Definition: SBuf.h:96
SBuf text("GET http://resource.com/path HTTP/1.1\r\n" "Host: resource.com\r\n" "Cookie: laijkpk3422r j1noin \r\n" "\r\n")
const CharacterSet whitespace("whitespace"," \r\n")
const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
const CharacterSet numbers("numbers","0123456789")
void testTokenizerInt64()
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:419
size_type findLastOf(const CharacterSet &set, size_type endPos=npos) const
Definition: SBuf.cc:769
const CharacterSet tab("tab","\t")
static const size_type npos
Definition: SBuf.h:100
int main(int argc, char *argv[])
bool int64(int64_t &result, int base=0, bool allowSign=true, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:238
bool skipOne(const CharacterSet &discardables)
Definition: Tokenizer.cc:161
bool skipSuffix(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:172
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:17
bool suffix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:117
const CharacterSet crlf("crlf","\r\n")
Definition: Elements.cc:12

 

Introduction

Documentation

Support

Miscellaneous