Tokenizer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "debug/Stream.h"
11 #include "http/one/Parser.h"
12 #include "http/one/Tokenizer.h"
13 #include "parser/Tokenizer.h"
14 #include "sbuf/Stream.h"
15 
20 static SBuf
22 {
23  /*
24  * RFC 1945 - defines qdtext:
25  * inclusive of LWS (which includes CR and LF)
26  * exclusive of 0x80-0xFF
27  * includes 0x5C ('\') as just a regular character
28  */
29  static const CharacterSet qdtext1p0 = CharacterSet("qdtext (HTTP/1.0)", 0x23, 0x7E) +
30  CharacterSet("", "!") +
32  /*
33  * RFC 7230 - defines qdtext:
34  * exclusive of CR and LF
35  * inclusive of 0x80-0xFF
36  * includes 0x5C ('\') but only when part of quoted-pair
37  */
38  static const CharacterSet qdtext1p1 = CharacterSet("qdtext (HTTP/1.1)", 0x23, 0x5B) +
39  CharacterSet("", "!") +
40  CharacterSet("", 0x5D, 0x7E) +
43 
44  // best we can do is a conditional reference since http1p0 value may change per-client
45  const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
46 
47  SBuf parsedToken;
48 
49  while (!tok.atEnd()) {
50  SBuf qdText;
51  if (tok.prefix(qdText, tokenChars))
52  parsedToken.append(qdText);
53 
54  if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
55  if (tok.atEnd())
56  break;
57 
58  /* RFC 7230 section 3.2.6
59  *
60  * The backslash octet ("\") can be used as a single-octet quoting
61  * mechanism within quoted-string and comment constructs. Recipients
62  * that process the value of a quoted-string MUST handle a quoted-pair
63  * as if it were replaced by the octet following the backslash.
64  *
65  * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
66  */
68  SBuf escaped;
69  if (!tok.prefix(escaped, qPairChars, 1))
70  throw TexcHere("invalid escaped character in quoted-pair");
71 
72  parsedToken.append(escaped);
73  continue;
74  }
75 
76  if (tok.skip('"'))
77  return parsedToken; // may be empty
78 
79  if (tok.atEnd())
80  break;
81 
82  throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
83  }
84 
85  throw Http::One::InsufficientInput();
86 }
87 
88 SBuf
90 {
91  if (tok.skip('"'))
92  return parseQuotedStringSuffix(tok, http1p0);
93 
94  if (tok.atEnd())
95  throw InsufficientInput();
96 
97  SBuf parsedToken;
98  if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
99  throw TexcHere("invalid input while expecting an HTTP token");
100 
101  if (tok.atEnd())
102  throw InsufficientInput();
103 
104  // got the complete token
105  return parsedToken;
106 }
107 
const char * name
optional set label for debugging (default: "anonymous")
Definition: CharacterSet.h:72
Definition: SBuf.h:93
static const CharacterSet LF
Definition: CharacterSet.h:92
static const CharacterSet VCHAR
Definition: CharacterSet.h:96
#define TexcHere(msg)
legacy convenience macro; it is not difficult to type Here() now
Definition: TextException.h:63
static const CharacterSet CR
Definition: CharacterSet.h:80
static const CharacterSet TCHAR
Definition: CharacterSet.h:105
static const CharacterSet HTAB
Definition: CharacterSet.h:90
SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0=false)
Definition: Tokenizer.cc:89
SBuf & append(const SBuf &S)
Definition: SBuf.cc:185
Definition: parse.c:160
SBuf ToSBuf(Args &&... args)
slowly stream-prints all arguments into a freshly allocated SBuf
Definition: Stream.h:63
static const CharacterSet OBSTEXT
Definition: CharacterSet.h:111
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:17
static SBuf parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
Definition: Tokenizer.cc:21
static const CharacterSet SP
Definition: CharacterSet.h:94

 

Introduction

Documentation

Support

Miscellaneous