CharacterSet.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "base/CharacterSet.h"
11 
12 #include <algorithm>
13 #include <iostream>
14 #include <functional>
15 
18 {
19  Storage::const_iterator s = src.chars_.begin();
20  const Storage::const_iterator e = src.chars_.end();
21  Storage::iterator d = chars_.begin();
22  while (s != e) {
23  if (*s)
24  *d = 1;
25  ++s;
26  ++d;
27  }
28  return *this;
29 }
30 
33 {
34  Storage::const_iterator s = src.chars_.begin();
35  const Storage::const_iterator e = src.chars_.end();
36  Storage::iterator d = chars_.begin();
37  while (s != e) {
38  if (*s)
39  *d = 0;
40  ++s;
41  ++d;
42  }
43  return *this;
44 }
45 
47 CharacterSet::add(const unsigned char c)
48 {
49  chars_[static_cast<uint8_t>(c)] = 1;
50  return *this;
51 }
52 
54 CharacterSet::remove(const unsigned char c)
55 {
56  chars_[static_cast<uint8_t>(c)] = 0;
57  return *this;
58 }
59 
61 CharacterSet::addRange(unsigned char low, unsigned char high)
62 {
63  //manual loop splitting is needed to cover case where high is 255
64  // otherwise low will wrap, resulting in infinite loop
65  while (low < high) {
66  chars_[static_cast<uint8_t>(low)] = 1;
67  ++low;
68  }
69  chars_[static_cast<uint8_t>(high)] = 1;
70  return *this;
71 }
72 
74 CharacterSet::complement(const char *label) const
75 {
76  CharacterSet result((label ? label : "complement_of_some_other_set"), "");
77  // negate each of our elements and add them to the result storage
78  std::transform(chars_.begin(), chars_.end(), result.chars_.begin(),
79  std::logical_not<Storage::value_type>());
80  return result;
81 }
82 
83 CharacterSet::CharacterSet(const char *label, const char * const c) :
84  name(label ? label: "anonymous"),
85  chars_(Storage(256,0))
86 {
87  const size_t clen = strlen(c);
88  for (size_t i = 0; i < clen; ++i)
89  add(c[i]);
90 }
91 
92 CharacterSet::CharacterSet(const char *label, unsigned char low, unsigned char high) :
93  name(label ? label: "anonymous"),
94  chars_(Storage(256,0))
95 {
96  addRange(low,high);
97 }
98 
99 CharacterSet::CharacterSet(const char *label, std::initializer_list<std::pair<uint8_t, uint8_t>> ranges) :
100  name(label ? label: "anonymous"),
101  chars_(Storage(256,0))
102 {
103  for (auto range: ranges)
104  addRange(range.first, range.second);
105 }
106 
107 void
108 CharacterSet::printChars(std::ostream &os) const
109 {
110  for (size_t idx = 0; idx < 256; ++idx) {
111  if (chars_[idx])
112  os << static_cast<char>(idx);
113  }
114 }
115 
118 {
    // Merge rhs into lhs with operator+= and hand the updated lhs back.
    // NOTE(review): presumably lhs is a by-value parameter, so the caller's
    // own operand stays untouched -- confirm against the declaration.
119  lhs += rhs;
120  return lhs;
121 }
122 
125 {
    // Strip rhs out of lhs with operator-= and hand the updated lhs back.
    // NOTE(review): presumably lhs is a by-value parameter, so the caller's
    // own operand stays untouched -- confirm against the declaration.
126  lhs -= rhs;
127  return lhs;
128 }
129 
130 std::ostream&
131 operator <<(std::ostream &s, const CharacterSet &c)
132 {
133  s << "CharacterSet(" << c.name << ')';
134  return s;
135 }
136 
137 const CharacterSet
138 // RFC 5234
139 CharacterSet::ALPHA("ALPHA", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
140  CharacterSet::BIT("BIT","01"),
141  CharacterSet::CR("CR","\r"),
142 CharacterSet::CTL("CTL", {{0x01,0x1f},{0x7f,0x7f}}),
143 CharacterSet::DIGIT("DIGIT","0123456789"),
144 CharacterSet::DQUOTE("DQUOTE","\""),
145 CharacterSet::HEXDIG("HEXDIG","0123456789aAbBcCdDeEfF"),
146 CharacterSet::HTAB("HTAB","\t"),
147 CharacterSet::LF("LF","\n"),
148 CharacterSet::SP("SP"," "),
149 CharacterSet::VCHAR("VCHAR", 0x21, 0x7e),
150 // RFC 7230
151 CharacterSet::WSP("WSP"," \t"),
152 CharacterSet::CTEXT("ctext", {{0x09,0x09},{0x20,0x20},{0x2a,0x5b},{0x5d,0x7e},{0x80,0xff}}),
153 CharacterSet::TCHAR("TCHAR","!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
154 CharacterSet::SPECIAL("SPECIAL","()<>@,;:\\\"/[]?={}"),
155 CharacterSet::QDTEXT("QDTEXT", {{0x09,0x09},{0x20,0x21},{0x23,0x5b},{0x5d,0x7e},{0x80,0xff}}),
156 CharacterSet::OBSTEXT("OBSTEXT",0x80,0xff),
157 // RFC 7232
158 CharacterSet::ETAGC("ETAGC", {{0x21,0x21},{0x23,0x7e},{0x80,0xff}}),
159 // RFC 7235
160 CharacterSet::TOKEN68C("TOKEN68C","-._~+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
161 ;
162 
163 const CharacterSet &
165 {
166  // RFC 3986: unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
167  static const auto chars = new CharacterSet("RFC3986_UNRESERVED", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~");
168  return *chars;
169 }
170 
const char * name
optional set label for debugging (default: "anonymous")
Definition: CharacterSet.h:72
CharacterSet operator+(CharacterSet lhs, const CharacterSet &rhs)
CharacterSet complement(const char *complementLabel=nullptr) const
Definition: CharacterSet.cc:74
static const CharacterSet CTEXT
Definition: CharacterSet.h:102
static const CharacterSet ETAGC
Definition: CharacterSet.h:115
static const CharacterSet LF
Definition: CharacterSet.h:92
std::ostream & operator<<(std::ostream &s, const CharacterSet &c)
CharacterSet & add(const unsigned char c)
add a given character to the character set
Definition: CharacterSet.cc:47
CharacterSet & operator+=(const CharacterSet &rhs)
set addition: add to this set all characters that are in rhs
Definition: CharacterSet.cc:17
static const CharacterSet VCHAR
Definition: CharacterSet.h:96
static const CharacterSet ALPHA
Definition: CharacterSet.h:76
static const CharacterSet CR
Definition: CharacterSet.h:80
static const CharacterSet DQUOTE
Definition: CharacterSet.h:86
static const CharacterSet WSP
Definition: CharacterSet.h:98
std::vector< uint8_t > Storage
Definition: CharacterSet.h:20
CharacterSet & addRange(unsigned char low, unsigned char high)
add a single range of characters, expressed as [low,high], including both ends
Definition: CharacterSet.cc:61
static const CharacterSet TCHAR
Definition: CharacterSet.h:105
static const CharacterSet HEXDIG
Definition: CharacterSet.h:88
CharacterSet & operator-=(const CharacterSet &rhs)
set subtraction: remove all characters that are also in rhs
Definition: CharacterSet.cc:32
static const CharacterSet HTAB
Definition: CharacterSet.h:90
static const CharacterSet SPECIAL
Definition: CharacterSet.h:107
void printChars(std::ostream &os) const
prints all chars in arbitrary order, without any quoting/escaping
static const CharacterSet QDTEXT
Definition: CharacterSet.h:109
static const CharacterSet BIT
Definition: CharacterSet.h:78
static const CharacterSet DIGIT
Definition: CharacterSet.h:84
CharacterSet & remove(const unsigned char c)
remove a given character from the character set
Definition: CharacterSet.cc:54
CharacterSet(const char *label="anonymous", const char *const chars="")
a character set with a given label and contents
Definition: CharacterSet.cc:83
Storage chars_
Definition: CharacterSet.h:130
static const CharacterSet & RFC3986_UNRESERVED()
allowed URI characters that do not have a reserved purpose, RFC 3986
static const CharacterSet TOKEN68C
Definition: CharacterSet.h:119
static const CharacterSet CTL
Definition: CharacterSet.h:82
static const CharacterSet OBSTEXT
Definition: CharacterSet.h:111
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:17
CharacterSet operator-(CharacterSet lhs, const CharacterSet &rhs)
static const CharacterSet SP
Definition: CharacterSet.h:94

 

Introduction

Documentation

Support

Miscellaneous