RegexData.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /*
10  * Portions of this code are copyrighted and released under GPLv2+ by:
11  * Copyright (c) 2011, Marcus Kool
12  * Please add new claims to the CONTRIBUTORS file instead.
13  */
14 
15 /* DEBUG: section 28 Access Control */
16 
17 #include "squid.h"
18 #include "acl/Acl.h"
19 #include "acl/Checklist.h"
20 #include "acl/RegexData.h"
21 #include "base/RegexPattern.h"
22 #include "cache_cf.h"
23 #include "ConfigParser.h"
24 #include "debug/Stream.h"
25 #include "sbuf/Algorithms.h"
26 #include "sbuf/List.h"
27 #include "sbuf/Stream.h"
28 
30 
32 {
33 }
34 
35 const Acl::Options &
37 {
38  static auto MyCaseSensitivityOption = Acl::CaseSensitivityOption();
39  static const Acl::Options MyOptions = { &MyCaseSensitivityOption };
40  MyCaseSensitivityOption.linkWith(&CaseInsensitive_);
41  return MyOptions;
42 }
43 
44 bool
45 ACLRegexData::match(char const *word)
46 {
47  if (!word)
48  return 0;
49 
50  debugs(28, 3, "checking '" << word << "'");
51 
52  // walk the list of patterns to see if one matches
53  for (auto &i : data) {
54  if (i.match(word)) {
55  debugs(28, 2, '\'' << i << "' found in '" << word << '\'');
56  // TODO: old code also popped the pattern to second place of the list
57  // in order to reduce patterns search times.
58  return 1;
59  }
60  }
61 
62  return 0;
63 }
64 
67 {
68  SBufStream os;
69 
70  const RegexPattern *previous = nullptr;
71  for (const auto &i: data) {
72  i.print(os, previous); // skip flags implied by the previous entry
73  previous = &i;
74  }
75 
76  return SBufList(1, os.buf());
77 }
78 
79 static const char *
81 {
82  if (strcmp(t, ".*") == 0) // we cannot simplify that further
83  return t; // avoid "WARNING: ... Using '.*' instead" below
84 
85  char * orig = t;
86 
87  if (strncmp(t, "^.*", 3) == 0)
88  t += 3;
89 
90  /* NOTE: an initial '.' might seem unnessary but is not;
91  * it can be a valid requirement that cannot be optimised
92  */
93  while (*t == '.' && *(t+1) == '*') {
94  t += 2;
95  }
96 
97  if (*t == '\0') {
98  debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
99  debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
100  return ".*";
101  }
102  if (t != orig) {
103  debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
104  debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
105  }
106 
107  return t;
108 }
109 
110 static void
111 compileRE(std::list<RegexPattern> &curlist, const SBuf &RE, int flags)
112 {
113  curlist.emplace_back(RE, flags);
114 }
115 
116 static void
117 compileREs(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
118 {
119  assert(!RE.empty());
120  SBuf regexp;
121  static const SBuf openparen("("), closeparen(")"), separator(")|(");
122  JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen,
123  closeparen);
124  compileRE(curlist, regexp, flags);
125 }
126 
131 static void
132 compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl, const int flagsAtLineStart)
133 {
134  std::list<RegexPattern> newlist;
135  SBufList accumulatedRE;
136  int numREs = 0, reSize = 0;
137  auto flags = flagsAtLineStart;
138 
139  for (const SBuf & configurationLineWord : sl) {
140  static const SBuf minus_i("-i");
141  static const SBuf plus_i("+i");
142  if (configurationLineWord == minus_i) {
143  if (flags & REG_ICASE) {
144  /* optimisation of -i ... -i */
145  debugs(28, 2, "optimisation of -i ... -i" );
146  } else {
147  debugs(28, 2, "-i" );
148  if (!accumulatedRE.empty()) {
149  compileREs(newlist, accumulatedRE, flags);
150  accumulatedRE.clear();
151  reSize = 0;
152  }
153  flags |= REG_ICASE;
154  }
155  continue;
156  } else if (configurationLineWord == plus_i) {
157  if ((flags & REG_ICASE) == 0) {
158  /* optimisation of +i ... +i */
159  debugs(28, 2, "optimisation of +i ... +i");
160  } else {
161  debugs(28, 2, "+i");
162  if (!accumulatedRE.empty()) {
163  compileREs(newlist, accumulatedRE, flags);
164  accumulatedRE.clear();
165  reSize = 0;
166  }
167  flags &= ~REG_ICASE;
168  }
169  continue;
170  }
171 
172  debugs(28, 2, "adding RE '" << configurationLineWord << "'");
173  accumulatedRE.push_back(configurationLineWord);
174  ++numREs;
175  reSize += configurationLineWord.length();
176 
177  if (reSize > 1024) { // must be < BUFSIZ everything included
178  debugs(28, 2, "buffer full, generating new optimised RE..." );
179  compileREs(newlist, accumulatedRE, flags);
180  accumulatedRE.clear();
181  reSize = 0;
182  continue; /* do the loop again to add the RE to largeRE */
183  }
184  }
185 
186  if (!accumulatedRE.empty()) {
187  compileREs(newlist, accumulatedRE, flags);
188  accumulatedRE.clear();
189  reSize = 0;
190  }
191 
192  /* all was successful, so put the new list at the tail */
193  curlist.splice(curlist.end(), newlist);
194 
195  debugs(28, 2, numREs << " REs are optimised into one RE.");
196  if (numREs > 100) {
198  debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
199  "Consider using less REs or use rules without expressions like 'dstdomain'.");
200  }
201 }
202 
203 static void
204 compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl, const int flagsAtLineStart)
205 {
206  auto flags = flagsAtLineStart;
207 
208  static const SBuf minus_i("-i"), plus_i("+i");
209  for (const auto &configurationLineWord: sl) {
210  if (configurationLineWord == minus_i) {
211  flags |= REG_ICASE;
212  } else if (configurationLineWord == plus_i) {
213  flags &= ~REG_ICASE;
214  } else {
215  compileRE(curlist, configurationLineWord, flags);
216  }
217  }
218 }
219 
220 void
222 {
223  debugs(28, 2, "new Regex line or file");
224 
225  int flagsAtLineStart = REG_EXTENDED | REG_NOSUB;
226  if (CaseInsensitive_)
227  flagsAtLineStart |= REG_ICASE;
228 
229  SBufList sl;
230  while (char *t = ConfigParser::RegexStrtokFile()) {
231  const char *clean = removeUnnecessaryWildcards(t);
232  debugs(28, 3, "buffering RE '" << clean << "'");
233  sl.emplace_back(clean);
234  }
235 
236  try {
237  // ignore the danger of merging invalid REs into a valid "optimized" RE
238  compileOptimisedREs(data, sl, flagsAtLineStart);
239  } catch (...) {
240  compileUnoptimisedREs(data, sl, flagsAtLineStart);
241  // Delay compileOptimisedREs() failure reporting until we know that
242  // compileUnoptimisedREs() above have succeeded. If
243  // compileUnoptimisedREs() also fails, then the compileOptimisedREs()
244  // exception caught earlier was probably not related to _optimization_
245  // (and we do not want to report the same RE compilation problem twice).
246  debugs(28, DBG_IMPORTANT, "WARNING: Failed to optimize a set of regular expressions; will use them as-is instead;" <<
247  Debug::Extra << "configuration: " << cfg_filename << " line " << config_lineno << ": " << config_input_line <<
248  Debug::Extra << "optimization error: " << CurrentException);
249  }
250 }
251 
252 bool
254 {
255  return data.empty();
256 }
257 
std::vector< const Option * > Options
Definition: Options.h:217
static void compileOptimisedREs(std::list< RegexPattern > &curlist, const SBufList &sl, const int flagsAtLineStart)
Definition: RegexData.cc:132
~ACLRegexData() override
Definition: RegexData.cc:31
std::list< SBuf > SBufList
Definition: forward.h:22
bool empty() const override
Definition: RegexData.cc:253
Definition: SBuf.h:93
void parse() override
Definition: RegexData.cc:221
SBuf & JoinContainerIntoSBuf(SBuf &dest, const ContainerIterator &begin, const ContainerIterator &end, const SBuf &separator, const SBuf &prefix=SBuf(), const SBuf &suffix=SBuf())
Definition: Algorithms.h:68
static void compileRE(std::list< RegexPattern > &curlist, const SBuf &RE, int flags)
Definition: RegexData.cc:111
static void compileUnoptimisedREs(std::list< RegexPattern > &curlist, const SBufList &sl, const int flagsAtLineStart)
Definition: RegexData.cc:204
const BooleanOption & CaseSensitivityOption()
Definition: Options.cc:241
const Acl::Options & lineOptions() override
supported ACL "line" options (e.g., "-i")
Definition: RegexData.cc:36
static Acl::BooleanOptionValue CaseInsensitive_
whether parse() is called in a case insensitive context
Definition: RegexData.h:31
const_iterator begin() const
Definition: SBuf.h:587
#define assert(EX)
Definition: assert.h:17
bool match(char const *user) override
Definition: RegexData.cc:45
const char * cfg_filename
Definition: cache_cf.cc:271
std::list< RegexPattern > data
Definition: RegexData.h:36
SBuf buf()
bytes written so far
Definition: Stream.h:41
std::ostream & CurrentException(std::ostream &os)
prints active (i.e., thrown but not yet handled) exception
static char * RegexStrtokFile()
int config_lineno
Definition: cache_cf.cc:272
static std::ostream & Extra(std::ostream &)
Definition: debug.cc:1316
static void compileREs(std::list< RegexPattern > &curlist, const SBufList &RE, int flags)
Definition: RegexData.cc:117
int opt_parse_cfg_only
#define DBG_IMPORTANT
Definition: Stream.h:38
char config_input_line[BUFSIZ]
Definition: cache_cf.cc:273
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:192
static const char * removeUnnecessaryWildcards(char *t)
Definition: RegexData.cc:80
void print(std::ostream &os, const RegexPattern *previous=nullptr) const
Definition: RegexPattern.cc:42
SBufList dump() const override
Definition: RegexData.cc:66

 

Introduction

Documentation

Support

Miscellaneous