HTTPHeader-modif-by-FTRIF.cpp
1 |
//Please refer to http://dansguardian.org/?page=copyright2
|
---|---|
2 |
//for the license for this code.
|
3 |
//Written by Daniel Barron (daniel@//jadeb.com).
|
4 |
//For support go to http://groups.yahoo.com/group/dansguardian
|
5 |
|
6 |
// This program is free software; you can redistribute it and/or modify
|
7 |
// it under the terms of the GNU General Public License as published by
|
8 |
// the Free Software Foundation; either version 2 of the License, or
|
9 |
// (at your option) any later version.
|
10 |
//
|
11 |
// This program is distributed in the hope that it will be useful,
|
12 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
// GNU General Public License for more details.
|
15 |
//
|
16 |
// You should have received a copy of the GNU General Public License
|
17 |
// along with this program; if not, write to the Free Software
|
18 |
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19 |
|
20 |
//This file contains modifications suggested and mostly provided by
|
21 |
//Daniel Robbins 13/4/01 drobbins@gento.org
|
22 |
//Modifications include, but not limited to, getcontenttype(), << , >>
|
23 |
|
24 |
|
25 |
// INCLUDES
|
26 |
|
27 |
#ifdef HAVE_CONFIG_H
|
28 |
#include "dgconfig.h" |
29 |
#endif
|
30 |
#include "HTTPHeader.hpp" |
31 |
#include "Socket.hpp" |
32 |
#include "OptionContainer.hpp" |
33 |
#include "FDTunnel.hpp" |
34 |
|
35 |
#include <unistd.h> |
36 |
#include <sys/socket.h> |
37 |
#include <exception> |
38 |
#include <time.h> |
39 |
#include <syslog.h> |
40 |
#include <cerrno> |
41 |
#include <zlib.h> |
42 |
|
43 |
|
44 |
// GLOBALS
|
45 |
extern OptionContainer o;
|
46 |
|
47 |
// regexp for decoding %xx in URLs
|
48 |
extern RegExp urldecode_re;
|
49 |
|
50 |
|
51 |
// IMPLEMENTATION
|
52 |
|
53 |
// set timeout for socket operations
|
54 |
void HTTPHeader::setTimeout(int t) |
55 |
{ |
56 |
timeout = t; |
57 |
} |
58 |
|
59 |
// reset header object for future use
|
60 |
void HTTPHeader::reset()
|
61 |
{ |
62 |
if (dirty) {
|
63 |
header.clear(); |
64 |
//postdata.reset();
|
65 |
postdata[0] = '\0'; |
66 |
postdatalen = 0;
|
67 |
postdatachopped = false;
|
68 |
ispostupload = false;
|
69 |
waspersistent = false;
|
70 |
ispersistent = false;
|
71 |
|
72 |
cachedurl = "";
|
73 |
|
74 |
phost = NULL;
|
75 |
pport = NULL;
|
76 |
pcontentlength = NULL;
|
77 |
pcontenttype = NULL;
|
78 |
pproxyauthorization = NULL;
|
79 |
pcontentdisposition = NULL;
|
80 |
puseragent = NULL;
|
81 |
pxforwardedfor = NULL;
|
82 |
pcontentencoding = NULL;
|
83 |
pproxyconnection = NULL;
|
84 |
|
85 |
dirty = false;
|
86 |
} |
87 |
} |
88 |
|
89 |
// *
|
90 |
// *
|
91 |
// * header value and type checks
|
92 |
// *
|
93 |
// *
|
94 |
|
95 |
// grab request type (GET, HEAD etc.)
|
96 |
String HTTPHeader::requestType() |
97 |
{ |
98 |
return header.front().before(" "); |
99 |
} |
100 |
|
101 |
// grab return code
|
102 |
int HTTPHeader::returnCode()
|
103 |
{ |
104 |
return header.front().after(" ").before(" ").toInteger(); |
105 |
} |
106 |
|
107 |
// grab content length
|
108 |
off_t HTTPHeader::contentLength() |
109 |
{ |
110 |
// code 304 - not modified - no content
|
111 |
String temp(header.front().after(" "));
|
112 |
if (temp.startsWith("304")) |
113 |
return 0; |
114 |
if (pcontentlength != NULL) { |
115 |
temp = pcontentlength->after(" ");
|
116 |
return temp.toOffset();
|
117 |
} |
118 |
// no content-length header - we don't know
|
119 |
return -1; |
120 |
} |
121 |
|
122 |
// grab the auth type
|
123 |
String HTTPHeader::getAuthType() |
124 |
{ |
125 |
if (pproxyauthorization != NULL) { |
126 |
return pproxyauthorization->after(" ").before(" "); |
127 |
} |
128 |
return ""; |
129 |
} |
130 |
|
131 |
// check the request's return code to see if it's an auth required message
|
132 |
bool HTTPHeader::authRequired()
|
133 |
{ |
134 |
String temp(header.front().after(" "));
|
135 |
if (temp.startsWith("407")) { |
136 |
return true; |
137 |
} |
138 |
return false; |
139 |
} |
140 |
|
141 |
// grab content disposition
|
142 |
String HTTPHeader::disposition() |
143 |
{ |
144 |
if (pcontentdisposition != NULL) { |
145 |
String filename(pcontentdisposition->after("filename").after("=")); |
146 |
if (filename.contains(";")) |
147 |
filename = filename.before(";");
|
148 |
filename.removeWhiteSpace(); // incase of trailing space
|
149 |
if (filename.contains("\"")) { |
150 |
return filename.after("\"").before("\""); |
151 |
} |
152 |
return filename;
|
153 |
// example format:
|
154 |
// Content-Disposition: attachment; filename="filename.ext"
|
155 |
// Content-Disposition: attachment; filename=filename.ext
|
156 |
// Content-Disposition: filename="filename.ext"
|
157 |
// 3rd format encountered from download script on realVNC's
|
158 |
// website. notice it does not contain any semicolons! PRA 4-11-2005
|
159 |
} |
160 |
return ""; // it finds the header proposed filename |
161 |
} |
162 |
|
163 |
// grab the user agent
|
164 |
String HTTPHeader::userAgent() |
165 |
{ |
166 |
if (puseragent != NULL) { |
167 |
// chop off '/r'
|
168 |
String result(puseragent->after(" "));
|
169 |
result.resize(result.length() - 1);
|
170 |
return result;
|
171 |
} |
172 |
return ""; |
173 |
} |
174 |
|
175 |
// grab the content type header
|
176 |
String HTTPHeader::getContentType() |
177 |
{ |
178 |
if (pcontenttype != NULL) { |
179 |
String mimetype(pcontenttype->after(" "));
|
180 |
if (mimetype.length() < 1) |
181 |
return "-"; |
182 |
|
183 |
unsigned char c; |
184 |
size_t j = 0;
|
185 |
while (j < mimetype.length()) {
|
186 |
c = mimetype[j]; |
187 |
if (c == ' ' || c == ';' || c < 32) { // remove the |
188 |
mimetype = mimetype.subString(0, j);
|
189 |
// extra info not needed
|
190 |
j = 0;
|
191 |
} |
192 |
++j; |
193 |
} |
194 |
|
195 |
mimetype.toLower(); |
196 |
return mimetype;
|
197 |
} |
198 |
return "-"; |
199 |
} |
200 |
|
201 |
// does the given content type string match our headers?
|
202 |
bool HTTPHeader::isContentType(const String& t) |
203 |
{ |
204 |
return getContentType().startsWith(t);
|
205 |
} |
206 |
|
207 |
// grab contents of X-Forwarded-For header
|
208 |
// Modification based on a submitted patch by
|
209 |
// Jimmy Myrick (jmyrick@tiger1.tiger.org)
|
210 |
std::string HTTPHeader::getXForwardedForIP()
|
211 |
{ |
212 |
if (pxforwardedfor != NULL) { |
213 |
String line(pxforwardedfor->after(": "));
|
214 |
line.chop(); |
215 |
return std::string(line.toCharArray()); |
216 |
} |
217 |
return ""; |
218 |
} |
219 |
|
220 |
// check the return code to see if it's a redirection request
|
221 |
bool HTTPHeader::isRedirection()
|
222 |
{ |
223 |
// The 1st line of the header for a redirection is thus:
|
224 |
// HTTP/1.(0|1) 3xx
|
225 |
if (header.size() < 1) { |
226 |
return false; |
227 |
} // sometimes get called b 4 read
|
228 |
String answer(header.front().after(" ").before(" ")); |
229 |
if (answer[0] == '3' && answer.length() == 3) { |
230 |
return true; |
231 |
} |
232 |
return false; |
233 |
} |
234 |
|
235 |
// grab the contents of Proxy-Authorization header
|
236 |
// returns base64-decoding of the chunk of data after the auth type string
|
237 |
std::string HTTPHeader::getAuthData()
|
238 |
{ |
239 |
if (pproxyauthorization != NULL) { |
240 |
String line(pproxyauthorization->after(" ").after(" ")); |
241 |
return decodeb64(line); // it's base64 MIME encoded |
242 |
} |
243 |
return ""; |
244 |
} |
245 |
|
246 |
// grab raw contents of Proxy-Authorization header without decoding
|
247 |
std::string HTTPHeader::getRawAuthData()
|
248 |
{ |
249 |
if (pproxyauthorization != NULL) { |
250 |
return pproxyauthorization->after(" ").after(" "); |
251 |
} |
252 |
return ""; |
253 |
} |
254 |
|
255 |
// do we have a non-identity content encoding? this means body is compressed
|
256 |
bool HTTPHeader::isCompressed()
|
257 |
{ |
258 |
if (pcontentencoding != NULL) { |
259 |
if (pcontentencoding->indexOf("identity") != -1) { |
260 |
// http1.1 says this
|
261 |
// should not be here, but not must not
|
262 |
return false; |
263 |
} |
264 |
#ifdef DGDEBUG
|
265 |
std::cout << "is compressed" << std::endl;
|
266 |
#endif
|
267 |
return true; // i.e. encoded with something other than clear |
268 |
} |
269 |
return false; |
270 |
} |
271 |
|
272 |
// grab content encoding header
|
273 |
String HTTPHeader::contentEncoding() |
274 |
{ |
275 |
if (pcontentencoding != NULL) { |
276 |
String ce(pcontentencoding->after(": "));
|
277 |
ce.toLower(); |
278 |
return ce;
|
279 |
} |
280 |
return ""; // we need a default don't we? |
281 |
} |
282 |
|
283 |
// *
|
284 |
// *
|
285 |
// * header modifications
|
286 |
// *
|
287 |
// *
|
288 |
|
289 |
// squid adds this so if more support it it may be useful one day
|
290 |
void HTTPHeader::addXForwardedFor(const std::string &clientip) |
291 |
{ |
292 |
std::string line("X-Forwarded-For: " + clientip + "\r"); |
293 |
header.push_back(String(line.c_str())); |
294 |
} |
295 |
|
296 |
// set content length header to report given lenth
|
297 |
void HTTPHeader::setContentLength(int newlen) |
298 |
{ |
299 |
if (pcontentlength != NULL) { |
300 |
(*pcontentlength) = "Content-Length: " + String(newlen) + "\r"; |
301 |
} |
302 |
} |
303 |
|
304 |
// set the proxy-connection header to allow persistence (or not)
|
305 |
void HTTPHeader::makePersistent(bool persist) |
306 |
{ |
307 |
if (persist) {
|
308 |
// Only make persistent if it originally was, but now isn't.
|
309 |
// The intention isn't to change browser behaviour, just to
|
310 |
// un-do any connection downgrading which DG may have performed
|
311 |
// earlier.
|
312 |
if (waspersistent && !ispersistent) {
|
313 |
if (pproxyconnection != NULL) { |
314 |
(*pproxyconnection) = pproxyconnection->before(":") + ": Keep-Alive\r"; |
315 |
} else {
|
316 |
header.push_back(String("Proxy-Connection: Keep-Alive\r"));
|
317 |
pproxyconnection = &(header.back()); |
318 |
} |
319 |
ispersistent = true;
|
320 |
} |
321 |
} else {
|
322 |
// Only downgrade to non-persistent if it isn't currently persistent.
|
323 |
if (ispersistent) {
|
324 |
if (pproxyconnection != NULL) { |
325 |
(*pproxyconnection) = pproxyconnection->before(":") + ": Close\r"; |
326 |
} else {
|
327 |
header.push_back(String("Proxy-Connection: Close\r"));
|
328 |
pproxyconnection = &(header.back()); |
329 |
} |
330 |
ispersistent = false;
|
331 |
} |
332 |
} |
333 |
} |
334 |
|
335 |
// return a modified accept-encoding header, based on the one supplied,
|
336 |
// but with "identity" added and only supported encodings allowed.
|
337 |
String HTTPHeader::modifyEncodings(String e) |
338 |
{ |
339 |
|
340 |
// There are 4 types of encoding: gzip, deflate, compress and identity
|
341 |
// deflate is in zlib format
|
342 |
// compress is in unix compress format
|
343 |
// identity is uncompressed and supported by all browsers (obviously)
|
344 |
// we do not support compress
|
345 |
|
346 |
e.toLower(); |
347 |
String o("Accept-Encoding: identity");
|
348 |
#if ZLIB_VERNUM < 0x1210 |
349 |
#warning 'Accept-Encoding: gzip' is disabled |
350 |
#else
|
351 |
if (e.contains("gzip")) { |
352 |
o += ",gzip";
|
353 |
} |
354 |
#endif
|
355 |
if (e.contains("deflate")) { |
356 |
o += ",deflate";
|
357 |
} |
358 |
|
359 |
return o;
|
360 |
} |
361 |
|
362 |
// set content length to report the given length, and strip content encoding
|
363 |
void HTTPHeader::removeEncoding(int newlen) |
364 |
{ |
365 |
if (pcontentlength != NULL) { |
366 |
(*pcontentlength) = "Content-Length: " + String(newlen) + "\r"; |
367 |
} |
368 |
// this may all be overkill. since we strip everything out of the outgoing
|
369 |
// accept-encoding header that we don't support, we won't be getting anything
|
370 |
// back again that we don't support, in theory. leave new code commented
|
371 |
// unless it proves to be necessary further down the line. PRA 20-10-2005
|
372 |
if (pcontentencoding != NULL) { |
373 |
/*#ifdef DGDEBUG
|
374 |
std::cout << std::endl << "Stripping Content-Encoding header" <<std::endl;
|
375 |
std::cout << "Old: " << header[i] <<std::endl;
|
376 |
#endif
|
377 |
// only strip supported compression types
|
378 |
String temp(header[i].after(":"));
|
379 |
temp.removeWhiteSpace();
|
380 |
String newheader;
|
381 |
// iterate over comma-separated list of encodings
|
382 |
while (temp.length() != 0) {
|
383 |
if (!(temp.startsWith("gzip") || temp.startsWith("deflate"))) {
|
384 |
// add other, unstripped encoding types back into the header
|
385 |
if (newheader.length() != 0)
|
386 |
newheader += ", ";
|
387 |
newheader += (temp.before(",").length() != 0 ? temp.before(",") : temp);
|
388 |
}
|
389 |
temp = temp.after(",");
|
390 |
temp.removeWhiteSpace();
|
391 |
}
|
392 |
if (newheader.length() == 0)*/
|
393 |
(*pcontentencoding) = "X-DansGuardian-Removed: Content-Encoding\r";
|
394 |
/* else
|
395 |
header[i] = "Content-Encoding: "+newheader;
|
396 |
#ifdef DGDEBUG
|
397 |
std::cout << "New: " << header[i] << std::endl << std::endl;
|
398 |
#endif*/
|
399 |
} |
400 |
} |
401 |
|
402 |
// modifies the URL in all relevant header lines after a regexp search and replace
|
403 |
// setURL Code originally from from Ton Gorter 2004
|
404 |
void HTTPHeader::setURL(String &url) {
|
405 |
//Modif FTRIF
|
406 |
//String hostname;
|
407 |
String hostname,credentials; |
408 |
//Fin Modif FTRIF
|
409 |
|
410 |
bool https = (url.before("://") == "https"); |
411 |
int port = (https ? 443 : 80); |
412 |
|
413 |
if (!url.after("://").contains("/")) { |
414 |
url += "/";
|
415 |
} |
416 |
hostname = url.after("://").before("/"); |
417 |
if (hostname.contains("@")) { // Contains a username:password combo |
418 |
hostname = hostname.after("@");
|
419 |
} |
420 |
if (hostname.contains(":")) { |
421 |
port = hostname.after(":").toInteger();
|
422 |
if (port == 0 || port > 65535) { |
423 |
port = (https ? 443 : 80); |
424 |
} |
425 |
hostname = hostname.before(":"); // chop off the port bit |
426 |
} |
427 |
|
428 |
|
429 |
//Ajout FTRIF
|
430 |
//Restore stripped credentials
|
431 |
credentials="";
|
432 |
if (header.front().after("://").before(hostname.toCharArray()).contains("@")) |
433 |
{ // Contains a username:password combo
|
434 |
credentials = header.front().after("://").before(hostname.toCharArray());
|
435 |
} |
436 |
//Fin ajout FTRIF
|
437 |
|
438 |
#ifdef DGDEBUG
|
439 |
std::cout << "setURL: header.front() changed from: " << header.front() << std::endl;
|
440 |
#endif
|
441 |
if (!https)
|
442 |
//Modif FTRIF
|
443 |
//header.front() = header.front().before(" ") + " " + url + " " + header.front().after(" ").after(" ");
|
444 |
header.front() = header.front().before(" ") + " " + url.before("://") + "://" + credentials + url.after("://") + " " + header.front().after(" ").after(" "); |
445 |
//Fin Modif FTRIF
|
446 |
else
|
447 |
// Should take form of "CONNECT example.com:443 HTTP/1.0" for SSL
|
448 |
header.front() = header.front().before(" ") + " " + hostname + ":" + String(port) + " " + header.front().after(" ").after(" "); |
449 |
#ifdef DGDEBUG
|
450 |
std::cout << " to: " << header.front() << std::endl;
|
451 |
#endif
|
452 |
|
453 |
if (phost != NULL) { |
454 |
#ifdef DGDEBUG
|
455 |
std::cout << "setURL: header[] line changed from: " << (*phost) << std::endl;
|
456 |
#endif
|
457 |
(*phost) = String("Host: ") + hostname;
|
458 |
if (port != (https ? 443 : 80)) |
459 |
{ |
460 |
(*phost) += ":";
|
461 |
(*phost) += String(port); |
462 |
} |
463 |
(*phost) += "\r";
|
464 |
#ifdef DGDEBUG
|
465 |
std::cout << " to " << (*phost) << std::endl;
|
466 |
#endif
|
467 |
} |
468 |
if (pport != NULL) { |
469 |
#ifdef DGDEBUG
|
470 |
std::cout << "setURL: header[] line changed from: " << (*pport) << std::endl;
|
471 |
#endif
|
472 |
(*pport) = String("Port: ") + String(port) + "\r"; |
473 |
#ifdef DGDEBUG
|
474 |
std::cout << " to " << (*pport) << std::endl;
|
475 |
#endif
|
476 |
} |
477 |
// Don't just cache the URL we're sent - url() performs some other
|
478 |
// processing, notably stripping the port part. Caching here will
|
479 |
// bypass all that.
|
480 |
//cachedurl = url.toCharArray();
|
481 |
} |
482 |
|
483 |
// Does a regexp search and replace.
|
484 |
// urlRegExp Code originally from from Ton Gorter 2004
|
485 |
bool HTTPHeader::regExp(String& line, std::deque<RegExp>& regexp_list, std::deque<String>& replacement_list) {
|
486 |
RegExp *re; |
487 |
String replacement; |
488 |
String repstr; |
489 |
String newLine; |
490 |
bool linemodified = false; |
491 |
unsigned int i; |
492 |
unsigned int j, k; |
493 |
unsigned int s = regexp_list.size(); |
494 |
unsigned int matches, submatches; |
495 |
unsigned int match; |
496 |
unsigned int srcoff; |
497 |
unsigned int nextoffset; |
498 |
unsigned int matchlen; |
499 |
unsigned int oldlinelen; |
500 |
|
501 |
// iterate over our list of precompiled regexes
|
502 |
for (i = 0; i < s; i++) { |
503 |
newLine = "";
|
504 |
re = &(regexp_list[i]); |
505 |
if (re->match(line.toCharArray())) {
|
506 |
repstr = replacement_list[i]; |
507 |
matches = re->numberOfMatches(); |
508 |
|
509 |
srcoff = 0;
|
510 |
|
511 |
for (j = 0; j < matches; j++) { |
512 |
nextoffset = re->offset(j); |
513 |
matchlen = re->length(j); |
514 |
|
515 |
// copy next chunk of unmodified data
|
516 |
if (nextoffset > srcoff) {
|
517 |
newLine += line.subString(srcoff, nextoffset - srcoff); |
518 |
srcoff = nextoffset; |
519 |
} |
520 |
|
521 |
// Count number of submatches (brackets) in replacement string
|
522 |
for (submatches = 0; j+submatches+1 < matches; submatches++) |
523 |
if (re->offset(j+submatches+1) + re->length(j+submatches+1) > srcoff + matchlen) |
524 |
break;
|
525 |
|
526 |
// \1 and $1 replacement
|
527 |
replacement = "";
|
528 |
for (k = 0; k < repstr.length(); k++) { |
529 |
// find \1..\9 and $1..$9 and fill them in with submatched strings
|
530 |
if ((repstr[k] == '\\' || repstr[k] == '$') && repstr[k+1] >= '1' && repstr[k+1] <= '9') { |
531 |
match = repstr[++k] - '0';
|
532 |
if (match <= submatches) {
|
533 |
replacement += re->result(j + match).c_str(); |
534 |
} |
535 |
} else {
|
536 |
// unescape \\ and \$, and add non-backreference characters to string
|
537 |
if (repstr[k] == '\\' && (repstr[k+1] == '\\' || repstr[k+1] == '$')) |
538 |
k++; |
539 |
replacement += repstr.subString(k, 1);
|
540 |
} |
541 |
} |
542 |
|
543 |
// copy filled in replacement string
|
544 |
newLine += replacement; |
545 |
srcoff += matchlen; |
546 |
j += submatches; |
547 |
} |
548 |
oldlinelen = line.length(); |
549 |
if (srcoff < oldlinelen) {
|
550 |
newLine += line.subString(srcoff, oldlinelen - srcoff); |
551 |
} |
552 |
#ifdef DGDEBUG
|
553 |
std::cout << "Line modified! (" << line << " -> " << newLine << ")" << std::endl; |
554 |
#endif
|
555 |
// copy newLine into line and continue with other regexes
|
556 |
line = newLine; |
557 |
linemodified = true;
|
558 |
} |
559 |
} |
560 |
|
561 |
return linemodified;
|
562 |
} |
563 |
|
564 |
// Perform searches and replacements on URL
|
565 |
bool HTTPHeader::urlRegExp(int filtergroup) { |
566 |
// exit immediately if list is empty
|
567 |
if (not o.fg[filtergroup]->url_regexp_list_comp.size()) |
568 |
return false; |
569 |
#ifdef DGDEBUG
|
570 |
std::cout << "Starting URL reg exp replace" << std::endl;
|
571 |
#endif
|
572 |
String newUrl(url()); |
573 |
if (regExp(newUrl, o.fg[filtergroup]->url_regexp_list_comp, o.fg[filtergroup]->url_regexp_list_rep)) {
|
574 |
setURL(newUrl); |
575 |
return true; |
576 |
} |
577 |
return false; |
578 |
} |
579 |
|
580 |
// Perform searches and replacements on header lines
|
581 |
bool HTTPHeader::headerRegExp(int filtergroup) { |
582 |
// exit immediately if list is empty
|
583 |
if (not o.fg[filtergroup]->header_regexp_list_comp.size()) |
584 |
return false; |
585 |
bool result = false; |
586 |
for (std::deque<String>::iterator i = header.begin(); i != header.end(); i++) {
|
587 |
#ifdef DGDEBUG
|
588 |
std::cout << "Starting header reg exp replace: " << *i << std::endl;
|
589 |
#endif
|
590 |
bool chop = false; |
591 |
if (i->endsWith("\r")) |
592 |
{ |
593 |
i->chop(); |
594 |
chop = true;
|
595 |
} |
596 |
result |= regExp(*i, o.fg[filtergroup]->header_regexp_list_comp, o.fg[filtergroup]->header_regexp_list_rep); |
597 |
if (chop)
|
598 |
i->append("\r");
|
599 |
} |
600 |
return result;
|
601 |
} |
602 |
|
603 |
// *
|
604 |
// *
|
605 |
// * detailed header checks & fixes
|
606 |
// *
|
607 |
// *
|
608 |
|
609 |
// is a URL malformed?
|
610 |
bool HTTPHeader::malformedURL(const String& url) |
611 |
{ |
612 |
String host(url.after("://"));
|
613 |
if (host.contains("/")) |
614 |
host = host.before("/");
|
615 |
if (host.length() < 2) { |
616 |
#ifdef DGDEBUG
|
617 |
std::cout << "host len too small" << std::endl;
|
618 |
#endif
|
619 |
return true; |
620 |
} |
621 |
if (host.contains(":")) |
622 |
host = host.before(":");
|
623 |
if (host.contains("..") || host.endsWith(".")) { |
624 |
#ifdef DGDEBUG
|
625 |
std::cout << "double dots in domain name" << std::endl;
|
626 |
#endif
|
627 |
return true; |
628 |
} |
629 |
int i, len;
|
630 |
unsigned char c; |
631 |
len = host.length(); |
632 |
bool containsletter = false; |
633 |
for (i = 0; i < len; i++) { |
634 |
c = (unsigned char) host[i]; |
635 |
// If it contains something other than numbers, dots, or [a-fx] (hex encoded IPs),
|
636 |
// IP obfuscation can be ruled out.
|
637 |
if (!containsletter &&
|
638 |
(((c < '0') || (c > '9')) |
639 |
&& (c != '.') && (c != 'x') && (c != 'X') |
640 |
&& ((c < 'a') || (c > 'f')) |
641 |
&& ((c < 'A') || (c > 'F')))) |
642 |
containsletter = true;
|
643 |
if (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') |
644 |
&& !(c >= '0' && c <= '9') && c != '.' && c != '-' && c != '_') { |
645 |
#ifdef DGDEBUG
|
646 |
std::cout << "bad char in hostname" << std::endl;
|
647 |
#endif
|
648 |
return true; |
649 |
// only allowed letters, digits, hiphen, dots
|
650 |
} |
651 |
|
652 |
} |
653 |
// no IP obfuscation going on
|
654 |
if (containsletter)
|
655 |
return false; |
656 |
#ifdef DGDEBUG
|
657 |
else
|
658 |
std::cout << "Checking for IP obfuscation in " << host << std::endl;
|
659 |
#endif
|
660 |
// Check no IP obfuscation is going on
|
661 |
// This includes IPs encoded as a single decimal number,
|
662 |
// fully or partly hex encoded, and octal encoded
|
663 |
bool first = true; |
664 |
bool obfuscation = false; |
665 |
if (host.endsWith(".")) |
666 |
host.chop(); |
667 |
do {
|
668 |
if (!first)
|
669 |
host = host.after(".");
|
670 |
first = false;
|
671 |
String hostpart(host); |
672 |
if (host.contains(".")) |
673 |
hostpart = hostpart.before(".");
|
674 |
// If any part of the host starts with a letter, any letter,
|
675 |
// then we must have a hostname rather than an IP (obscured
|
676 |
// or otherwise). TLDs never start with a number.
|
677 |
if ((hostpart[0] >= 'a' && hostpart[0] <= 'z') || (hostpart[0] >= 'A' && hostpart[0] <= 'Z')) |
678 |
return false; |
679 |
// If any part of the host begins with 0, it may be hex or octal
|
680 |
if ((hostpart[0] == '0') && (hostpart.length() > 1)) |
681 |
{ |
682 |
obfuscation = true;
|
683 |
continue;
|
684 |
} |
685 |
// Also check range, for decimal obfuscation.
|
686 |
int part = hostpart.toInteger();
|
687 |
if ((part < 0) || (part > 255)) |
688 |
obfuscation = true;
|
689 |
} while (host.contains(".")); |
690 |
// If we have any obfuscated parts, and haven't proven it's a hostname, it's invalid.
|
691 |
return obfuscation;
|
692 |
} |
693 |
|
694 |
// is this a POST request encapsulating a file upload?
|
695 |
bool HTTPHeader::isPostUpload(Socket &peersock)
|
696 |
{ |
697 |
if (header.front().toCharArray()[0] != 'P') { |
698 |
return false; |
699 |
} |
700 |
|
701 |
/*bool answer = false;
|
702 |
int postlen = postdata.buffer_length;
|
703 |
int i;
|
704 |
if (postlen < 14) { // min length for there to be a match
|
705 |
return false;
|
706 |
}
|
707 |
char *postdatablock = new char[postlen + 64]; // extra 64 for search
|
708 |
try {
|
709 |
postdata.copyToMemory(postdatablock);
|
710 |
for (i = 0; i < postlen; i++) { // make lowercase char by char
|
711 |
if (isupper(postdatablock[i])) {
|
712 |
postdatablock[i] = tolower(postdatablock[i]);
|
713 |
}
|
714 |
}
|
715 |
RegExp mysearch;
|
716 |
std::string dis("content-type: "); // signifies file upload
|
717 |
char *p = new char[32];
|
718 |
try {
|
719 |
for (i = 0; i < (signed) dis.length(); i++) {
|
720 |
p[i] = dis[i]; // copy it to the block of memory
|
721 |
}
|
722 |
char *pend = p + dis.length(); // pointer for search
|
723 |
char *postdatablockend = postdatablock + postlen;
|
724 |
// search the post data for the content type header
|
725 |
char *res = mysearch.search(postdatablock, postdatablockend, p, pend);
|
726 |
// if we searched all the way to the end without finding it,
|
727 |
// there is no post upload going on; otherwise, there is
|
728 |
if (res != postdatablockend) {
|
729 |
answer = true;
|
730 |
}
|
731 |
}
|
732 |
catch(exception & e) {
|
733 |
};
|
734 |
delete[]p;
|
735 |
}
|
736 |
catch(exception & e) {
|
737 |
};
|
738 |
delete[]postdatablock;
|
739 |
return answer;*/
|
740 |
|
741 |
off_t cl = contentLength(); |
742 |
if (((cl > 0) && (cl < 14)) || (getContentType() == "application/x-www-form-urlencoded")) { |
743 |
#ifdef DGDEBUG
|
744 |
std::cout << "Based on content length/type, is not POST upload!" << std::endl;
|
745 |
#endif
|
746 |
ispostupload = false;
|
747 |
return false; |
748 |
} |
749 |
if (getContentType().length() > 0) { |
750 |
#ifdef DGDEBUG
|
751 |
std::cout << "Based on content length/type, is POST upload!" << std::endl;
|
752 |
#endif
|
753 |
ispostupload = true;
|
754 |
return true; |
755 |
} |
756 |
|
757 |
#ifdef DGDEBUG
|
758 |
std::cout << "Reading a line of POST data to determine content type: ";
|
759 |
#endif
|
760 |
postdatalen = peersock.getLine(postdata, 14, 60, &postdatachopped); |
761 |
#ifdef DGDEBUG
|
762 |
std::cout << postdata << std::endl; |
763 |
#endif
|
764 |
if (postdatalen != 14) { |
765 |
#ifdef DGDEBUG
|
766 |
std::cout << "Is not POST upload!" << std::endl;
|
767 |
#endif
|
768 |
ispostupload = false;
|
769 |
return false; |
770 |
} |
771 |
String conttype(postdata); |
772 |
if (conttype.startsWithLower("content-type: ")) { |
773 |
#ifdef DGDEBUG
|
774 |
std::cout << "Is POST upload!" << std::endl;
|
775 |
#endif
|
776 |
ispostupload = true;
|
777 |
return true; |
778 |
} else {
|
779 |
#ifdef DGDEBUG
|
780 |
std::cout << "Is not POST upload!" << std::endl;
|
781 |
#endif
|
782 |
ispostupload = false;
|
783 |
return false; |
784 |
} |
785 |
} |
786 |
|
787 |
// fix bugs in certain web servers that don't obey standards.
|
788 |
// actually, it's us that don't obey standards - HTTP RFC says header names
|
789 |
// are case-insensitive. - Anonymous SF Poster, 2006-02-23
|
790 |
void HTTPHeader::checkheader(bool allowpersistent) |
791 |
{ |
792 |
// are these headers outgoing, or incoming?
|
793 |
bool outgoing = true; |
794 |
if (header.front().startsWith("HT")) |
795 |
outgoing = false;
|
796 |
|
797 |
bool first = true; |
798 |
for (std::deque<String>::iterator i = header.begin(); i != header.end(); i++) { // check each line in the headers |
799 |
// HTTP 1.1 is persistent by default
|
800 |
if (first) {
|
801 |
if (i->after("HTTP/").startsWith("1.1")) { |
802 |
#ifdef DGDEBUG
|
803 |
std::cout << "CheckHeader: HTTP/1.1, so assuming persistency" << std::endl;
|
804 |
#endif
|
805 |
waspersistent = true;
|
806 |
ispersistent = true;
|
807 |
} |
808 |
|
809 |
// Do not allow persistent connections on CONNECT requests - the browser thinks it has a tunnel
|
810 |
// directly to the external server, not a connection to the proxy, so it won't be re-used in the
|
811 |
// manner expected by DG and will result in waiting for time-outs. Bug identified by Jason Deasi.
|
812 |
if ((*i)[0] == 'C') { |
813 |
#ifdef DGDEBUG
|
814 |
std::cout << "CheckHeader: CONNECT request; disallowing persistency" << std::endl;
|
815 |
#endif
|
816 |
allowpersistent = false;
|
817 |
} |
818 |
|
819 |
first = false;
|
820 |
|
821 |
// force HTTP/1.0 - we don't support chunked transfer encoding, possibly amongst other things
|
822 |
if (outgoing)
|
823 |
(*i) = i->before(" HTTP/") + " HTTP/1.0\r"; |
824 |
} |
825 |
// index headers - try to perform the checks in the order the average browser sends the headers.
|
826 |
// also only do the necessary checks for the header type (sent/received).
|
827 |
else if (outgoing && (phost == NULL) && i->startsWithLower("host:")) { |
828 |
phost = &(*i); |
829 |
} |
830 |
// don't allow through multiple host headers
|
831 |
else if (outgoing && (phost != NULL) && i->startsWithLower("host:")) { |
832 |
i->assign("X-DG-IgnoreMe: removed multiple host headers\r");
|
833 |
} |
834 |
else if (outgoing && (puseragent == NULL) && i->startsWithLower("user-agent:")) { |
835 |
puseragent = &(*i); |
836 |
} |
837 |
else if (outgoing && i->startsWithLower("accept-encoding:")) { |
838 |
(*i) = "Accept-Encoding:" + i->after(":"); |
839 |
(*i) = modifyEncodings(*i) + "\r";
|
840 |
} |
841 |
else if ((pcontenttype == NULL) && i->startsWithLower("content-type:")) { |
842 |
pcontenttype = &(*i); |
843 |
} |
844 |
else if ((pcontentlength == NULL) && i->startsWithLower("content-length:")) { |
845 |
pcontentlength = &(*i); |
846 |
} |
847 |
// is this ever sent outgoing?
|
848 |
else if ((pcontentdisposition == NULL) && i->startsWithLower("content-disposition:")) { |
849 |
pcontentdisposition = &(*i); |
850 |
} |
851 |
else if ((!outgoing) && (pcontentencoding == NULL) && i->startsWithLower("content-encoding:")) { |
852 |
pcontentencoding = &(*i); |
853 |
} |
854 |
else if ((pproxyauthorization == NULL) && i->startsWithLower("proxy-authorization:")) { |
855 |
pproxyauthorization = &(*i); |
856 |
} |
857 |
else if ((pproxyconnection == NULL) && (i->startsWithLower("proxy-connection:") || i->startsWithLower("connection:"))) { |
858 |
#ifdef DGDEBUG
|
859 |
std::cout << "CheckHeader: Found Proxy-Connection" << std::endl;
|
860 |
#endif
|
861 |
if (i->contains("live")) { |
862 |
#ifdef DGDEBUG
|
863 |
std::cout << "CheckHeader: P-C says keep-alive" << std::endl;
|
864 |
#endif
|
865 |
waspersistent = true;
|
866 |
if (!allowpersistent) {
|
867 |
#ifdef DGDEBUG
|
868 |
std::cout << "CheckHeader: ... but we aren't allowed to" << std::endl;
|
869 |
#endif
|
870 |
ispersistent = false;
|
871 |
(*i) = i->before(":") + ": Close\r"; |
872 |
} else {
|
873 |
ispersistent = true;
|
874 |
} |
875 |
} else {
|
876 |
#ifdef DGDEBUG
|
877 |
std::cout << "CheckHeader: P-C says close" << std::endl;
|
878 |
#endif
|
879 |
ispersistent = false;
|
880 |
waspersistent = false;
|
881 |
} |
882 |
pproxyconnection = &(*i); |
883 |
} |
884 |
else if (outgoing && (pxforwardedfor == NULL) && i->startsWithLower("x-forwarded-for:")) { |
885 |
pxforwardedfor = &(*i); |
886 |
} |
887 |
// this one's non-standard, so check for it last
|
888 |
else if (outgoing && (pport = NULL) && i->startsWithLower("port:")) { |
889 |
pport = &(*i); |
890 |
} |
891 |
#ifdef DGDEBUG
|
892 |
std::cout << (*i) << std::endl; |
893 |
#endif
|
894 |
} |
895 |
#ifdef DGDEBUG
|
896 |
std::cout << "CheckHeader flags: AP=" << allowpersistent << " IP=" << ispersistent << " PPC=" << !(pproxyconnection == NULL) << std::endl; |
897 |
#endif
|
898 |
// if a request was HTTP 1.1 and there was no proxy-connection header, we may need to add one
|
899 |
if ((!allowpersistent) && ispersistent) {
|
900 |
// we should only be in this state if pproxyconnection == NULL (otherwise ispersistent will have been falsified earlier)
|
901 |
#ifdef DGDEBUG
|
902 |
std::cout << "CheckHeader: Adding our own Proxy-Connection: Close" << std::endl;
|
903 |
#endif
|
904 |
header.push_back("Proxy-Connection: Close\r");
|
905 |
pproxyconnection = &(header.back()); |
906 |
ispersistent = false;
|
907 |
} else if (allowpersistent && ispersistent && (pproxyconnection == NULL)) { |
908 |
#ifdef DGDEBUG
|
909 |
std::cout << "CheckHeader: Adding our own Proxy-Connection: Keep-Alive" << std::endl;
|
910 |
#endif
|
911 |
// we should only be in this state if HTTP 1.1, persistency allowed, but persistency not explicitly asked for
|
912 |
header.push_back("Proxy-Connection: Keep-Alive\r");
|
913 |
pproxyconnection = &(header.back()); |
914 |
} |
915 |
// Normalise request headers (fix host, port, first line of header, etc. to all be consistent)
|
916 |
if (outgoing)
|
917 |
{ |
918 |
String newurl(url(true));
|
919 |
setURL(newurl); |
920 |
} |
921 |
} |
922 |
|
923 |
// A request may be in the form:
|
924 |
// GET http://foo.bar:80/ HTML/1.0 (if :80 is omitted 80 is assumed)
|
925 |
// or:
|
926 |
// GET / HTML/1.0
|
927 |
// Host: foo.bar (optional header in HTTP/1.0, but like HTTP/1.1, we require it!)
|
928 |
// Port: 80 (not a standard header; do any clients send it?)
|
929 |
// or:
|
930 |
// CONNECT foo.bar:443 HTTP/1.1
|
931 |
// So we need to handle all 3
|
932 |
|
933 |
String HTTPHeader::url(bool withport)
|
934 |
{ |
935 |
// Version of URL *with* port is not cached,
|
936 |
// as vast majority of our code doesn't like
|
937 |
// port numbers in URLs.
|
938 |
if (cachedurl.length() > 0 && !withport) |
939 |
return cachedurl;
|
940 |
port = 80;
|
941 |
bool https = false; |
942 |
String hostname; |
943 |
String answer(header.front().after(" "));
|
944 |
answer.removeMultiChar(' ');
|
945 |
if (answer.after(" ").startsWith("HTTP/")) { |
946 |
answer = answer.before(" HTTP/");
|
947 |
} else {
|
948 |
answer = answer.before(" http/"); // just in case! |
949 |
} |
950 |
if (requestType() == "CONNECT") { |
951 |
https = true;
|
952 |
port = 443;
|
953 |
if (!answer.startsWith("https://")) { |
954 |
answer = "https://" + answer;
|
955 |
} |
956 |
} |
957 |
if (pport != NULL) { |
958 |
port = pport->after(" ").toInteger();
|
959 |
if (port == 0 || port > 65535) |
960 |
port = (https ? 443 : 80); |
961 |
} |
962 |
if (answer.length()) {
|
963 |
if (answer[0] == '/') { // must be the latter above |
964 |
if (phost != NULL) { |
965 |
hostname = phost->after(" ");
|
966 |
hostname.removeWhiteSpace(); |
967 |
if (hostname.contains(":")) |
968 |
{ |
969 |
port = hostname.after(":").toInteger();
|
970 |
if (port == 0 || port > 65535) { |
971 |
port = (https ? 443 : 80); |
972 |
} |
973 |
hostname = hostname.before(":");
|
974 |
} |
975 |
while (hostname.endsWith(".")) |
976 |
hostname.chop(); |
977 |
if (withport && (port != (https ? 443 : 80))) |
978 |
hostname += ":" + String(port);
|
979 |
hostname = "http://" + hostname;
|
980 |
answer = hostname + answer; |
981 |
} |
982 |
// Squid doesn't like requests in this format. Work around the fact.
|
983 |
header.front() = requestType() + " " + answer + " HTTP/" + header.front().after(" HTTP/"); |
984 |
} else { // must be in the form GET http://foo.bar:80/ HTML/1.0 |
985 |
if (!answer.after("://").contains("/")) { |
986 |
answer += "/"; // needed later on so correct host is extracted |
987 |
} |
988 |
String protocol(answer.before("://"));
|
989 |
hostname = answer.after("://");
|
990 |
String url(hostname.after("/"));
|
991 |
url.removeWhiteSpace(); // remove rubbish like ^M and blanks
|
992 |
if (url.length() > 0) { |
993 |
url = "/" + url;
|
994 |
} |
995 |
hostname = hostname.before("/"); // extra / was added 4 here |
996 |
if (hostname.contains("@")) { // Contains a username:password combo |
997 |
hostname = hostname.after("@");
|
998 |
} |
999 |
if (hostname.contains(":")) { |
1000 |
port = hostname.after(":").toInteger();
|
1001 |
if (port == 0 || port > 65535) { |
1002 |
port = (https ? 443 : 80); |
1003 |
} |
1004 |
hostname = hostname.before(":"); // chop off the port bit |
1005 |
} |
1006 |
while (hostname.endsWith(".")) |
1007 |
hostname.chop(); |
1008 |
if (withport && (port != (https ? 443 : 80))) |
1009 |
hostname += ":" + String(port);
|
1010 |
answer = protocol + "://" + hostname + url;
|
1011 |
} |
1012 |
} |
1013 |
if (answer.endsWith("//")) { |
1014 |
answer.chop(); |
1015 |
} |
1016 |
#ifdef DGDEBUG
|
1017 |
std::cout << "from header url:" << answer << std::endl;
|
1018 |
#endif
|
1019 |
// Don't include port numbers in the URL in the cached version.
|
1020 |
// Most of the code only copes with URLs *without* port specifiers.
|
1021 |
if (!withport)
|
1022 |
cachedurl = answer.toCharArray(); |
1023 |
return answer;
|
1024 |
} |
1025 |
|
1026 |
// *
|
1027 |
// *
|
1028 |
// * Bypass URL/Cookie funcs
|
1029 |
// *
|
1030 |
// *
|
1031 |
|
1032 |
// chop the GBYPASS or GIBYPASS variable out of a bypass URL
|
1033 |
// This function ASSUMES that you really know what you are doing
|
1034 |
// Do NOT run this function unless you know that the URL contains a valid bypass code
|
1035 |
// Ernest W Lessenger
|
1036 |
void HTTPHeader::chopBypass(String url, bool infectionbypass) |
1037 |
{ |
1038 |
if (url.contains(infectionbypass ? "GIBYPASS=" : "GBYPASS=")) { |
1039 |
if (url.contains(infectionbypass ? "?GIBYPASS=" : "?GBYPASS=")) { |
1040 |
String bypass(url.after(infectionbypass ? "?GIBYPASS=" : "?GBYPASS=")); |
1041 |
header.front() = header.front().before(infectionbypass ? "?GIBYPASS=" : "?GBYPASS=") + header.front().after(bypass.toCharArray()); |
1042 |
} else {
|
1043 |
String bypass(url.after(infectionbypass ? "&GIBYPASS=" : "&GBYPASS=")); |
1044 |
header.front() = header.front().before(infectionbypass ? "&GIBYPASS=" : "&GBYPASS=") + header.front().after(bypass.toCharArray()); |
1045 |
} |
1046 |
} |
1047 |
cachedurl = "";
|
1048 |
} |
1049 |
|
1050 |
// same for scan bypass
|
1051 |
void HTTPHeader::chopScanBypass(String url)
|
1052 |
{ |
1053 |
if (url.contains("GSBYPASS=")) { |
1054 |
if (url.contains("?GSBYPASS=")) { |
1055 |
String bypass(url.after("?GSBYPASS="));
|
1056 |
header.front() = header.front().before("?GSBYPASS=") + header.front().after(bypass.toCharArray());
|
1057 |
} else {
|
1058 |
String bypass(url.after("&GSBYPASS="));
|
1059 |
header.front() = header.front().before("&GSBYPASS=") + header.front().after(bypass.toCharArray());
|
1060 |
} |
1061 |
} |
1062 |
cachedurl = "";
|
1063 |
} |
1064 |
|
1065 |
// I'm not proud of this... --Ernest
|
1066 |
String HTTPHeader::getCookie(const char *cookie) |
1067 |
{ |
1068 |
String line; |
1069 |
// TODO - do away with loop here somehow, or otherwise speed it up?
|
1070 |
for (std::deque<String>::iterator i = header.begin(); i != header.end(); i++) {
|
1071 |
if (i->startsWithLower("cookie:")) { |
1072 |
line = i->after(": ");
|
1073 |
if (line.contains(cookie)) { // We know we have the cookie |
1074 |
line = line.after(cookie); |
1075 |
line.lop(); // Get rid of the '='
|
1076 |
if (line.contains(";")) { |
1077 |
line = line.before(";");
|
1078 |
} |
1079 |
} |
1080 |
// break; // Technically there should be only one Cookie: header, but...
|
1081 |
} |
1082 |
} |
1083 |
line.removeWhiteSpace(); |
1084 |
#ifdef DGDEBUG
|
1085 |
std::cout << "Found GBYPASS cookie:" << line << std::endl;
|
1086 |
#endif
|
1087 |
return line;
|
1088 |
} |
1089 |
|
1090 |
// add cookie with given name & value to outgoing headers
|
1091 |
void HTTPHeader::setCookie(const char *cookie, const char *domain, const char *value) |
1092 |
{ |
1093 |
String line("Set-Cookie: ");
|
1094 |
line += cookie; |
1095 |
line += "=";
|
1096 |
line += value; |
1097 |
line += "; path=/; domain=.";
|
1098 |
line += domain; |
1099 |
line += "\r";
|
1100 |
header.push_back(line); |
1101 |
#ifdef DGDEBUG
|
1102 |
std::cout << "Setting cookie:" << line << std::endl;
|
1103 |
#endif
|
1104 |
// no expiry specified so ends with the browser session
|
1105 |
} |
1106 |
|
1107 |
// is this a temporary filter bypass cookie?
|
1108 |
bool HTTPHeader::isBypassCookie(String url, const char *magic, const char *clientip) |
1109 |
{ |
1110 |
String cookie(getCookie("GBYPASS"));
|
1111 |
if (!cookie.length()) {
|
1112 |
#ifdef DGDEBUG
|
1113 |
std::cout << "No bypass cookie" << std::endl;
|
1114 |
#endif
|
1115 |
return false; |
1116 |
} |
1117 |
String cookiehash(cookie.subString(0, 32)); |
1118 |
String cookietime(cookie.after(cookiehash.toCharArray())); |
1119 |
String mymagic(magic); |
1120 |
mymagic += clientip; |
1121 |
mymagic += cookietime; |
1122 |
bool matched = false; |
1123 |
while(url.contains(".")) { |
1124 |
String hashed(url.md5(mymagic.toCharArray())); |
1125 |
if (hashed == cookiehash) {
|
1126 |
matched = true;
|
1127 |
break;
|
1128 |
} |
1129 |
url = url.after(".");
|
1130 |
} |
1131 |
if (not matched) { |
1132 |
#ifdef DGDEBUG
|
1133 |
std::cout << "Cookie GBYPASS not match" << std::endl;
|
1134 |
#endif
|
1135 |
return false; |
1136 |
} |
1137 |
time_t timen = time(NULL);
|
1138 |
time_t timeu = cookietime.toLong(); |
1139 |
if (timeu < timen) {
|
1140 |
#ifdef DGDEBUG
|
1141 |
std::cout << "Cookie GBYPASS expired: " << timeu << " " << timen << std::endl; |
1142 |
#endif
|
1143 |
return false; |
1144 |
} |
1145 |
return true; |
1146 |
} |
1147 |
|
1148 |
// is this a temporary filter bypass URL?
|
1149 |
int HTTPHeader::isBypassURL(String * url, const char *magic, const char *clientip, bool *isvirusbypass) |
1150 |
{ |
1151 |
if ((*url).length() <= 45) |
1152 |
return false; // Too short, can't be a bypass |
1153 |
|
1154 |
// check to see if this is a bypass URL, and which type it is
|
1155 |
bool filterbypass = false; |
1156 |
bool virusbypass = false; |
1157 |
if ((isvirusbypass == NULL) && ((*url).contains("GBYPASS="))) { |
1158 |
filterbypass = true;
|
1159 |
} else if ((isvirusbypass != NULL) && (*url).contains("GIBYPASS=")) { |
1160 |
virusbypass = true;
|
1161 |
} |
1162 |
if (!(filterbypass || virusbypass))
|
1163 |
return 0; |
1164 |
|
1165 |
#ifdef DGDEBUG
|
1166 |
std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " found checking..." << std::endl; |
1167 |
#endif
|
1168 |
|
1169 |
String url_left((*url).before(filterbypass ? "GBYPASS=" : "GIBYPASS=")); |
1170 |
url_left.chop(); // remove the ? or &
|
1171 |
String url_right((*url).after(filterbypass ? "GBYPASS=" : "GIBYPASS=")); |
1172 |
|
1173 |
String url_hash(url_right.subString(0, 32)); |
1174 |
String url_time(url_right.after(url_hash.toCharArray())); |
1175 |
#ifdef DGDEBUG
|
1176 |
std::cout << "URL: " << url_left << ", HASH: " << url_hash << ", TIME: " << url_time << std::endl; |
1177 |
#endif
|
1178 |
|
1179 |
String mymagic(magic); |
1180 |
mymagic += clientip; |
1181 |
mymagic += url_time; |
1182 |
String hashed(url_left.md5(mymagic.toCharArray())); |
1183 |
|
1184 |
if (hashed != url_hash) {
|
1185 |
#ifdef DGDEBUG
|
1186 |
std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " hash mismatch" << std::endl; |
1187 |
#endif
|
1188 |
return 0; |
1189 |
} |
1190 |
|
1191 |
time_t timen = time(NULL);
|
1192 |
time_t timeu = url_time.toLong(); |
1193 |
|
1194 |
if (timeu < 1) { |
1195 |
#ifdef DGDEBUG
|
1196 |
std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " bad time value" << std::endl; |
1197 |
#endif
|
1198 |
return 1; // bad time value |
1199 |
} |
1200 |
if (timeu < timen) { // expired key |
1201 |
#ifdef DGDEBUG
|
1202 |
std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " expired" << std::endl; |
1203 |
#endif
|
1204 |
return 1; // denotes expired but there |
1205 |
} |
1206 |
#ifdef DGDEBUG
|
1207 |
std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " not expired" << std::endl; |
1208 |
#endif
|
1209 |
if (virusbypass)
|
1210 |
(*isvirusbypass) = true;
|
1211 |
return (int) timeu; |
1212 |
} |
1213 |
|
1214 |
// is this a scan bypass URL? i.e. a "magic" URL for retrieving a previously scanned file
|
1215 |
bool HTTPHeader::isScanBypassURL(String * url, const char *magic, const char *clientip) |
1216 |
{ |
1217 |
if ((*url).length() <= 45) |
1218 |
return false; // Too short, can't be a bypass |
1219 |
|
1220 |
if (!(*url).contains("GSBYPASS=")) { // If this is not a bypass url |
1221 |
return false; |
1222 |
} |
1223 |
#ifdef DGDEBUG
|
1224 |
std::cout << "URL GSBYPASS found checking..." << std::endl;
|
1225 |
#endif
|
1226 |
|
1227 |
String url_left((*url).before("GSBYPASS="));
|
1228 |
url_left.chop(); // remove the ? or &
|
1229 |
String url_right((*url).after("GSBYPASS="));
|
1230 |
|
1231 |
String url_hash(url_right.subString(0, 32)); |
1232 |
#ifdef DGDEBUG
|
1233 |
std::cout << "URL: " << url_left << ", HASH: " << url_hash << std::endl; |
1234 |
#endif
|
1235 |
|
1236 |
// format is:
|
1237 |
// GSBYPASS=hash(ip+url+tempfilename+mime+disposition+secret)
|
1238 |
// &N=tempfilename&M=mimetype&D=dispos
|
1239 |
|
1240 |
String tempfilename(url_right.after("&N="));
|
1241 |
String tempfilemime(tempfilename.after("&M="));
|
1242 |
String tempfiledis(tempfilemime.after("&D="));
|
1243 |
tempfilemime = tempfilemime.before("&D=");
|
1244 |
tempfilename = tempfilename.before("&M=");
|
1245 |
|
1246 |
String tohash(clientip + url_left + tempfilename + tempfilemime + tempfiledis + magic); |
1247 |
String hashed(tohash.md5()); |
1248 |
|
1249 |
#ifdef DGDEBUG
|
1250 |
std::cout << "checking hash: " << clientip << " " << url_left << " " << tempfilename << " " << " " << tempfilemime << " " << tempfiledis << " " << magic << " " << hashed << std::endl; |
1251 |
#endif
|
1252 |
|
1253 |
if (hashed == url_hash) {
|
1254 |
return true; |
1255 |
} |
1256 |
#ifdef DGDEBUG
|
1257 |
std::cout << "URL GSBYPASS HASH mismatch" << std::endl;
|
1258 |
#endif
|
1259 |
|
1260 |
return false; |
1261 |
} |
1262 |
|
1263 |
// *
|
1264 |
// *
|
1265 |
// * URL and Base64 decoding funcs
|
1266 |
// *
|
1267 |
// *
|
1268 |
|
1269 |
// URL decoding (%xx)
|
1270 |
// uses regex pre-compiled on startup
|
1271 |
String HTTPHeader::decode(const String &s, bool decodeAll) |
1272 |
{ |
1273 |
if (s.length() < 3) { |
1274 |
return s;
|
1275 |
} |
1276 |
#ifdef DGDEBUG
|
1277 |
std::cout << "decoding url" << std::endl;
|
1278 |
#endif
|
1279 |
if (!urldecode_re.match(s.c_str())) {
|
1280 |
return s;
|
1281 |
} // exit if not found
|
1282 |
#ifdef DGDEBUG
|
1283 |
std::cout << "matches:" << urldecode_re.numberOfMatches() << std::endl;
|
1284 |
std::cout << "removing %XX" << std::endl;
|
1285 |
#endif
|
1286 |
int match;
|
1287 |
int offset;
|
1288 |
int pos = 0; |
1289 |
int size = s.length();
|
1290 |
String result; |
1291 |
String n; |
1292 |
for (match = 0; match < urldecode_re.numberOfMatches(); match++) { |
1293 |
offset = urldecode_re.offset(match); |
1294 |
if (offset > pos) {
|
1295 |
result += s.subString(pos, offset - pos); |
1296 |
} |
1297 |
n = urldecode_re.result(match).c_str(); |
1298 |
n.lop(); // remove %
|
1299 |
result += hexToChar(n, decodeAll); |
1300 |
#ifdef DGDEBUG
|
1301 |
std::cout << "encoded: " << urldecode_re.result(match) << " decoded: " << hexToChar(n) << " string so far: " << result << std::endl; |
1302 |
#endif
|
1303 |
pos = offset + 3;
|
1304 |
} |
1305 |
if (size > pos) {
|
1306 |
result += s.subString(pos, size - pos); |
1307 |
} else {
|
1308 |
n = "%" + n;
|
1309 |
} |
1310 |
return result;
|
1311 |
} |
1312 |
|
1313 |
// turn %xx back into original character
|
1314 |
String HTTPHeader::hexToChar(const String &n, bool all) |
1315 |
{ |
1316 |
if (n.length() < 2) { |
1317 |
return String(n);
|
1318 |
} |
1319 |
static char buf[2]; |
1320 |
unsigned int a, b; |
1321 |
unsigned char c; |
1322 |
a = n[0];
|
1323 |
b = n[1];
|
1324 |
if (a >= 'a' && a <= 'f') { |
1325 |
a -= 87;
|
1326 |
} |
1327 |
else if (a >= 'A' && a <= 'F') { |
1328 |
a -= 55;
|
1329 |
} |
1330 |
else if (a >= '0' && a <= '9') { |
1331 |
a -= 48;
|
1332 |
} |
1333 |
else {
|
1334 |
return String("%") + n; |
1335 |
} |
1336 |
if (b >= 'a' && b <= 'f') { |
1337 |
b -= 87;
|
1338 |
} |
1339 |
else if (b >= 'A' && b <= 'F') { |
1340 |
b -= 55;
|
1341 |
} |
1342 |
else if (b >= '0' && b <= '9') { |
1343 |
b -= 48;
|
1344 |
} |
1345 |
else {
|
1346 |
return String("%") + n; |
1347 |
} |
1348 |
c = a * 16 + b;
|
1349 |
if (all || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '-')) { |
1350 |
buf[0] = c;
|
1351 |
buf[1] = '\0'; |
1352 |
return String(buf);
|
1353 |
} else {
|
1354 |
return String("%") + n; |
1355 |
} |
1356 |
} |
1357 |
|
1358 |
// decode a line of base64
|
1359 |
std::string HTTPHeader::decodeb64(const String& line) |
1360 |
{ // decode a block of b64 MIME
|
1361 |
long four = 0; |
1362 |
int d;
|
1363 |
std::string result;
|
1364 |
int len = line.length() - 4; |
1365 |
for (int i = 0; i < len; i += 4) { |
1366 |
four = 0;
|
1367 |
d = decode1b64(line[i + 0]);
|
1368 |
four = four | d; |
1369 |
d = decode1b64(line[i + 1]);
|
1370 |
four = (four << 6) | d;
|
1371 |
d = decode1b64(line[i + 2]);
|
1372 |
four = (four << 6) | d;
|
1373 |
d = decode1b64(line[i + 3]);
|
1374 |
four = (four << 6) | d;
|
1375 |
d = (four & 0xFF0000) >> 16; |
1376 |
result += (char) d;
|
1377 |
d = (four & 0xFF00) >> 8; |
1378 |
result += (char) d;
|
1379 |
d = four & 0xFF;
|
1380 |
result += (char) d;
|
1381 |
} |
1382 |
return result;
|
1383 |
} |
1384 |
|
1385 |
// decode an individual base64 character
|
1386 |
int HTTPHeader::decode1b64(char c) |
1387 |
{ |
1388 |
unsigned char i = '\0'; |
1389 |
switch (c) {
|
1390 |
case '+': |
1391 |
i = 62;
|
1392 |
break;
|
1393 |
case '/': |
1394 |
i = 63;
|
1395 |
break;
|
1396 |
case '=': |
1397 |
i = 0;
|
1398 |
break;
|
1399 |
default: // must be A-Z, a-z or 0-9 |
1400 |
i = '9' - c;
|
1401 |
if (i > 0x3F) { // under 9 |
1402 |
i = 'Z' - c;
|
1403 |
if (i > 0x3F) { // over Z |
1404 |
i = 'z' - c;
|
1405 |
if (i > 0x3F) { // over z so invalid |
1406 |
i = 0x80; // so set the high bit |
1407 |
} else {
|
1408 |
// a-z
|
1409 |
i = c - 71;
|
1410 |
} |
1411 |
} else {
|
1412 |
// A-Z
|
1413 |
i = c - 65;
|
1414 |
} |
1415 |
} else {
|
1416 |
// 0-9
|
1417 |
i = c + 4;
|
1418 |
} |
1419 |
break;
|
1420 |
} |
1421 |
return (int) i; |
1422 |
} |
1423 |
|
1424 |
// *
|
1425 |
// *
|
1426 |
// * network send/receive funcs
|
1427 |
// *
|
1428 |
// *
|
1429 |
|
1430 |
// send headers out over the given socket
|
1431 |
// "reconnect" flag gives permission to reconnect to the socket on write error
|
1432 |
// - this allows us to re-open the proxy connection on pconns if squid's end has
|
1433 |
// timed out but the client's end hasn't. not much use with NTLM, since squid
|
1434 |
// will throw a 407 and restart negotiation, but works well with basic & others.
|
1435 |
void HTTPHeader::out(Socket * peersock, Socket * sock, int sendflag, bool reconnect) throw(std::exception) |
1436 |
{ |
1437 |
String l; // for amalgamating to avoid conflict with the Nagel algorithm
|
1438 |
|
1439 |
if (sendflag == __DGHEADER_SENDALL || sendflag == __DGHEADER_SENDFIRSTLINE) {
|
1440 |
if (header.size() > 0) { |
1441 |
l = header.front() + "\n";
|
1442 |
#ifdef DGDEBUG
|
1443 |
std::cout << "headertoclient:" << l << std::endl;
|
1444 |
#endif
|
1445 |
// first reconnect loop - send first line
|
1446 |
while (true) { |
1447 |
if (!(*sock).writeToSocket(l.toCharArray(), l.length(), 0, timeout)) { |
1448 |
// reconnect & try again if we've been told to
|
1449 |
if (reconnect) {
|
1450 |
// don't try more than once
|
1451 |
#ifdef DGDEBUG
|
1452 |
std::cout << "Proxy connection broken (1); trying to re-establish..." << std::endl;
|
1453 |
syslog(LOG_ERR,"Proxy connection broken (1); trying to re-establish...");
|
1454 |
#endif
|
1455 |
reconnect = false;
|
1456 |
sock->reset(); |
1457 |
int rc = sock->connect(o.proxy_ip, o.proxy_port);
|
1458 |
if (rc)
|
1459 |
throw std::exception();
|
1460 |
continue;
|
1461 |
} |
1462 |
throw std::exception();
|
1463 |
} |
1464 |
// if we got here, we succeeded, so break the reconnect loop
|
1465 |
break;
|
1466 |
} |
1467 |
} |
1468 |
if (sendflag == __DGHEADER_SENDFIRSTLINE) {
|
1469 |
return;
|
1470 |
} |
1471 |
} |
1472 |
|
1473 |
l = "";
|
1474 |
|
1475 |
for (std::deque<String>::iterator i = header.begin() + 1; i != header.end(); i++) { |
1476 |
l += (*i) + "\n";
|
1477 |
} |
1478 |
l += "\r\n";
|
1479 |
|
1480 |
#ifdef DGDEBUG
|
1481 |
std::cout << "headertoclient:" << l << std::endl;
|
1482 |
#endif
|
1483 |
|
1484 |
// second reconnect loop
|
1485 |
while (true) { |
1486 |
// send header to the output stream
|
1487 |
// need exception for bad write
|
1488 |
|
1489 |
if (!(*sock).writeToSocket(l.toCharArray(), l.length(), 0, timeout)) { |
1490 |
// reconnect & try again if we've been told to
|
1491 |
if (reconnect) {
|
1492 |
// don't try more than once
|
1493 |
#ifdef DGDEBUG
|
1494 |
std::cout << "Proxy connection broken (2); trying to re-establish..." << std::endl;
|
1495 |
syslog(LOG_ERR,"Proxy connection broken (2); trying to re-establish...");
|
1496 |
#endif
|
1497 |
reconnect = false;
|
1498 |
sock->reset(); |
1499 |
int rc = sock->connect(o.proxy_ip, o.proxy_port);
|
1500 |
if (rc)
|
1501 |
throw std::exception();
|
1502 |
// include the first line on the retry
|
1503 |
l = header.front() + "\n" + l;
|
1504 |
continue;
|
1505 |
} |
1506 |
throw std::exception();
|
1507 |
} |
1508 |
// if we got here, we succeeded, so break the reconnect loop
|
1509 |
break;
|
1510 |
} |
1511 |
|
1512 |
if ((!requestType().startsWith("HTTP")) && (pcontentlength != NULL)) { |
1513 |
if (postdatalen > 0) { |
1514 |
#ifdef DGDEBUG
|
1515 |
std::cout << "Sending initial POST data chunk" << std::endl;
|
1516 |
#endif
|
1517 |
// Re-add the chopped off \n, if necessary
|
1518 |
if (postdatachopped) {
|
1519 |
#ifdef DGDEBUG
|
1520 |
std::cout << "Re-adding newline to POST data (postdatalen " << postdatalen << ")" << std::endl; |
1521 |
#endif
|
1522 |
postdata[postdatalen-1] = '\n'; |
1523 |
postdata[postdatalen] = '\0';
|
1524 |
} |
1525 |
sock->writeToSockete(postdata, postdatalen, 0, timeout);
|
1526 |
} |
1527 |
#ifdef DGDEBUG
|
1528 |
std::cout << "Opening tunnel for remainder of POST data" << std::endl;
|
1529 |
#endif
|
1530 |
FDTunnel fdt; |
1531 |
off_t remaining = contentLength() - postdatalen; |
1532 |
if (remaining < 0) |
1533 |
throw std::runtime_error("No POST data left to send!?"); |
1534 |
fdt.tunnel(*peersock, *sock, false, remaining, true); |
1535 |
} |
1536 |
} |
1537 |
|
1538 |
// discard remainder of POST data
|
1539 |
void HTTPHeader::discard(Socket *sock)
|
1540 |
{ |
1541 |
static char fred[4096]; |
1542 |
off_t cl = contentLength() - postdatalen; |
1543 |
int rc;
|
1544 |
while (cl > 0) { |
1545 |
rc = sock->readFromSocket(fred, ((cl > 4096) ? 4096 : cl), 0, timeout, false); |
1546 |
if (rc > 0) |
1547 |
cl -= rc; |
1548 |
else
|
1549 |
break;
|
1550 |
} |
1551 |
} |
1552 |
|
1553 |
// Read a complete header in from the given socket, one line at a time, and
// then hand it to checkheader().
//
// sock:                socket to read the header lines from.
// allowpersistent:     passed straight through to checkheader().
// honour_reloadconfig: passed to getLine for the first line only - this lets
//                      us break when waiting for the next request on a pconn,
//                      but not during receipt of a request in progress.
// Throws std::exception if no header lines were received. Per the original
// author's note, getLine itself throws on error, and that is only caught by
// HandleConnection().
void HTTPHeader::in(Socket * sock, bool allowpersistent, bool honour_reloadconfig)
{
	if (dirty)
		reset();
	dirty = true;

	// the RFCs don't specify a max header line length so this should be
	// dynamic really. Pointed out (well reminded actually) by Daniel Robbins
	char buff[8192];  // buffer to hold the incoming HTTP line
	String line;  // the line just read, as a String
	bool firsttime = true;
	bool discard;

	// loop until we get to the end of the header (a line by itself)
	do {
		// get a line of header from the stream
		sock->getLine(buff, 8192, timeout, firsttime && honour_reloadconfig);
		line = buff;

		// ignore crap left in buffer from old pconns (in particular, the IE
		// "extra CRLF after POST" bug): a short line ahead of the request is
		// dropped rather than taken as the end of the header
		discard = firsttime && line.length() <= 3;
		if (discard) {
#ifdef DGDEBUG
			std::cout << "Discarding unwanted bytes at head of request (pconn closed or IE multipart POST bug)" << std::endl;
#endif
		} else {
			header.push_back(line);  // stick the line in the deque that holds the header
		}
		firsttime = false;
	} while (line.length() > 3 || discard);

	header.pop_back();  // remove the final blank line of a header
	if (header.empty())
		throw std::exception();

	checkheader(allowpersistent);  // sort out a few bits in the header
}