Projet

Général

Profil

HTTPHeader-modif-by-FTRIF.cpp

Thierry Bertrand, 17/02/2011 15:30

Télécharger (46,1 ko)

 
1
//Please refer to http://dansguardian.org/?page=copyright2
2
//for the license for this code.
3
//Written by Daniel Barron (daniel@//jadeb.com).
4
//For support go to http://groups.yahoo.com/group/dansguardian
5

    
6
//  This program is free software; you can redistribute it and/or modify
7
//  it under the terms of the GNU General Public License as published by
8
//  the Free Software Foundation; either version 2 of the License, or
9
//  (at your option) any later version.
10
//
11
//  This program is distributed in the hope that it will be useful,
12
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
//  GNU General Public License for more details.
15
//
16
//  You should have received a copy of the GNU General Public License
17
//  along with this program; if not, write to the Free Software
18
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19

    
20
//This file contains modifications suggested and mostly provided by
21
//Daniel Robbins 13/4/01 drobbins@gento.org
22
//Modifications include, but not limited to, getcontenttype(), << , >>
23

    
24

    
25
// INCLUDES
26

    
27
#ifdef HAVE_CONFIG_H
28
        #include "dgconfig.h"
29
#endif
30
#include "HTTPHeader.hpp"
31
#include "Socket.hpp"
32
#include "OptionContainer.hpp"
33
#include "FDTunnel.hpp"
34

    
35
#include <unistd.h>
36
#include <sys/socket.h>
37
#include <exception>
38
#include <time.h>
39
#include <syslog.h>
40
#include <cerrno>
41
#include <zlib.h>
42

    
43

    
44
// GLOBALS
45
extern OptionContainer o;
46

    
47
// regexp for decoding %xx in URLs
48
extern RegExp urldecode_re;
49

    
50

    
51
// IMPLEMENTATION
52

    
53
// set timeout for socket operations
54
void HTTPHeader::setTimeout(int t)
55
{
56
        timeout = t;
57
}
58

    
59
// reset header object for future use
60
void HTTPHeader::reset()
61
{
62
        if (dirty) {
63
                header.clear();
64
                //postdata.reset();
65
                postdata[0] = '\0';
66
                postdatalen = 0;
67
                postdatachopped = false;
68
                ispostupload = false;
69
                waspersistent = false;
70
                ispersistent = false;
71

    
72
                cachedurl = "";
73

    
74
                phost = NULL;
75
                pport = NULL;
76
                pcontentlength = NULL;
77
                pcontenttype = NULL;
78
                pproxyauthorization = NULL;
79
                pcontentdisposition = NULL;
80
                puseragent = NULL;
81
                pxforwardedfor = NULL;
82
                pcontentencoding = NULL;
83
                pproxyconnection = NULL;
84
                
85
                dirty = false;
86
        }
87
}
88

    
89
// *
90
// *
91
// * header value and type checks
92
// *
93
// *
94

    
95
// grab request type (GET, HEAD etc.)
96
String HTTPHeader::requestType()
97
{
98
        return header.front().before(" ");
99
}
100

    
101
// grab return code
102
int HTTPHeader::returnCode()
103
{
104
        return header.front().after(" ").before(" ").toInteger();
105
}
106

    
107
// grab content length
108
off_t HTTPHeader::contentLength()
109
{
110
        // code 304 - not modified - no content
111
        String temp(header.front().after(" "));
112
        if (temp.startsWith("304"))
113
                return 0;
114
        if (pcontentlength != NULL) {
115
                temp = pcontentlength->after(" ");
116
                return temp.toOffset();
117
        }
118
        // no content-length header - we don't know
119
        return -1;
120
}
121

    
122
// grab the auth type
123
String HTTPHeader::getAuthType()
124
{
125
        if (pproxyauthorization != NULL) {
126
                return pproxyauthorization->after(" ").before(" ");
127
        }
128
        return "";
129
}
130

    
131
// check the request's return code to see if it's an auth required message
132
bool HTTPHeader::authRequired()
133
{
134
        String temp(header.front().after(" "));
135
        if (temp.startsWith("407")) {
136
                return true;
137
        }
138
        return false;
139
}
140

    
141
// grab content disposition
142
String HTTPHeader::disposition()
143
{
144
        if (pcontentdisposition != NULL) {
145
                String filename(pcontentdisposition->after("filename").after("="));
146
                if (filename.contains(";"))
147
                        filename = filename.before(";");
148
                filename.removeWhiteSpace();  // incase of trailing space
149
                if (filename.contains("\"")) {
150
                        return filename.after("\"").before("\"");
151
                }
152
                return filename;
153
                // example format:
154
                // Content-Disposition: attachment; filename="filename.ext"
155
                // Content-Disposition: attachment; filename=filename.ext
156
                // Content-Disposition: filename="filename.ext"
157
                // 3rd format encountered from download script on realVNC's
158
                // website. notice it does not contain any semicolons! PRA 4-11-2005
159
        }
160
        return "";  // it finds the header proposed filename
161
}
162

    
163
// grab the user agent
164
String HTTPHeader::userAgent()
165
{
166
        if (puseragent != NULL) {
167
                // chop off '/r'
168
                String result(puseragent->after(" "));
169
                result.resize(result.length() - 1);
170
                return result;
171
        }
172
        return "";
173
}
174

    
175
// grab the content type header
176
String HTTPHeader::getContentType()
177
{
178
        if (pcontenttype != NULL) {
179
                String mimetype(pcontenttype->after(" "));
180
                if (mimetype.length() < 1)
181
                        return "-";
182
                
183
                unsigned char c;
184
                size_t j = 0;
185
                while (j < mimetype.length()) {
186
                        c = mimetype[j];
187
                        if (c == ' ' || c == ';' || c < 32) {        // remove the
188
                                mimetype = mimetype.subString(0, j);
189
                                // extra info not needed
190
                                j = 0;
191
                        }
192
                        ++j;
193
                }
194
                
195
                mimetype.toLower();
196
                return mimetype;
197
        }
198
        return "-";
199
}
200

    
201
// does the given content type string match our headers?
202
bool HTTPHeader::isContentType(const String& t)
203
{
204
        return getContentType().startsWith(t);
205
}
206

    
207
// grab contents of X-Forwarded-For header
208
// Modification based on a submitted patch by
209
// Jimmy Myrick (jmyrick@tiger1.tiger.org)
210
std::string HTTPHeader::getXForwardedForIP()
211
{
212
        if (pxforwardedfor != NULL) {
213
                String line(pxforwardedfor->after(": "));
214
                line.chop();
215
                return std::string(line.toCharArray());
216
        }
217
        return "";
218
}
219

    
220
// check the return code to see if it's a redirection request
221
bool HTTPHeader::isRedirection()
222
{
223
        // The 1st line of the header for a redirection is thus:
224
        // HTTP/1.(0|1) 3xx
225
        if (header.size() < 1) {
226
                return false;
227
        }                        // sometimes get called b 4 read
228
        String answer(header.front().after(" ").before(" "));
229
        if (answer[0] == '3' && answer.length() == 3) {
230
                return true;
231
        }
232
        return false;
233
}
234

    
235
// grab the contents of Proxy-Authorization header
236
// returns base64-decoding of the chunk of data after the auth type string
237
std::string HTTPHeader::getAuthData()
238
{
239
        if (pproxyauthorization != NULL) {
240
                String line(pproxyauthorization->after(" ").after(" "));
241
                return decodeb64(line);  // it's base64 MIME encoded
242
        }
243
        return "";
244
}
245

    
246
// grab raw contents of Proxy-Authorization header without decoding
247
std::string HTTPHeader::getRawAuthData()
248
{
249
        if (pproxyauthorization != NULL) {
250
                return pproxyauthorization->after(" ").after(" ");
251
        }
252
        return "";
253
}
254

    
255
// do we have a non-identity content encoding? this means body is compressed
256
bool HTTPHeader::isCompressed()
257
{
258
        if (pcontentencoding != NULL) {
259
                if (pcontentencoding->indexOf("identity") != -1) {
260
                        // http1.1 says this
261
                        // should not be here, but not must not
262
                        return false;
263
                }
264
#ifdef DGDEBUG
265
                std::cout << "is compressed" << std::endl;
266
#endif
267
                return true;  // i.e. encoded with something other than clear
268
        }
269
        return false;
270
}
271

    
272
// grab content encoding header
273
String HTTPHeader::contentEncoding()
274
{
275
        if (pcontentencoding != NULL) {
276
                String ce(pcontentencoding->after(": "));
277
                ce.toLower();
278
                return ce;
279
        }
280
        return "";  // we need a default don't we?
281
}
282

    
283
// *
284
// *
285
// * header modifications
286
// *
287
// *
288

    
289
// squid adds this so if more support it it may be useful one day
290
void HTTPHeader::addXForwardedFor(const std::string &clientip)
291
{
292
        std::string line("X-Forwarded-For: " + clientip + "\r");
293
        header.push_back(String(line.c_str()));
294
}
295

    
296
// set content length header to report given lenth
297
void HTTPHeader::setContentLength(int newlen)
298
{
299
        if (pcontentlength != NULL) {
300
                (*pcontentlength) = "Content-Length: " + String(newlen) + "\r";
301
        }
302
}
303

    
304
// set the proxy-connection header to allow persistence (or not)
305
void HTTPHeader::makePersistent(bool persist)
306
{
307
        if (persist) {
308
                // Only make persistent if it originally was, but now isn't.
309
                // The intention isn't to change browser behaviour, just to
310
                // un-do any connection downgrading which DG may have performed
311
                // earlier.
312
                if (waspersistent && !ispersistent) {
313
                        if (pproxyconnection != NULL) {
314
                                (*pproxyconnection) = pproxyconnection->before(":") + ": Keep-Alive\r";
315
                        } else {
316
                                header.push_back(String("Proxy-Connection: Keep-Alive\r"));
317
                                pproxyconnection = &(header.back());
318
                        }
319
                        ispersistent = true;
320
                }
321
        } else {
322
                // Only downgrade to non-persistent if it isn't currently persistent.
323
                if (ispersistent) {
324
                        if (pproxyconnection != NULL) {
325
                                (*pproxyconnection) = pproxyconnection->before(":") + ": Close\r";
326
                        } else {
327
                                header.push_back(String("Proxy-Connection: Close\r"));
328
                                pproxyconnection = &(header.back());
329
                        }
330
                        ispersistent = false;
331
                }
332
        }
333
}
334

    
335
// return a modified accept-encoding header, based on the one supplied,
336
// but with "identity" added and only supported encodings allowed.
337
String HTTPHeader::modifyEncodings(String e)
338
{
339

    
340
        // There are 4 types of encoding: gzip, deflate, compress and identity
341
        // deflate is in zlib format
342
        // compress is in unix compress format
343
        // identity is uncompressed and supported by all browsers (obviously)
344
        // we do not support compress
345

    
346
        e.toLower();
347
        String o("Accept-Encoding: identity");
348
#if ZLIB_VERNUM < 0x1210
349
#warning 'Accept-Encoding: gzip' is disabled
350
#else
351
        if (e.contains("gzip")) {
352
                o += ",gzip";
353
        }
354
#endif
355
        if (e.contains("deflate")) {
356
                o += ",deflate";
357
        }
358

    
359
        return o;
360
}
361

    
362
// set content length to report the given length, and strip content encoding
363
void HTTPHeader::removeEncoding(int newlen)
364
{
365
        if (pcontentlength != NULL) {
366
                (*pcontentlength) = "Content-Length: " + String(newlen) + "\r";
367
        }
368
        // this may all be overkill. since we strip everything out of the outgoing
369
        // accept-encoding header that we don't support, we won't be getting anything
370
        // back again that we don't support, in theory. leave new code commented
371
        // unless it proves to be necessary further down the line. PRA 20-10-2005
372
        if (pcontentencoding != NULL) {
373
/*#ifdef DGDEBUG
374
                std::cout << std::endl << "Stripping Content-Encoding header" <<std::endl;
375
                std::cout << "Old: " << header[i] <<std::endl;
376
#endif
377
                // only strip supported compression types
378
                String temp(header[i].after(":"));
379
                temp.removeWhiteSpace();
380
                String newheader;
381
                // iterate over comma-separated list of encodings
382
                while (temp.length() != 0) {
383
                        if (!(temp.startsWith("gzip") || temp.startsWith("deflate"))) {
384
                                // add other, unstripped encoding types back into the header
385
                                if (newheader.length() != 0)
386
                                        newheader += ", ";
387
                                newheader += (temp.before(",").length() != 0 ? temp.before(",") : temp);
388
                        }
389
                        temp = temp.after(",");
390
                        temp.removeWhiteSpace();
391
                }
392
                if (newheader.length() == 0)*/
393
                        (*pcontentencoding) = "X-DansGuardian-Removed: Content-Encoding\r";
394
/*                        else
395
                        header[i] = "Content-Encoding: "+newheader;
396
#ifdef DGDEBUG
397
                std::cout << "New: " << header[i] << std::endl << std::endl;
398
#endif*/
399
        }
400
}
401

    
402
// modifies the URL in all relevant header lines after a regexp search and replace
403
// setURL Code originally from from Ton Gorter 2004
404
void HTTPHeader::setURL(String &url) {
405
        //Modif FTRIF 
406
        //String hostname;
407
        String hostname,credentials;
408
        //Fin Modif FTRIF
409

    
410
        bool https = (url.before("://") == "https");
411
        int port = (https ? 443 : 80);
412

    
413
        if (!url.after("://").contains("/")) {
414
                url += "/";
415
        }
416
        hostname = url.after("://").before("/");
417
        if (hostname.contains("@")) { // Contains a username:password combo
418
                hostname = hostname.after("@");
419
        }
420
        if (hostname.contains(":")) {
421
                port = hostname.after(":").toInteger();
422
                if (port == 0 || port > 65535) {
423
                        port = (https ? 443 : 80);
424
                }
425
                hostname = hostname.before(":");  // chop off the port bit
426
        }
427

    
428

    
429
        //Ajout FTRIF
430
        //Restore stripped credentials
431
        credentials="";
432
        if (header.front().after("://").before(hostname.toCharArray()).contains("@"))
433
        { // Contains a username:password combo
434
          credentials = header.front().after("://").before(hostname.toCharArray());
435
        }
436
        //Fin ajout FTRIF
437

    
438
#ifdef DGDEBUG
439
        std::cout << "setURL: header.front() changed from: " << header.front() << std::endl;
440
#endif
441
        if (!https)
442
                //Modif FTRIF
443
                //header.front() = header.front().before(" ") + " " + url + " " + header.front().after(" ").after(" ");
444
                header.front() = header.front().before(" ") + " " + url.before("://") + "://" + credentials + url.after("://") + " " + header.front().after(" ").after(" ");
445
                //Fin Modif FTRIF
446
        else
447
                // Should take form of "CONNECT example.com:443 HTTP/1.0" for SSL
448
                header.front() = header.front().before(" ") + " " + hostname + ":" + String(port) + " " + header.front().after(" ").after(" ");
449
#ifdef DGDEBUG
450
        std::cout << " to: " << header.front() << std::endl;
451
#endif
452

    
453
        if (phost != NULL) {
454
#ifdef DGDEBUG
455
                std::cout << "setURL: header[] line changed from: " << (*phost) << std::endl;
456
#endif
457
                (*phost) = String("Host: ") + hostname;
458
                if (port != (https ? 443 : 80))
459
                {
460
                        (*phost) += ":";
461
                        (*phost) += String(port);
462
                }
463
                (*phost) += "\r";
464
#ifdef DGDEBUG
465
                std::cout << " to " << (*phost) << std::endl;
466
#endif
467
        }
468
        if (pport != NULL) {
469
#ifdef DGDEBUG
470
                std::cout << "setURL: header[] line changed from: " << (*pport) << std::endl;
471
#endif
472
                (*pport) = String("Port: ") + String(port) + "\r";
473
#ifdef DGDEBUG
474
                std::cout << " to " << (*pport) << std::endl;
475
#endif
476
        }
477
        // Don't just cache the URL we're sent - url() performs some other
478
        // processing, notably stripping the port part. Caching here will
479
        // bypass all that.
480
        //cachedurl = url.toCharArray();
481
}
482

    
483
// Does a regexp search and replace.
484
// urlRegExp Code originally from from Ton Gorter 2004
485
bool HTTPHeader::regExp(String& line, std::deque<RegExp>& regexp_list, std::deque<String>& replacement_list) {
486
        RegExp *re;
487
        String replacement;
488
        String repstr;
489
        String newLine;
490
        bool linemodified = false;
491
        unsigned int i;
492
        unsigned int j, k;
493
        unsigned int s = regexp_list.size();
494
        unsigned int matches, submatches;
495
        unsigned int match;
496
        unsigned int srcoff;
497
        unsigned int nextoffset;
498
        unsigned int matchlen;
499
        unsigned int oldlinelen;
500

    
501
        // iterate over our list of precompiled regexes
502
        for (i = 0; i < s; i++) {
503
                newLine = "";
504
                re = &(regexp_list[i]);
505
                if (re->match(line.toCharArray())) {
506
                        repstr = replacement_list[i];
507
                        matches = re->numberOfMatches();
508

    
509
                        srcoff = 0;
510

    
511
                        for (j = 0; j < matches; j++) {
512
                                nextoffset = re->offset(j);
513
                                matchlen = re->length(j);
514
                                
515
                                // copy next chunk of unmodified data
516
                                if (nextoffset > srcoff) {
517
                                        newLine += line.subString(srcoff, nextoffset - srcoff);
518
                                        srcoff = nextoffset;
519
                                }
520

    
521
                                // Count number of submatches (brackets) in replacement string
522
                                for (submatches = 0; j+submatches+1 < matches; submatches++)
523
                                        if (re->offset(j+submatches+1) + re->length(j+submatches+1) > srcoff + matchlen)
524
                                                break;
525

    
526
                                // \1 and $1 replacement
527
                                replacement = "";
528
                                for (k = 0; k < repstr.length(); k++) {
529
                                        // find \1..\9 and $1..$9 and fill them in with submatched strings
530
                                        if ((repstr[k] == '\\' || repstr[k] == '$') && repstr[k+1] >= '1' && repstr[k+1] <= '9') {
531
                                                match = repstr[++k] - '0';
532
                                                if (match <= submatches) {
533
                                                        replacement += re->result(j + match).c_str();
534
                                                }
535
                                        } else {
536
                                                // unescape \\ and \$, and add non-backreference characters to string
537
                                                if (repstr[k] == '\\' && (repstr[k+1] == '\\' || repstr[k+1] == '$'))
538
                                                        k++;
539
                                                replacement += repstr.subString(k, 1);
540
                                        }
541
                                }
542
                                
543
                                // copy filled in replacement string
544
                                newLine += replacement;
545
                                srcoff += matchlen;
546
                                j += submatches;
547
                        }
548
                        oldlinelen = line.length();
549
                        if (srcoff < oldlinelen) {
550
                                newLine += line.subString(srcoff, oldlinelen - srcoff);
551
                        }
552
#ifdef DGDEBUG
553
                        std::cout << "Line modified! (" << line << " -> " << newLine << ")" << std::endl;
554
#endif
555
                        // copy newLine into line and continue with other regexes
556
                        line = newLine;
557
                        linemodified = true;
558
                }
559
        }
560
        
561
        return linemodified;
562
}
563

    
564
// Perform searches and replacements on URL
565
bool HTTPHeader::urlRegExp(int filtergroup) {
566
        // exit immediately if list is empty
567
        if (not o.fg[filtergroup]->url_regexp_list_comp.size())
568
                return false;
569
#ifdef DGDEBUG
570
        std::cout << "Starting URL reg exp replace" << std::endl;
571
#endif
572
        String newUrl(url());
573
        if (regExp(newUrl, o.fg[filtergroup]->url_regexp_list_comp, o.fg[filtergroup]->url_regexp_list_rep)) {
574
                setURL(newUrl);
575
                return true;
576
        }
577
        return false;
578
}
579

    
580
// Perform searches and replacements on header lines
581
bool HTTPHeader::headerRegExp(int filtergroup) {
582
        // exit immediately if list is empty
583
        if (not o.fg[filtergroup]->header_regexp_list_comp.size())
584
                return false;
585
        bool result = false;
586
        for (std::deque<String>::iterator i = header.begin(); i != header.end(); i++) {
587
#ifdef DGDEBUG
588
                std::cout << "Starting header reg exp replace: " << *i << std::endl;
589
#endif
590
                bool chop = false;
591
                if (i->endsWith("\r"))
592
                {
593
                        i->chop();
594
                        chop = true;
595
                }
596
                result |= regExp(*i, o.fg[filtergroup]->header_regexp_list_comp, o.fg[filtergroup]->header_regexp_list_rep);
597
                if (chop)
598
                        i->append("\r");
599
        }
600
        return result;
601
}
602

    
603
// *
604
// *
605
// * detailed header checks & fixes
606
// *
607
// *
608

    
609
// is a URL malformed?
610
bool HTTPHeader::malformedURL(const String& url)
611
{
612
        String host(url.after("://"));
613
        if (host.contains("/"))
614
                host = host.before("/");
615
        if (host.length() < 2) {
616
#ifdef DGDEBUG
617
                std::cout << "host len too small" << std::endl;
618
#endif
619
                return true;
620
        }
621
        if (host.contains(":"))
622
                host = host.before(":");
623
        if (host.contains("..") || host.endsWith(".")) {
624
#ifdef DGDEBUG
625
                std::cout << "double dots in domain name" << std::endl;
626
#endif
627
                return true;
628
        }
629
        int i, len;
630
        unsigned char c;
631
        len = host.length();
632
        bool containsletter = false;
633
        for (i = 0; i < len; i++) {
634
                c = (unsigned char) host[i];
635
                // If it contains something other than numbers, dots, or [a-fx] (hex encoded IPs),
636
                // IP obfuscation can be ruled out.
637
                if (!containsletter &&
638
                                (((c < '0') || (c > '9'))
639
                                 && (c != '.') && (c != 'x') && (c != 'X')
640
                                 && ((c < 'a') || (c > 'f'))
641
                                 && ((c < 'A') || (c > 'F'))))
642
                        containsletter = true;
643
                if (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z')
644
                        && !(c >= '0' && c <= '9') && c != '.' && c != '-' && c != '_') {
645
#ifdef DGDEBUG
646
                        std::cout << "bad char in hostname" << std::endl;
647
#endif
648
                        return true;
649
                        // only allowed letters, digits, hiphen, dots
650
                }
651

    
652
        }
653
        // no IP obfuscation going on
654
        if (containsletter)
655
                return false;
656
#ifdef DGDEBUG
657
        else
658
                std::cout << "Checking for IP obfuscation in " << host << std::endl;
659
#endif
660
        // Check no IP obfuscation is going on
661
        // This includes IPs encoded as a single decimal number,
662
        // fully or partly hex encoded, and octal encoded
663
        bool first = true;
664
        bool obfuscation = false;
665
        if (host.endsWith("."))
666
                host.chop();
667
        do {
668
                if (!first)
669
                        host = host.after(".");
670
                first = false;
671
                String hostpart(host);
672
                if (host.contains("."))
673
                        hostpart = hostpart.before(".");
674
                // If any part of the host starts with a letter, any letter,
675
                // then we must have a hostname rather than an IP (obscured
676
                // or otherwise).  TLDs never start with a number.
677
                if ((hostpart[0] >= 'a' && hostpart[0] <= 'z') || (hostpart[0] >= 'A' && hostpart[0] <= 'Z'))
678
                        return false;
679
                // If any part of the host begins with 0, it may be hex or octal
680
                if ((hostpart[0] == '0') && (hostpart.length() > 1))
681
                {
682
                        obfuscation = true;
683
                        continue;
684
                }
685
                // Also check range, for decimal obfuscation.
686
                int part = hostpart.toInteger();
687
                if ((part < 0) || (part > 255))
688
                        obfuscation = true;
689
        } while (host.contains("."));
690
        // If we have any obfuscated parts, and haven't proven it's a hostname, it's invalid.
691
        return obfuscation;
692
}
693

    
694
// is this a POST request encapsulating a file upload?
695
bool HTTPHeader::isPostUpload(Socket &peersock)
696
{
697
        if (header.front().toCharArray()[0] != 'P') {
698
                return false;
699
        }
700

    
701
        /*bool answer = false;
702
        int postlen = postdata.buffer_length;
703
        int i;
704
        if (postlen < 14) {        // min length for there to be a match
705
                return false;
706
        }
707
        char *postdatablock = new char[postlen + 64];  // extra 64 for search
708
        try {
709
                postdata.copyToMemory(postdatablock);
710
                for (i = 0; i < postlen; i++) {        // make lowercase char by char
711
                        if (isupper(postdatablock[i])) {
712
                                postdatablock[i] = tolower(postdatablock[i]);
713
                        }
714
                }
715
                RegExp mysearch;
716
                std::string dis("content-type: ");  // signifies file upload
717
                char *p = new char[32];
718
                try {
719
                        for (i = 0; i < (signed) dis.length(); i++) {
720
                                p[i] = dis[i];  // copy it to the block of memory
721
                        }
722
                        char *pend = p + dis.length();  // pointer for search
723
                        char *postdatablockend = postdatablock + postlen;
724
                        // search the post data for the content type header
725
                        char *res = mysearch.search(postdatablock, postdatablockend, p, pend);
726
                        // if we searched all the way to the end without finding it,
727
                        // there is no post upload going on; otherwise, there is
728
                        if (res != postdatablockend) {
729
                                answer = true;
730
                        }
731
                }
732
                catch(exception & e) {
733
                };
734
                delete[]p;
735
        }
736
        catch(exception & e) {
737
        };
738
        delete[]postdatablock;
739
        return answer;*/
740

    
741
        off_t cl = contentLength();
742
        if (((cl > 0) && (cl < 14)) || (getContentType() == "application/x-www-form-urlencoded")) {
743
#ifdef DGDEBUG
744
                std::cout << "Based on content length/type, is not POST upload!" << std::endl;
745
#endif
746
                ispostupload = false;
747
                return false;
748
        }
749
        if (getContentType().length() > 0) {
750
#ifdef DGDEBUG
751
                std::cout << "Based on content length/type, is POST upload!" << std::endl;
752
#endif
753
                ispostupload = true;
754
                return true;
755
        }
756

    
757
#ifdef DGDEBUG
758
        std::cout << "Reading a line of POST data to determine content type: ";
759
#endif
760
        postdatalen = peersock.getLine(postdata, 14, 60, &postdatachopped);
761
#ifdef DGDEBUG
762
        std::cout << postdata << std::endl;
763
#endif
764
        if (postdatalen != 14) {
765
#ifdef DGDEBUG
766
                std::cout << "Is not POST upload!" << std::endl;
767
#endif
768
                ispostupload = false;
769
                return false;
770
        }
771
        String conttype(postdata);
772
        if (conttype.startsWithLower("content-type: ")) {
773
#ifdef DGDEBUG
774
                std::cout << "Is POST upload!" << std::endl;
775
#endif
776
                ispostupload = true;
777
                return true;
778
        } else {
779
#ifdef DGDEBUG
780
                std::cout << "Is not POST upload!" << std::endl;
781
#endif
782
                ispostupload = false;
783
                return false;
784
        }
785
}
786

    
787
// fix bugs in certain web servers that don't obey standards.
788
// actually, it's us that don't obey standards - HTTP RFC says header names
789
// are case-insensitive. - Anonymous SF Poster, 2006-02-23
790
void HTTPHeader::checkheader(bool allowpersistent)
791
{
792
        // are these headers outgoing, or incoming?
793
        bool outgoing = true;
794
        if (header.front().startsWith("HT"))
795
                outgoing = false;
796

    
797
        bool first = true;
798
        for (std::deque<String>::iterator i = header.begin(); i != header.end(); i++) {        // check each line in the headers
799
                // HTTP 1.1 is persistent by default
800
                if (first) {
801
                        if (i->after("HTTP/").startsWith("1.1")) {
802
#ifdef DGDEBUG
803
                                std::cout << "CheckHeader: HTTP/1.1, so assuming persistency" << std::endl;
804
#endif
805
                                waspersistent = true;
806
                                ispersistent = true;
807
                        }
808

    
809
                        // Do not allow persistent connections on CONNECT requests - the browser thinks it has a tunnel
810
                        // directly to the external server, not a connection to the proxy, so it won't be re-used in the
811
                        // manner expected by DG and will result in waiting for time-outs.  Bug identified by Jason Deasi.
812
                        if ((*i)[0] == 'C') {
813
#ifdef DGDEBUG
814
                                std::cout << "CheckHeader: CONNECT request; disallowing persistency" << std::endl;
815
#endif
816
                                allowpersistent = false;
817
                        }
818

    
819
                        first = false;
820

    
821
                        // force HTTP/1.0 - we don't support chunked transfer encoding, possibly amongst other things
822
                        if (outgoing)
823
                                (*i) = i->before(" HTTP/") + " HTTP/1.0\r";
824
                }
825
                // index headers - try to perform the checks in the order the average browser sends the headers.
826
                // also only do the necessary checks for the header type (sent/received).
827
                else if (outgoing && (phost == NULL) && i->startsWithLower("host:")) {
828
                        phost = &(*i);
829
                }
830
                // don't allow through multiple host headers
831
                else if (outgoing && (phost != NULL) && i->startsWithLower("host:")) {
832
                        i->assign("X-DG-IgnoreMe: removed multiple host headers\r");
833
                }
834
                else if (outgoing && (puseragent == NULL) && i->startsWithLower("user-agent:")) {
835
                        puseragent = &(*i);
836
                }
837
                else if (outgoing && i->startsWithLower("accept-encoding:")) {
838
                        (*i) = "Accept-Encoding:" + i->after(":");
839
                        (*i) = modifyEncodings(*i) + "\r";
840
                }
841
                else if ((pcontenttype == NULL) && i->startsWithLower("content-type:")) {
842
                        pcontenttype = &(*i);
843
                }
844
                else if ((pcontentlength == NULL) && i->startsWithLower("content-length:")) {
845
                        pcontentlength = &(*i);
846
                }
847
                // is this ever sent outgoing?
848
                else if ((pcontentdisposition == NULL) && i->startsWithLower("content-disposition:")) {
849
                        pcontentdisposition = &(*i);
850
                }
851
                else if ((!outgoing) && (pcontentencoding == NULL) && i->startsWithLower("content-encoding:")) {
852
                        pcontentencoding = &(*i);
853
                }
854
                else if ((pproxyauthorization == NULL) && i->startsWithLower("proxy-authorization:")) {
855
                        pproxyauthorization = &(*i);
856
                }
857
                else if ((pproxyconnection == NULL) && (i->startsWithLower("proxy-connection:") || i->startsWithLower("connection:"))) {
858
#ifdef DGDEBUG
859
                        std::cout << "CheckHeader: Found Proxy-Connection" << std::endl;
860
#endif
861
                        if (i->contains("live")) {
862
#ifdef DGDEBUG
863
                                std::cout << "CheckHeader: P-C says keep-alive" << std::endl;
864
#endif
865
                                waspersistent = true;
866
                                if (!allowpersistent) {
867
#ifdef DGDEBUG
868
                                        std::cout << "CheckHeader: ... but we aren't allowed to" << std::endl;
869
#endif
870
                                        ispersistent = false;
871
                                        (*i) = i->before(":") + ": Close\r";
872
                                } else {
873
                                        ispersistent = true;
874
                                }
875
                        } else {
876
#ifdef DGDEBUG
877
                                std::cout << "CheckHeader: P-C says close" << std::endl;
878
#endif
879
                                ispersistent = false;
880
                                waspersistent = false;
881
                        }
882
                        pproxyconnection = &(*i);
883
                }
884
                else if (outgoing && (pxforwardedfor == NULL) && i->startsWithLower("x-forwarded-for:")) {
885
                        pxforwardedfor = &(*i);
886
                }
887
                // this one's non-standard, so check for it last
888
                else if (outgoing && (pport = NULL) && i->startsWithLower("port:")) {
889
                        pport = &(*i);
890
                }
891
#ifdef DGDEBUG
892
                std::cout << (*i) << std::endl;
893
#endif
894
        }
895
#ifdef DGDEBUG
896
        std::cout << "CheckHeader flags: AP=" << allowpersistent << " IP=" << ispersistent << " PPC=" << !(pproxyconnection == NULL) << std::endl;
897
#endif
898
        // if a request was HTTP 1.1 and there was no proxy-connection header, we may need to add one
899
        if ((!allowpersistent) && ispersistent) {
900
                // we should only be in this state if pproxyconnection == NULL (otherwise ispersistent will have been falsified earlier)
901
#ifdef DGDEBUG
902
                std::cout << "CheckHeader: Adding our own Proxy-Connection: Close" << std::endl;
903
#endif
904
                header.push_back("Proxy-Connection: Close\r");
905
                pproxyconnection = &(header.back());
906
                ispersistent = false;
907
        } else if (allowpersistent && ispersistent && (pproxyconnection == NULL)) {
908
#ifdef DGDEBUG
909
                std::cout << "CheckHeader: Adding our own Proxy-Connection: Keep-Alive" << std::endl;
910
#endif
911
                // we should only be in this state if HTTP 1.1, persistency allowed, but persistency not explicitly asked for
912
                header.push_back("Proxy-Connection: Keep-Alive\r");
913
                pproxyconnection = &(header.back());
914
        }
915
        // Normalise request headers (fix host, port, first line of header, etc. to all be consistent)
916
        if (outgoing)
917
        {
918
                String newurl(url(true));
919
                setURL(newurl);
920
        }
921
}
922

    
923
// A request may be in the form:
924
//  GET http://foo.bar:80/ HTTP/1.0 (if :80 is omitted 80 is assumed)
925
// or:
926
//  GET / HTTP/1.0
927
//  Host: foo.bar (optional header in HTTP/1.0, but like HTTP/1.1, we require it!)
928
//  Port: 80 (not a standard header; do any clients send it?)
929
// or:
930
//  CONNECT foo.bar:443  HTTP/1.1
931
// So we need to handle all 3
932

    
933
// Build the absolute URL for the request described by the first header line,
// using the Host: and Port: headers (phost / pport) when the request line only
// carries a path.
//
// withport: when true, a non-default port is kept in the returned URL.
//           The port-less form is cached in cachedurl and returned directly on
//           later calls; the with-port form is never cached because most of the
//           code does not cope with port numbers in URLs.
//
// Side effects: sets the `port` member, and, for path-only request lines,
// rewrites header.front() into absolute-URL form.
String HTTPHeader::url(bool withport)
{
        const bool wantcached = !withport;
        if (wantcached && cachedurl.length() > 0)
                return cachedurl;

        port = 80;
        String host;

        // everything after the request method, with runs of spaces collapsed
        String result(header.front().after(" "));
        result.removeMultiChar(' ');
        // strip the protocol version; the lower-case form is tried just in case
        result = result.after(" ").startsWith("HTTP/") ? result.before(" HTTP/") : result.before(" http/");

        // CONNECT requests are for https and default to port 443
        const bool https = (requestType() == "CONNECT");
        if (https) {
                port = 443;
                if (!result.startsWith("https://"))
                        result = "https://" + result;
        }
        const int defaultport = https ? 443 : 80;

        // non-standard Port: header, if one was indexed
        if (pport != NULL) {
                port = pport->after(" ").toInteger();
                if (port == 0 || port > 65535)
                        port = defaultport;
        }

        if (result.length()) {
                if (result[0] != '/') {
                        // absolute form: GET http://foo.bar:80/ HTTP/1.0
                        if (!result.after("://").contains("/"))
                                result += "/";  // needed so that the host can be split from the path below

                        String scheme(result.before("://"));
                        host = result.after("://");

                        String path(host.after("/"));
                        path.removeWhiteSpace();  // remove rubbish like ^M and blanks
                        if (path.length() > 0)
                                path = "/" + path;

                        host = host.before("/");
                        if (host.contains("@"))  // drop a username:password prefix
                                host = host.after("@");
                        if (host.contains(":")) {
                                port = host.after(":").toInteger();
                                if (port == 0 || port > 65535)
                                        port = defaultport;
                                host = host.before(":");  // chop off the port bit
                        }
                        while (host.endsWith("."))
                                host.chop();
                        if (withport && (port != defaultport))
                                host += ":" + String(port);

                        result = scheme + "://" + host + path;
                } else {
                        // path-only form: GET / HTTP/1.0 plus a Host: header
                        if (phost != NULL) {
                                host = phost->after(" ");
                                host.removeWhiteSpace();
                                if (host.contains(":")) {
                                        port = host.after(":").toInteger();
                                        if (port == 0 || port > 65535)
                                                port = defaultport;
                                        host = host.before(":");
                                }
                                while (host.endsWith("."))
                                        host.chop();
                                if (withport && (port != defaultport))
                                        host += ":" + String(port);
                                host = "http://" + host;
                                result = host + result;
                        }
                        // Squid doesn't like requests in this format. Work around the fact.
                        header.front() = requestType() + " " + result + " HTTP/" + header.front().after(" HTTP/");
                }
        }

        if (result.endsWith("//"))
                result.chop();
#ifdef DGDEBUG
        std::cout << "from header url:" << result << std::endl;
#endif
        // Only the port-less version is cached.
        if (wantcached)
                cachedurl = result.toCharArray();
        return result;
}
1025

    
1026
// *
1027
// *
1028
// * Bypass URL/Cookie funcs
1029
// *
1030
// *
1031

    
1032
// chop the GBYPASS or GIBYPASS variable out of a bypass URL
1033
// This function ASSUMES that you really know what you are doing
1034
// Do NOT run this function unless you know that the URL contains a valid bypass code
1035
// Ernest W Lessenger
1036
void HTTPHeader::chopBypass(String url, bool infectionbypass)
1037
{
1038
        if (url.contains(infectionbypass ? "GIBYPASS=" : "GBYPASS=")) {
1039
                if (url.contains(infectionbypass ? "?GIBYPASS=" : "?GBYPASS=")) {
1040
                        String bypass(url.after(infectionbypass ? "?GIBYPASS=" : "?GBYPASS="));
1041
                        header.front() = header.front().before(infectionbypass ? "?GIBYPASS=" : "?GBYPASS=") + header.front().after(bypass.toCharArray());
1042
                } else {
1043
                        String bypass(url.after(infectionbypass ? "&GIBYPASS=" : "&GBYPASS="));
1044
                        header.front() = header.front().before(infectionbypass ? "&GIBYPASS=" : "&GBYPASS=") + header.front().after(bypass.toCharArray());
1045
                }
1046
        }
1047
        cachedurl = "";
1048
}
1049

    
1050
// same for scan bypass
1051
void HTTPHeader::chopScanBypass(String url)
1052
{
1053
        if (url.contains("GSBYPASS=")) {
1054
                if (url.contains("?GSBYPASS=")) {
1055
                        String bypass(url.after("?GSBYPASS="));
1056
                        header.front() = header.front().before("?GSBYPASS=") + header.front().after(bypass.toCharArray());
1057
                } else {
1058
                        String bypass(url.after("&GSBYPASS="));
1059
                        header.front() = header.front().before("&GSBYPASS=") + header.front().after(bypass.toCharArray());
1060
                }
1061
        }
1062
        cachedurl = "";
1063
}
1064

    
1065
// I'm not proud of this... --Ernest
1066
String HTTPHeader::getCookie(const char *cookie)
1067
{
1068
        String line;
1069
        // TODO - do away with loop here somehow, or otherwise speed it up?
1070
        for (std::deque<String>::iterator i = header.begin(); i != header.end(); i++) {
1071
                if (i->startsWithLower("cookie:")) {
1072
                        line = i->after(": ");
1073
                        if (line.contains(cookie)) {        // We know we have the cookie
1074
                                line = line.after(cookie);
1075
                                line.lop();  // Get rid of the '='
1076
                                if (line.contains(";")) {
1077
                                        line = line.before(";");
1078
                                }
1079
                        }
1080
                        // break;  // Technically there should be only one Cookie: header, but...
1081
                }
1082
        }
1083
        line.removeWhiteSpace();
1084
#ifdef DGDEBUG
1085
        std::cout << "Found GBYPASS cookie:" << line << std::endl;
1086
#endif
1087
        return line;
1088
}
1089

    
1090
// add cookie with given name & value to outgoing headers
1091
void HTTPHeader::setCookie(const char *cookie, const char *domain, const char *value)
1092
{
1093
        String line("Set-Cookie: ");
1094
        line += cookie;
1095
        line += "=";
1096
        line += value;
1097
        line += "; path=/; domain=.";
1098
        line += domain;
1099
        line += "\r";
1100
        header.push_back(line);
1101
#ifdef DGDEBUG
1102
        std::cout << "Setting cookie:" << line << std::endl;
1103
#endif
1104
        // no expiry specified so ends with the browser session
1105
}
1106

    
1107
// is this a temporary filter bypass cookie?
1108
bool HTTPHeader::isBypassCookie(String url, const char *magic, const char *clientip)
1109
{
1110
        String cookie(getCookie("GBYPASS"));
1111
        if (!cookie.length()) {
1112
#ifdef DGDEBUG
1113
                std::cout << "No bypass cookie" << std::endl;
1114
#endif
1115
                return false;
1116
        }
1117
        String cookiehash(cookie.subString(0, 32));
1118
        String cookietime(cookie.after(cookiehash.toCharArray()));
1119
        String mymagic(magic);
1120
        mymagic += clientip;
1121
        mymagic += cookietime;
1122
        bool matched = false;
1123
        while(url.contains(".")) {
1124
                String hashed(url.md5(mymagic.toCharArray()));
1125
                if (hashed == cookiehash) {
1126
                        matched = true;
1127
                        break;
1128
                }
1129
                url = url.after(".");
1130
        }
1131
        if (not matched) {
1132
#ifdef DGDEBUG
1133
                std::cout << "Cookie GBYPASS not match" << std::endl;
1134
#endif
1135
                return false;
1136
        }
1137
        time_t timen = time(NULL);
1138
        time_t timeu = cookietime.toLong();
1139
        if (timeu < timen) {
1140
#ifdef DGDEBUG
1141
                std::cout << "Cookie GBYPASS expired: " << timeu << " " << timen << std::endl;
1142
#endif
1143
                return false;
1144
        }
1145
        return true;
1146
}
1147

    
1148
// is this a temporary filter bypass URL?
1149
int HTTPHeader::isBypassURL(String * url, const char *magic, const char *clientip, bool *isvirusbypass)
1150
{
1151
        if ((*url).length() <= 45)
1152
                return false;  // Too short, can't be a bypass
1153

    
1154
        // check to see if this is a bypass URL, and which type it is
1155
        bool filterbypass = false;
1156
        bool virusbypass = false;
1157
        if ((isvirusbypass == NULL) && ((*url).contains("GBYPASS="))) {
1158
                filterbypass = true;
1159
        } else if ((isvirusbypass != NULL) && (*url).contains("GIBYPASS=")) {
1160
                virusbypass = true;
1161
        }
1162
        if (!(filterbypass || virusbypass))
1163
                return 0;
1164

    
1165
#ifdef DGDEBUG
1166
        std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " found checking..." << std::endl;
1167
#endif
1168

    
1169
        String url_left((*url).before(filterbypass ? "GBYPASS=" : "GIBYPASS="));
1170
        url_left.chop();  // remove the ? or &
1171
        String url_right((*url).after(filterbypass ? "GBYPASS=" : "GIBYPASS="));
1172

    
1173
        String url_hash(url_right.subString(0, 32));
1174
        String url_time(url_right.after(url_hash.toCharArray()));
1175
#ifdef DGDEBUG
1176
        std::cout << "URL: " << url_left << ", HASH: " << url_hash << ", TIME: " << url_time << std::endl;
1177
#endif
1178

    
1179
        String mymagic(magic);
1180
        mymagic += clientip;
1181
        mymagic += url_time;
1182
        String hashed(url_left.md5(mymagic.toCharArray()));
1183

    
1184
        if (hashed != url_hash) {
1185
#ifdef DGDEBUG
1186
                std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " hash mismatch" << std::endl;
1187
#endif
1188
                return 0;
1189
        }
1190

    
1191
        time_t timen = time(NULL);
1192
        time_t timeu = url_time.toLong();
1193

    
1194
        if (timeu < 1) {
1195
#ifdef DGDEBUG
1196
                std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " bad time value" << std::endl;
1197
#endif
1198
                return 1;  // bad time value
1199
        }
1200
        if (timeu < timen) {        // expired key
1201
#ifdef DGDEBUG
1202
                std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " expired" << std::endl;
1203
#endif
1204
                return 1;  // denotes expired but there
1205
        }
1206
#ifdef DGDEBUG
1207
        std::cout << "URL " << (filterbypass ? "GBYPASS" : "GIBYPASS") << " not expired" << std::endl;
1208
#endif
1209
        if (virusbypass)
1210
                (*isvirusbypass) = true;
1211
        return (int) timeu;
1212
}
1213

    
1214
// is this a scan bypass URL? i.e. a "magic" URL for retrieving a previously scanned file
1215
bool HTTPHeader::isScanBypassURL(String * url, const char *magic, const char *clientip)
1216
{
1217
        if ((*url).length() <= 45)
1218
                return false;  // Too short, can't be a bypass
1219

    
1220
        if (!(*url).contains("GSBYPASS=")) {        // If this is not a bypass url
1221
                return false;
1222
        }
1223
#ifdef DGDEBUG
1224
        std::cout << "URL GSBYPASS found checking..." << std::endl;
1225
#endif
1226

    
1227
        String url_left((*url).before("GSBYPASS="));
1228
        url_left.chop();  // remove the ? or &
1229
        String url_right((*url).after("GSBYPASS="));
1230

    
1231
        String url_hash(url_right.subString(0, 32));
1232
#ifdef DGDEBUG
1233
        std::cout << "URL: " << url_left << ", HASH: " << url_hash << std::endl;
1234
#endif
1235

    
1236
        // format is:
1237
        // GSBYPASS=hash(ip+url+tempfilename+mime+disposition+secret)
1238
        // &N=tempfilename&M=mimetype&D=dispos
1239

    
1240
        String tempfilename(url_right.after("&N="));
1241
        String tempfilemime(tempfilename.after("&M="));
1242
        String tempfiledis(tempfilemime.after("&D="));
1243
        tempfilemime = tempfilemime.before("&D=");
1244
        tempfilename = tempfilename.before("&M=");
1245

    
1246
        String tohash(clientip + url_left + tempfilename + tempfilemime + tempfiledis + magic);
1247
        String hashed(tohash.md5());
1248

    
1249
#ifdef DGDEBUG
1250
        std::cout << "checking hash: " << clientip << " " << url_left << " " << tempfilename << " " << " " << tempfilemime << " " << tempfiledis << " " << magic << " " << hashed << std::endl;
1251
#endif
1252

    
1253
        if (hashed == url_hash) {
1254
                return true;
1255
        }
1256
#ifdef DGDEBUG
1257
        std::cout << "URL GSBYPASS HASH mismatch" << std::endl;
1258
#endif
1259

    
1260
        return false;
1261
}
1262

    
1263
// *
1264
// *
1265
// * URL and Base64 decoding funcs
1266
// *
1267
// *
1268

    
1269
// URL decoding (%xx)
1270
// uses regex pre-compiled on startup
1271
String HTTPHeader::decode(const String &s, bool decodeAll)
1272
{
1273
        if (s.length() < 3) {
1274
                return s;
1275
        }
1276
#ifdef DGDEBUG
1277
        std::cout << "decoding url" << std::endl;
1278
#endif
1279
        if (!urldecode_re.match(s.c_str())) {
1280
                return s;
1281
        }                        // exit if not found
1282
#ifdef DGDEBUG
1283
        std::cout << "matches:" << urldecode_re.numberOfMatches() << std::endl;
1284
        std::cout << "removing %XX" << std::endl;
1285
#endif
1286
        int match;
1287
        int offset;
1288
        int pos = 0;
1289
        int size = s.length();
1290
        String result;
1291
        String n;
1292
        for (match = 0; match < urldecode_re.numberOfMatches(); match++) {
1293
                offset = urldecode_re.offset(match);
1294
                if (offset > pos) {
1295
                        result += s.subString(pos, offset - pos);
1296
                }
1297
                n = urldecode_re.result(match).c_str();
1298
                n.lop();  // remove %
1299
                result += hexToChar(n, decodeAll);
1300
#ifdef DGDEBUG
1301
                std::cout << "encoded: " << urldecode_re.result(match) << " decoded: " << hexToChar(n) << " string so far: " << result << std::endl;
1302
#endif
1303
                pos = offset + 3;
1304
        }
1305
        if (size > pos) {
1306
                result += s.subString(pos, size - pos);
1307
        } else {
1308
                n = "%" + n;
1309
        }
1310
        return result;
1311
}
1312

    
1313
// turn %xx back into original character
1314
String HTTPHeader::hexToChar(const String &n, bool all)
1315
{
1316
        if (n.length() < 2) {
1317
                return String(n);
1318
        }
1319
        static char buf[2];
1320
        unsigned int a, b;
1321
        unsigned char c;
1322
        a = n[0];
1323
        b = n[1];
1324
        if (a >= 'a' && a <= 'f') {
1325
                a -= 87;
1326
        }
1327
        else if (a >= 'A' && a <= 'F') {
1328
                a -= 55;
1329
        }
1330
        else if (a >= '0' && a <= '9') {
1331
                a -= 48;
1332
        }
1333
        else {
1334
                return String("%") + n;
1335
        }
1336
        if (b >= 'a' && b <= 'f') {
1337
                b -= 87;
1338
        }
1339
        else if (b >= 'A' && b <= 'F') {
1340
                b -= 55;
1341
        }
1342
        else if (b >= '0' && b <= '9') {
1343
                b -= 48;
1344
        }
1345
        else {
1346
                return String("%") + n;
1347
        }
1348
        c = a * 16 + b;
1349
        if (all || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '-')) {
1350
                buf[0] = c;
1351
                buf[1] = '\0';
1352
                return String(buf);
1353
        } else {
1354
                return String("%") + n;
1355
        }
1356
}
1357

    
1358
// decode a line of base64
1359
std::string HTTPHeader::decodeb64(const String& line)
1360
{                                // decode a block of b64 MIME
1361
        long four = 0;
1362
        int d;
1363
        std::string result;
1364
        int len = line.length() - 4;
1365
        for (int i = 0; i < len; i += 4) {
1366
                four = 0;
1367
                d = decode1b64(line[i + 0]);
1368
                four = four | d;
1369
                d = decode1b64(line[i + 1]);
1370
                four = (four << 6) | d;
1371
                d = decode1b64(line[i + 2]);
1372
                four = (four << 6) | d;
1373
                d = decode1b64(line[i + 3]);
1374
                four = (four << 6) | d;
1375
                d = (four & 0xFF0000) >> 16;
1376
                result += (char) d;
1377
                d = (four & 0xFF00) >> 8;
1378
                result += (char) d;
1379
                d = four & 0xFF;
1380
                result += (char) d;
1381
        }
1382
        return result;
1383
}
1384

    
1385
// decode an individual base64 character
1386
int HTTPHeader::decode1b64(char c)
1387
{
1388
        unsigned char i = '\0';
1389
        switch (c) {
1390
        case '+':
1391
                i = 62;
1392
                break;
1393
        case '/':
1394
                i = 63;
1395
                break;
1396
        case '=':
1397
                i = 0;
1398
                break;
1399
        default:                // must be A-Z, a-z or 0-9
1400
                i = '9' - c;
1401
                if (i > 0x3F) {        // under 9
1402
                        i = 'Z' - c;
1403
                        if (i > 0x3F) {        // over Z
1404
                                i = 'z' - c;
1405
                                if (i > 0x3F) {        // over z so invalid
1406
                                        i = 0x80;  // so set the high bit
1407
                                } else {
1408
                                        // a-z
1409
                                        i = c - 71;
1410
                                }
1411
                        } else {
1412
                                // A-Z
1413
                                i = c - 65;
1414
                        }
1415
                } else {
1416
                        // 0-9
1417
                        i = c + 4;
1418
                }
1419
                break;
1420
        }
1421
        return (int) i;
1422
}
1423

    
1424
// *
1425
// *
1426
// * network send/receive funcs
1427
// *
1428
// *
1429

    
1430
// send headers out over the given socket
1431
// "reconnect" flag gives permission to reconnect to the socket on write error
1432
// - this allows us to re-open the proxy connection on pconns if squid's end has
1433
// timed out but the client's end hasn't. not much use with NTLM, since squid
1434
// will throw a 407 and restart negotiation, but works well with basic & others.
1435
void HTTPHeader::out(Socket * peersock, Socket * sock, int sendflag, bool reconnect) throw(std::exception)
1436
{
1437
        String l;  // for amalgamating to avoid conflict with the Nagel algorithm
1438

    
1439
        if (sendflag == __DGHEADER_SENDALL || sendflag == __DGHEADER_SENDFIRSTLINE) {
1440
                if (header.size() > 0) {
1441
                        l = header.front() + "\n";
1442
#ifdef DGDEBUG
1443
                        std::cout << "headertoclient:" << l << std::endl;
1444
#endif
1445
                        // first reconnect loop - send first line
1446
                        while (true) {
1447
                                if (!(*sock).writeToSocket(l.toCharArray(), l.length(), 0, timeout)) {
1448
                                        // reconnect & try again if we've been told to
1449
                                        if (reconnect) {
1450
                                                // don't try more than once
1451
#ifdef DGDEBUG
1452
                                                std::cout << "Proxy connection broken (1); trying to re-establish..." << std::endl;
1453
                                                syslog(LOG_ERR,"Proxy connection broken (1); trying to re-establish...");
1454
#endif
1455
                                                reconnect = false;
1456
                                                sock->reset();
1457
                                                int rc = sock->connect(o.proxy_ip, o.proxy_port);
1458
                                                if (rc)
1459
                                                        throw std::exception();
1460
                                                continue;
1461
                                        }
1462
                                        throw std::exception();
1463
                                }
1464
                                // if we got here, we succeeded, so break the reconnect loop
1465
                                break;
1466
                        }
1467
                }
1468
                if (sendflag == __DGHEADER_SENDFIRSTLINE) {
1469
                        return;
1470
                }
1471
        }
1472

    
1473
        l = "";
1474

    
1475
        for (std::deque<String>::iterator i = header.begin() + 1; i != header.end(); i++) {
1476
                l += (*i) + "\n";
1477
        }
1478
        l += "\r\n";
1479

    
1480
#ifdef DGDEBUG
1481
        std::cout << "headertoclient:" << l << std::endl;
1482
#endif
1483

    
1484
        // second reconnect loop
1485
        while (true) {
1486
                // send header to the output stream
1487
                // need exception for bad write
1488

    
1489
                if (!(*sock).writeToSocket(l.toCharArray(), l.length(), 0, timeout)) {
1490
                        // reconnect & try again if we've been told to
1491
                        if (reconnect) {
1492
                                // don't try more than once
1493
#ifdef DGDEBUG
1494
                                std::cout << "Proxy connection broken (2); trying to re-establish..." << std::endl;
1495
                                syslog(LOG_ERR,"Proxy connection broken (2); trying to re-establish...");
1496
#endif
1497
                                reconnect = false;
1498
                                sock->reset();
1499
                                int rc = sock->connect(o.proxy_ip, o.proxy_port);
1500
                                if (rc)
1501
                                        throw std::exception();
1502
                                // include the first line on the retry
1503
                                l = header.front() + "\n" + l;
1504
                                continue;
1505
                        }
1506
                        throw std::exception();
1507
                }
1508
                // if we got here, we succeeded, so break the reconnect loop
1509
                break;
1510
        }
1511

    
1512
        if ((!requestType().startsWith("HTTP")) && (pcontentlength != NULL)) {
1513
                if (postdatalen > 0) {
1514
#ifdef DGDEBUG
1515
                        std::cout << "Sending initial POST data chunk" << std::endl;
1516
#endif
1517
                        // Re-add the chopped off \n, if necessary
1518
                        if (postdatachopped) {
1519
#ifdef DGDEBUG
1520
                                std::cout << "Re-adding newline to POST data (postdatalen " << postdatalen << ")" << std::endl;
1521
#endif
1522
                                postdata[postdatalen-1] = '\n';
1523
                                postdata[postdatalen] = '\0';
1524
                        }
1525
                        sock->writeToSockete(postdata, postdatalen, 0, timeout);
1526
                }
1527
#ifdef DGDEBUG
1528
                std::cout << "Opening tunnel for remainder of POST data" << std::endl;
1529
#endif
1530
                FDTunnel fdt;
1531
                off_t remaining = contentLength() - postdatalen;
1532
                if (remaining < 0)
1533
                        throw std::runtime_error("No POST data left to send!?");
1534
                fdt.tunnel(*peersock, *sock, false, remaining, true);
1535
        }
1536
}
1537

    
1538
// discard remainder of POST data
1539
void HTTPHeader::discard(Socket *sock)
1540
{
1541
        static char fred[4096];
1542
        off_t cl = contentLength() - postdatalen;
1543
        int rc;
1544
        while (cl > 0) {
1545
                rc = sock->readFromSocket(fred, ((cl > 4096) ? 4096 : cl), 0, timeout, false);
1546
                if (rc > 0)
1547
                        cl -= rc;
1548
                else
1549
                        break;
1550
        }
1551
}
1552

    
1553
// Read a complete HTTP header from `sock` into the `header` deque, one
// entry per line, then hand over to checkheader().
//
//   sock                - socket the header lines are read from
//   allowpersistent     - passed straight through to checkheader()
//   honour_reloadconfig - forwarded to getLine() for the very first line
//                         only, so a wait for the next request on a
//                         persistent connection can be interrupted, but a
//                         request already being received cannot
//
// Throws std::exception if no header lines remain once the terminating
// blank line has been removed.  According to the original author's note,
// getLine() itself throws on error and that is only caught by
// HandleConnection().
void HTTPHeader::in(Socket * sock, bool allowpersistent, bool honour_reloadconfig)
{
        // a previously used object must be wiped before it is refilled
        if (dirty) {
                reset();
        }
        dirty = true;

        // the RFCs do not specify a maximum header line length, so this
        // really ought to be dynamic (as pointed out by Daniel Robbins)
        char linebuf[8192];   // raw storage for one incoming header line
        String current;       // the same line as a String
        bool first_pass = true;
        bool skipped;

        // keep going until the end-of-header line (3 characters or fewer)
        // has been stored, ignoring a short line that arrives first
        do {
                sock->getLine(linebuf, 8192, timeout, first_pass && honour_reloadconfig);
                current = linebuf;

                // a short first line is junk left over from an old persistent
                // connection (notably the IE "extra CRLF after POST" bug)
                skipped = first_pass && current.length() <= 3;
                if (skipped) {
#ifdef DGDEBUG
                        std::cout << "Discarding unwanted bytes at head of request (pconn closed or IE multipart POST bug)" << std::endl;
#endif
                } else {
                        header.push_back(current);
                }
                first_pass = false;
        } while (current.length() > 3 || skipped);

        // the last line stored is the blank terminator - drop it
        header.pop_back();
        if (header.empty())
                throw std::exception();

        checkheader(allowpersistent);  // sort out a few bits in the header
}