--- squid-3.0_STABLE9/Makefile.am 2008-09-07 10:58:09.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/Makefile.am 2008-09-07 11:36:19.000000000 +0200 @@ -7,7 +7,7 @@ AUTOMAKE_OPTIONS = dist-bzip2 subdir-obj DIST_SUBDIRS = lib snmplib scripts src icons errors contrib doc helpers test-suite tools SUBDIRS = lib @makesnmplib@ scripts src icons errors doc helpers test-suite tools -DISTCLEANFILES = include/stamp-h include/stamp-h[0-9]* +DISTCLEANFILES = include/stamp-h include/stamp-h[0-9]* filters.make DEFAULT_PINGER = $(libexecdir)/`echo pinger | sed '$(transform);s/$$/$(EXEEXT)/'` dist-hook: --- squid-3.0_STABLE9/configure.in 2008-10-08 09:37:21.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/configure.in 2008-10-08 09:40:50.000000000 +0200 @@ -5,7 +5,7 @@ dnl $Id: configure.in,v 1.488.2.3 2008/ dnl dnl dnl -AC_INIT(Squid Web Proxy, 3.0.STABLE9, http://www.squid-cache.org/bugs/, squid) +AC_INIT(Squid Web Proxy, 3.0.STABLE9+filter0.2, http://www.squid-cache.org/bugs/, squid) AC_PREREQ(2.52) AM_CONFIG_HEADER(include/autoconf.h) AC_CONFIG_AUX_DIR(cfgaux) @@ -1854,6 +1854,30 @@ case "$host" in esac AC_SUBST(AR_R) +AM_CONDITIONAL(ENABLE_FILTERS, false) +AC_ARG_ENABLE(filters, +[ --enable-filters Enable filter modules], +[ if test "$enableval" = "yes" ; then + AC_DEFINE(USE_FILTERS,1, [Define this to include filter modules]) + AM_CONDITIONAL(ENABLE_FILTERS, true) + FILTER_MODULES=`(cd $srcdir/src && grep -l FILTER_REGISTRY filters/*.cc)` + echo "Filters enabled" + fi +]) +AC_ARG_WITH(morefilters, +[ --with-morefilters Specify additional filter modules (source path)], +[ if ! 
test "$withval" = "no" ; then + FILTER_MODULES="$FILTER_MODULES $withval" + fi +]) +if test "$enable_filters" = "yes" ; then + FILTER_OBJS=`echo $FILTER_MODULES | $ac_cv_path_PERL $srcdir/scripts/buildfilters.pl $srcdir/src 3> filters.make` +else + FILTER_OBJS= + echo "" > filters.make +fi +AC_SUBST(FILTER_OBJS) + dnl Check for headers AC_HEADER_DIRENT AC_HEADER_STDC @@ -3405,6 +3429,7 @@ AC_CONFIG_FILES([\ src/fs/Makefile \ src/repl/Makefile \ src/auth/Makefile \ + src/filters/Makefile \ contrib/Makefile \ snmplib/Makefile \ icons/Makefile \ --- squid-3.0_STABLE9/doc/squid-filter.html 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/doc/squid-filter.html 2008-10-09 16:03:46.000000000 +0200 @@ -0,0 +1,484 @@ + + + + Filter modules for Squid + + + + + + + +

Filter modules for Squid 3.0

+ + Version 0.2, October 2008 + +
    +
  1. Purpose +
  2. Prerequisites +
  3. Installation +
  4. Configuration +
  5. Available filters +
  6. Using +
  7. Internals +
  8. Migration from 2.x +
  9. Related projects +
  10. Bugs +
  11. Getting this package +
+ +

This is a project to build filtering capabilities comparable to + those of Muffin into Squid. It consists of a + filtering framework and a set of filter modules. + Currently available filters: +

+ Special features: + + +

Purpose

+ + A filtering proxy allows users to remove unwanted stuff from Web + pages as they browse them. What "unwanted stuff" is obviously + depends on the individual user, but things which are commonly regarded + as annoyances include + + Some of those things can be avoided by filtering URIs, which Squid + can already do via an external redirect program. Others require a + content filter. + +

Usually, a filtering proxy runs standalone and does nothing but + filtering. Users have to configure this proxy in their + browsers, and if they use a caching proxy too, chain them after the + filter. In situations where the user runs Squid anyway (mostly + because of caching for different browsers or a small LAN), it is + convenient to build this capability into Squid. + +

Prerequisites

+ + This patch is for Squid 3.0.STABLE9. + It was developed and tested under Linux 2.6 with glibc 2.2.5 through 2.4.1 + and gcc 3.3 through 4.1, but should not be system-specific. + +

You need the Squid sources, everything for compiling them, GNU + patch, autoconf 2.52 or later, and automake 1.6. + +

Installation

+ +
    +
  1. Apply the patch: (In the Squid source directory)
    +gzip -cd squid-3.0stable9-filter-0.2.patch.gz | patch -p1
    +
    + +
  2. Run configure:
    +sh bootstrap.sh
    +sh configure (options...) --enable-filters
    +
    + +
  3. Compile and install Squid as usual. +
+ + It is possible to include externally written filter modules with the + configure argument --with-morefilters="/path/to/file.cc /path/to/other.cc ..." + +

Configuration

+ +

Defining filters

+ + There is a new squid.conf directive:
+filter_module name [ arguments... ] [ * {allow|deny} acls... ]
+
+ + It tells Squid to define a filter of the given type. The filter + modules can take arguments as documented for the individual modules. + Arguments are separated by whitespace, with the same quoting + mechanisms as used elsewhere in squid.conf. A filter type can be + specified in more than one filter_module line; in that case several + filter instances with different parameters will be created. See + below on chaining filters. + +

Each filter line can optionally take an ACL list. This must start + with an asterisk (surrounded by whitespace), followed by either the + keyword allow or deny, followed by one or + more ACLs defined before the filter line. + +

A filter with no ACL specification is applied to every request. A + filter with an ACL specification is applied only to requests which are + denied by the ACL. In other words: a request allowed by the ACL + bypasses the filter. + +

There is a new option for the http_port directive: + The flag nofilter specifies that requests arriving on + this port will not be filtered. Effectively this runs a + filtering and a non-filtering proxy at once, on different ports. + +

Pattern files

+ + Pattern files are files containing lists of regular expressions + (POSIX extended, or grep -E syntax), one pattern per + line, against which the URI is matched. Blank lines and lines + starting with a number sign (#) are ignored in the usual fashion. + Whenever a pattern file is changed, it gets reloaded at the next + request automatically; no reconfigure is needed. A pattern is marked as + case-insensitive by prepending a dash. (To place a real dash at the + start of a pattern, use a class, like [-].) Patterns may + not contain literal TABs; use \t instead. + +

There are two types of pattern files: simple lists and replacement + lists. + +

Simple lists

+ These are lists of patterns against which stuff is matched. + In older releases, "stuff" used to mean request URIs; now this is specific + to the individual filters (so far only the activex filter uses this + feature). The old allow lists are no longer used; they have + been obsoleted by ACLs on filters. + +

Replacement lists

+ A replacement list allows URIs to be replaced by other URIs, + in a sed s///-like fashion. This type of pattern file is + used by the redirection filter. Each line in the file consists of + two elements separated by (at least) one TAB character. The + first is a pattern, the second a replacement. The replacement may + contain \1, \2... \9 references to parenthesized + subpatterns; \0 means the whole match and + \* means the complete original URI. The replacement + may also contain \_0, \_1..., \_* references which copy + the same subpatterns in modified base64 encoding (see below). + +

+ A special replacement can be given as a shortcut for + patterns which have no explicit replacement. This default is + specified as replacement for the pattern consisting of a single + exclamation mark, which should be the first line in the file. + Negative match does not work in a replacement list. + +

Modified base64 encoding
+ This encoding is base64 with the characters + / = + (plus, slash, equals) replaced by - _ . (dash, + underscore, dot) respectively. This leads to a URL-safe encoding of + request URIs or parts thereof (which may be useful for script-based + redirect result postprocessing). + +

Other configuration dependencies

+ + When content filters (see next section) are in use, an appropriate + request_header_replace clause must be set up to filter out + the Accept-Encoding and Accept-Ranges request headers.
+ Use this:
+request_header_replace Accept-Encoding identity
+request_header_replace Accept-Ranges none
+
+ See below for the exact reason. + +

Available filters

+ +

Currently there are the following filters: + +

Filters

+ + Each filter falls into one of the following categories: + + Filters of the same category either operate independently or can be chained. + Chaining is described below. + In any case, all applicable filters are called in + exactly the order in which they are specified in the config file. + +

redirect

+ + Replaces Squid's external redirect program. Takes one argument, the + name of a replacement list file. Performs pattern substitution on + the requested URI. As soon as a matching pattern is found, the search stops, + i.e. redirections are not chained within one redirection filter. + However, if the module is specified several times (probably with + different replacement list files), all instances are called in order, + with a later filter operating on the results of an earlier one. If + an external redirector is in use, it is called first, before the + filters. NOFILTER does apply to this filter but not to external + redirectors. + +

script

+ + Removes JavaScript (SCRIPT tags, on... + handlers and browser-specific ways of inserting JavaScript into tag + attributes) from HTML pages. (To block JavaScript files as well, use + an ACL against the "application/x-javascript" file type.) + +

activex

+ + Disables ActiveX OBJECT tags in HTML pages. The tags themselves + are preserved; only the classid parameter is replaced + by a dummy, so the page will still be processed correctly (as if by + a non-ActiveX browser). + This filter takes a pattern file as an optional argument. This file + contains a list of CLSIDs which are allowed through. + +

gifanim

+ + Breaks animated GIF pictures to remove the annoying blinking. Takes + as argument the allowed number of cycles. If zero, no animation is shown + (only the first picture). If negative, animations are not loaded + at all (the client shows a broken picture). The default is one, meaning + show the whole content but don't blink. + +

bugfinder

+ + Identifies GIF and PNG images not bigger than n by n + pixels. The n value is given as an argument (defaults to 2). + Since these tiny images + are often used as "Web bugs", it may be desirable to block them with + a redirector. The filter can only log them to cache.log; to + effectively block bugs it is necessary to filter the requests for + these URIs, i.e. manual processing of the log file is needed. + +

Each content filter specifies the MIME content type(s) to which it + applies (like image/gif for the gifanim module) and + ignores all other types. + +

Content filters can be chained. When more than one filter applies + to a given MIME content type, every filter operates on the results + of its predecessor. + +

Using

+ + On the client side, no additional configuration is necessary. + Simply set the patched Squid as your proxy. + +

The NOFILTER feature

+ + Users can request that all filters (including the redirection + filter, but not the external redirector) are bypassed for a single + request. This is done by appending .X.nofilter to the + host name in the URL, where X is replaced by + Squid's visible host name. Example: to get + http://www.example.com/foo/bar unfiltered from a Squid + called squid.cache, use the URI + http://www.example.com.squid.cache.nofilter/foo/bar. + +

The NOFILTER tag as part of the hostname in the URL implies + that correctly written relative links, including images, linked + scripts etc. on the same server, will also be unfiltered. Apply + the necessary caution. + +

The reason for including Squid's host name is to prevent + web servers from adding the NOFILTER tag to their junk banner links + themselves. This works best when visible_hostname, + unique_hostname and the canonical (DNS) host name of + the proxy are all different and not closely related, because the origin + server sees the latter two but not the first. + +

Since ".nofilter" is not a valid top level domain, it can't clash + with real host names. + +

Another possible way to bypass filters is to use a + non-filtering port, as described above. Requests arriving on that + port will always bypass all filters. + +

Internals

+ +

Object structure

+ + to be written... + +

A class diagram (created with + ArgoUML) for the filter + classes is here: http://sites.inka.de/bigred/devel/filter-patch.zargo. + +

Library modules

+ + PatFile provides the pattern file facility described + above. It is included in the Squid core and described in + PatFile.h. + +

Debugging options

+ + The following debugging sections and levels (see the + debug_options directive) are used: + + +
Section 92 +  Filter framework +
Section 93 +  Filter modules +
Section 94 +  Library modules (PatFile etc.) +
Level 1    +  Error messages +
Level 3    +  "Filter caught something" messages +
Level 4    +  Initialization/finalization messages +
Level 5    +  Initialization/finalization trace +
Level 8    +  Minor trace +
Level 9    +  Full trace (big!) +
+ +

Content-Encoding

+ + Content filters get the data as delivered by the server. With a + non-identity Content-Encoding the filter would operate on the + encoded data, which it generally cannot process correctly. (It has + been confirmed by experience that HTML filters like + script applied to a file with compression encoding + can silently deliver corrupted files, but mostly this is caught by + the HTML parser not accepting null characters.) + +

For this reason, the Accept-Encoding headers should always be + filtered out with an appropriate header_replace + clause. The origin server gets forced to always send unencoded data + with Accept-Encoding: identity. Another + header_replace which sets the Accept-Ranges header to + none causes the client to never try Range requests, which + obviously are unfilterable too. + +

Filters in the data path

+ + (TODO: is this still correct?)

+ + The cache always stores unfiltered objects. Content filtering + happens in the data path from cache or memory to the client. The + filter object is expected to copy the data into a new buffer, so it + can do anything with it, including insertions and deletions. + +

The only exception to the rule that filtering happens only in the + path to the client are those filters which alter the + request. This applies to the redirect module. + +

In a cache hierarchy, a filtering cache should only be placed at + the bottom, i.e. where only clients directly access it. If another + cache sits between the filter and client, that one will cache + filtered pages and break the NOFILTER feature. + +

Migration from 2.x

+ + To upgrade a configuration from Squid 2.4 or 2.5 plus filter patch, + note the following: + +
    +
  1. Filters are no longer loadable modules, instead they are + compiled in. A special "htmlfilter" module is no longer needed. + +
  2. The load_module directive has been replaced by + filter_module with slightly different syntax. + +
  3. The nofilter_port directive has been replaced + by the nofilter option in http_port. + +
  4. The allow lists of the individual filters have been replaced + by ACLs applied to the filter. Note that you can get the same + effect as with the old allow pattern file like this:
    +acl allow_activex url_regex "/usr/local/squid/etc/allowlist_activex"
    +filter_module activex * allow allow_activex
    +
    + The double quotes around the path tell the ACL to read its + patterns from a file. The syntax of this file should be compatible + with the old allow lists. + You have to reconfigure when this file is changed, however. + +
  5. The header filters (cookies) have been obsoleted by + header_access clauses (use Cookie and Set-Cookie + with ACLs for allow lists). + +
  6. The content type filters (allowtype, rejecttype) have been + obsoleted by rep_mime_type ACLs. + +
+ +

Related projects

+ + This project was mostly inspired by Muffin, a modular filtering proxy + written in Java and distributed under the GPL. So far it is + the most powerful filter I know of. + +

The Junkbusters web + page has one of the oldest and best known web filters as well as a + very comprehensive resources + list covering most issues from "What is this all about?" to a + list of filtering software (by now most of them are either for + Windows or for pay or both, which indicates there is a real demand + for filtering). + +

Bugs

+ + As with any pre-release, this surely contains bugs. In particular + I'm not sure if I really avoided memory leaks. If someone finds + problems, please tell me. + +

Known issues

+ + +

Getting this package

+ + An up-to-date version of this page can be found at + http://sites.inka.de/bigred/devel/squid-filter.html. + +

The latest release is filter 0.2 for Squid 3.0.STABLE9. Download + at http://sites.inka.de/bigred/devel/squid-3.0stable9-filter-0.2.patch.gz. + +

For use and distribution of this package, the same terms and + conditions as for the Squid package itself (i.e. the GNU General + Public License) apply. Note, however, that using a version or + installation setup which has the NOFILTER feature removed or + restricted in any way is in gross contradiction to the author's + intentions, and people who do so should feel guilty of abuse. + +

Acknowledgements

+ + Development of this version was funded by credativ GmbH. + + + + --- squid-3.0_STABLE9/include/FilterModule.h 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/include/FilterModule.h 2008-09-11 21:50:30.000000000 +0200 @@ -0,0 +1,183 @@ +/* + * DEBUG: section 92 Filter module infrastructure + * AUTHOR: Olaf Titz + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. 
+ * + */ + +#ifndef FILTERMODULE_H +#define FILTERMODULE_H + +#if USE_FILTERS + +#include "ACL.h" +#include "client_side_request.h" +#include "HttpReply.h" +#include "clientStream.h" +#include "SquidString.h" + +typedef class ClientHttpRequest clientHttpRequest; + +typedef enum _squidFilterType { + FIL_DUMMY, + FIL_REDIRECT, /* URI redirector */ + FIL_CONTFILTER, /* Content filter */ + FIL_END +} squidFilterType; + +/* ContentFilter presents the same API as clientStreamNode, but this + version allows implementation of the filter routines in proper + subclasses and always contains a pointer to this ContentFilter object + in the context. + Note the subclasses need to be CBDATA. +*/ + +class FilterModule; + +/*abstract*/ class ContentFilter: public RefCountable { +public: + typedef RefCount Pointer; + virtual ~ContentFilter(); + + virtual CSR readfunc = 0; + virtual CSCB callback = 0; + virtual CSD detach = 0; + virtual CSS status = 0; + +protected: + ContentFilter(RefCount o, clientHttpRequest *req); + RefCount owner; + clientHttpRequest *request; +}; + +/* FilterModule is the entry point for all filters. + It is created at configure time via an appropriate Factory. + At each request, depending on the type of filter, either its filter() + method is called, or its createFilter() method is called to create + a ContentFilter. 
+*/ +/*abstract*/ class FilterModule: public RefCountable { +public: + typedef RefCount Pointer; + virtual ~FilterModule(); + + /** Module filter function (for filters other than content filters) */ + virtual char *filter(clientHttpRequest *req, const void *arg); + /** Filter object factory (for content filters) */ + virtual ContentFilter::Pointer createFilter(clientHttpRequest *req); + + /** Return true if this filter's ACL denies this request */ + bool matchACL(clientHttpRequest *http); + + /** Called by main.cc: load all filters as of configuration */ + static void initAll(); + /** Called by main.cc: unload all filters */ + static void destroyAll(); + + /** Return description */ + const char *getDescription() const; + + /** Return trigger string */ + const char *getTrigger() const; + + class Factory { + public: + /** Create a filter module */ + virtual FilterModule::Pointer create(const wordlist *args, + acl_access *access) = 0; + virtual ~Factory() {} + + protected: + Factory(const char *name); + + private: + friend class FilterModule; + + const char *name; + static Factory *registry; + Factory *next; + + /** Act on a config line */ + static void filterInit(const char *name, + const wordlist *args, + acl_access *access, + const char *config_info); + }; + + class Iterator { + public: + bool hasNext(); + Pointer next(); + private: + friend class FilterModule; + Iterator(Pointer h) : p(h) {} + Pointer p; + }; + + /** Return all filters registered in a chain */ + static Iterator getChain(squidFilterType typ); + + /** Are any filters registered in this chain? 
*/ + static bool hasChain(squidFilterType typ); + +protected: + FilterModule(acl_access *access); + squidFilterType typ; /* filter type */ + String *description; /* Description of filter purpose */ + String *trigger; /* Trigger argument (like file type) */ + acl_access *access_list; /* acl */ + +private: + friend class Factory; + friend class Iterator; + Pointer next; /* next in chain */ + + static Pointer chain[FIL_END]; /* The filter chains */ + /** Register a newly created filter module */ + static void registerModule(Pointer module, const char *config_info); +}; + +#define FILTER_REGISTRY(typ, name) static typ ::Factory registryEntry(name) + +/* --------------------------- Filtering hooks ----------------------------- */ + +char *canonType(const char *t); +int mtmatch(const char *ct, const char *tr); + +char *moduleRedirect(const char *uri, clientHttpRequest *http); + +typedef struct _repParam { + const char *uri; + HttpReply *rep; +} repParam; + +int moduleContentFilter(clientHttpRequest *http, HttpReply *rep); + +#endif + +#endif --- squid-3.0_STABLE9/include/PatFile.h 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/include/PatFile.h 2008-10-08 13:02:19.000000000 +0200 @@ -0,0 +1,78 @@ +/* + * DEBUG: section 94 Pattern file library + * AUTHOR: Olaf Titz + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#ifndef PATFILE_H +#define PATFILE_H + +class PatFile: public RefCountable { +public: + typedef RefCount Pointer; + + /* Create a PatFile object. + name - the file name. + replace - false if a "match" aka "allow list" file, + true if a "replacement" file. + */ + PatFile(const char *name, bool replace); + ~PatFile(); + + /* Check if file has changed, reload if necessary. + Usually called before each patfileMatch(). + */ + void checkReload(); + + /* Unload the patterns, leave empty. Usually no need to call this. */ + void unload(); + + /* Check for match. + uri - the URI to check. + Returns - NULL, if no match was found. + uri, if match was found in a "match" file. + replacement, if match was found in a "replacement" file. + */ + char *match(const char *uri); + +private: + struct _patChain *chain; /* the pattern chain */ + char *fileName; /* name of pattern file */ + time_t mtime; /* modtime of pattern file */ + int replace; /* Process replace rules? 
*/ + char *buf; /* buffer space */ + size_t buflen; /* buffer size */ + + /* Do the \0..\9 substitutions in the replacement pattern */ + char *subst(const char *uri, struct _patChain *r, + const regmatch_t *subs); + int append(int n, const char *x); +}; + +#endif --- squid-3.0_STABLE9/include/util.h 2008-10-08 09:37:46.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/include/util.h 2008-10-08 09:40:50.000000000 +0200 @@ -135,6 +135,8 @@ SQUIDCEXTERN time_t parse_iso3307_time(c SQUIDCEXTERN char *base64_decode(const char *coded); SQUIDCEXTERN const char *base64_encode(const char *decoded); SQUIDCEXTERN const char *base64_encode_bin(const char *data, int len); +SQUIDCEXTERN const char *base64_encode_alt(const char *decoded); +SQUIDCEXTERN const char *base64_encode_bin_alt(const char *data, int len); SQUIDCEXTERN double xpercent(double part, double whole); SQUIDCEXTERN int xpercentInt(double part, double whole); --- squid-3.0_STABLE9/lib/base64.c 2004-09-25 17:38:50.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/lib/base64.c 2008-10-07 20:34:29.000000000 +0200 @@ -3,6 +3,7 @@ */ #include "config.h" +#include "string.h" #include "util.h" #if HAVE_STDIO_H @@ -158,3 +159,37 @@ base64_encode_bin(const char *data, int result[out_cnt] = '\0'; /* terminate */ return result; } + +static void +base64_fixup(char *x) +{ + char c; + do { + c = *x; + if (c == '+') + *x = '-'; + else if (c == '/') + *x = '_'; + else if (c == '=') + *x = '.'; + ++x; + } while (c); +} + +const char * +base64_encode_alt(const char *decoded_str) +{ + static char result[BASE64_RESULT_SZ]; + strcpy(result, base64_encode(decoded_str)); + base64_fixup(result); + return result; +} + +const char * +base64_encode_bin_alt(const char *data, int len) +{ + static char result[BASE64_RESULT_SZ]; + strcpy(result, base64_encode_bin(data, len)); + base64_fixup(result); + return result; +} --- squid-3.0_STABLE9/scripts/buildfilters.pl 1970-01-01 01:00:00.000000000 +0100 +++ 
squid-3.0_STABLE9+filter0.2/scripts/buildfilters.pl 2004-11-07 14:20:36.000000000 +0100 @@ -0,0 +1,47 @@ +#!/usr/bin/perl +# +# Take a list of filter module source files on stdin. +# Write the corresponding list of object files on stdout. +# Write make rules for compiling them on fd 3. + +$srcdir = $ARGV[0]; +open(SCRIPT, ">&=3"); +while () { + my @x = split; + foreach $y (@x) { + &process($y); + } +} +exit; + +sub process { + my ($x) = @_; + return unless ($x =~ m,^.*/([^/.]+)\.cc$,); + my $f = $1; + my $n = &get_filter_name($x); + unless ($n) { + print STDERR "No filter module found in $x\n"; + return; + } + print STDERR "Enabling filter module: $n from $x\n"; + print STDOUT "filters/$f.o "; + if ($x =~ m,^filters/,) { + print SCRIPT "filters/$f.o: \$(srcdir)/$x\n"; + } else { + print SCRIPT "filters/$f.o: $x\n"; + } + print SCRIPT "\t", '$(CXXCOMPILE) -c -o $@ $<', "\n"; +} + +sub get_filter_name { + my ($s) = @_; + open(F, $s) || open(F, "$srcdir/$s") || return undef; + while () { + /FILTER_REGISTRY\([^"]*"([^"]*)"/ && do { + close F; + return $1; + } + } + close F; + return undef; +} --- squid-3.0_STABLE9/src/Makefile.am 2008-09-07 10:58:27.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/Makefile.am 2008-09-07 15:51:37.000000000 +0200 @@ -28,7 +28,7 @@ endif TESTS=$(check_PROGRAMS) check_PROGRAMS= -SUBDIRS = fs repl auth +SUBDIRS = fs repl auth filters DELAY_POOL_ALL_SOURCE = \ CommonPool.h \ @@ -537,6 +537,7 @@ squid_SOURCES = \ Packer.h \ Parsing.cc \ Parsing.h \ + PatFile.cc \ $(XPROF_STATS_SOURCE) \ pconn.cc \ pconn.h \ @@ -660,6 +661,8 @@ squid_LDADD = \ @SNMPLIB@ \ @ICAP_LIBS@ \ @SSLLIB@ \ + @FILTER_OBJS@ \ + filters/libfilters.a \ -lmiscutil \ @XTRA_LIBS@ \ @EPOLL_LIBS@ \ @@ -672,7 +675,9 @@ squid_DEPENDENCIES = $(top_builddir)/lib @REPL_OBJS@ \ @AUTH_LINKOBJS@ \ @AUTH_OBJS@ \ - @ICAP_LIBS@ + @ICAP_LIBS@ \ + @FILTER_OBJS@ \ + filters/libfilters.a ICAP_libicap_a_SOURCES = \ ICAP/AsyncJob.cc \ @@ -838,6 +843,7 @@ ufsdump_SOURCES = \ net_db.cc \ 
Packer.cc \ Parsing.cc \ + PatFile.cc \ $(XPROF_STATS_SOURCE) \ pconn.cc \ peer_digest.cc \ @@ -897,6 +903,8 @@ ufsdump_LDADD = \ @SNMPLIB@ \ @ICAP_LIBS@ \ @SSLLIB@ \ + @FILTER_OBJS@ \ + filters/libfilters.a \ -lmiscutil \ @XTRA_LIBS@ \ @EPOLL_LIBS@ \ @@ -909,7 +917,9 @@ ufsdump_DEPENDENCIES = $(top_builddir)/l @REPL_OBJS@ \ @AUTH_LINKOBJS@ \ @AUTH_OBJS@ \ - @ICAP_LIBS@ + @ICAP_LIBS@ \ + @FILTER_OBJS@ \ + filters/libfilters.a nodist_ufsdump_SOURCES = \ repl_modules.cc \ --- squid-3.0_STABLE9/src/PatFile.cc 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/src/PatFile.cc 2008-10-08 13:02:19.000000000 +0200 @@ -0,0 +1,264 @@ +/* + * DEBUG: section 94 Pattern file library + * AUTHOR: Olaf Titz + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#include "squid.h" +#include "PatFile.h" + +#include +#include +#include + +#define BUFINCR 1024 /* Size increment of replacement buffer */ + +typedef struct _patChain { + struct _patChain *next; /* next in chain */ + regex_t patbuf; /* compiled pattern */ + union { + int negate; /* non-replace: is negative match */ + char *replace; /* replace: replacement string */ + } action; +} patChain; + +PatFile::PatFile(const char *name, bool rpl) + : chain(NULL), fileName(xstrdup(name)), mtime(0), replace(rpl), + buf(NULL), buflen(0) { + debug(94, 5) ("PatFile %s\n", fileName); +} + +PatFile::~PatFile() { + debug(94, 5) ("~PatFile %s\n", fileName); + unload(); + xfree(fileName); + xfree(buf); +} + +void PatFile::checkReload() +{ + struct stat st; + FILE *f; + int ln = 0, np = 0, e, neg, fl; + char buf[256]; + char *p0, *p1, *p2, *bang = NULL; + patChain *n, *l = NULL; + + if (stat(this->fileName, &st)<0) { + debug(94, 1) ("%s: stat: %s\n", this->fileName, xstrerror()); + return; + } + if (this->mtime >= st.st_mtime) + return; + this->mtime = st.st_mtime; + + f = fopen(this->fileName, "r"); + if (!f) { + debug(94, 1) ("%s: open: %s\n", this->fileName, xstrerror()); + return; + } + debug(94, 4) ("patfile: reloading %s\n", this->fileName); + unload(); + + while (fgets(buf, sizeof(buf), f)) { + ++ln; + neg = 0; + fl = this->replace ? REG_EXTENDED : REG_EXTENDED|REG_NOSUB; + for (p0=buf; *p0==' ' || *p0=='\t'; ++p0); + if (*p0=='#' || *p0=='\n' || *p0=='\0') + continue; + for (p1=p0; *p1 && *p1!='\t' && *p1!='\n'; ++p1); + if (*p1) { + for (*p1++='\0'; *p1=='\t'; ++p1); + for (p2=p1; *p2 && *p2!='\n'; ++p2); + *p2='\0'; + if (*p1=='\t' || *p1=='\n' || *p1=='!' 
|| *p1=='\0') + p1=NULL; + } else { + p1=NULL; + } + if (*p0=='-') { + ++p0; + fl |= REG_ICASE; + } + if (*p0=='!') { + if (this->replace) { + if (bang) + debug(94, 1) ("%s:%d: duplicate reject definition\n", + this->fileName, ln); + else + if (p1) + bang=xstrdup(p1); + continue; + } else { + ++p0; + neg = 1; + } + } + n = (patChain*)xmalloc(sizeof(patChain)); + e = regcomp(&n->patbuf, p0, fl); + if (e) { + (void)regerror(e, &n->patbuf, buf, sizeof(buf)); + debug(94, 1) ("%s:%d: regex error: %s\n", this->fileName, ln, buf); + xfree(n); + continue; + } + if (this->replace) { + if (p1) + n->action.replace = xstrdup(p1); + else if (bang) + n->action.replace = xstrdup(bang); + else { + debug(94, 1) ("%s:%d: missing replacement\n", + this->fileName, ln); + n->action.replace = NULL; + } + } else { + n->action.negate = neg; + } + n->next = NULL; + if (l) + l->next = n; + else + this->chain = n; + l = n; + ++np; + } + fclose(f); + if (bang) + xfree(bang); + debug(94, 4) ("%s: loaded %d patterns\n", this->fileName, np); +} + +void PatFile::unload() +{ + patChain *p0, *p; + for (p = this->chain; p; p = p0) { + p0 = p->next; + regfree(&p->patbuf); + if (this->replace) + xfree(p->action.replace); + xfree(p); + } + this->chain = NULL; +} + +#define MAXSUBPAT 10 +/* Copy the string x into buf at offset n, growing buf when needed; returns the offset just past the copied text (where its NUL was written). */ +int PatFile::append(int n, const char *x) +{ + size_t l = strlen(x); + if (n + l >= buflen) { + buflen = n + l + BUFINCR; /* always leaves room for the NUL; the old "+= l" did not when buflen <= n (e.g. first use, buflen == 0) */ + buf = (char*) xrealloc(buf, buflen); + } + strcpy(buf + n, x); + return n + l; +} + +/* Expand the replacement pattern of r into buf: \0..\9 insert the text of that regex group taken from uri, \_0..\_9 insert the group passed through base64_encode_bin_alt(), \* inserts the whole uri and \_* inserts base64_encode_alt(uri). Returns buf, which is overwritten by the next call, or NULL when r has no replacement string. */ +char *PatFile::subst(const char *uri, struct _patChain *r, + const regmatch_t *subs) +{ + char *p0; + int i, k; + unsigned int n; + char c; + + if (!(p0 = r->action.replace)) + return NULL; /* Should return generic reject URI */ + n = 0; + i = k = -1; + while (*p0 || i>=0) { + if (k >= 0) { + if (i < subs[k].rm_eo) { + c = uri[i++]; + } else { + k = i = -1; + continue; + } + } else if (*p0=='\\' && p0[1]>='0' && p0[1]<='9') { + k = p0[1] - '0'; + p0 += 2; + if ((i
= subs[k].rm_so) < 0) + k = -1; + continue; + } else if (*p0=='\\' && p0[1]=='_' && p0[2]>='0' && p0[2]<='9') { + int k1 = p0[2] - '0'; + p0 += 3; + int i1 = subs[k1].rm_so; + int i2 = subs[k1].rm_eo; + if (i1 >= 0 && i2 >= 0) { + const char *x = base64_encode_bin_alt(uri + i1, i2 - i1); + n = append(n, x); + } + continue; + } else if (*p0=='\\' && p0[1]=='*') { + p0 += 2; + n = append(n, uri); + continue; + } else if (*p0=='\\' && p0[1]=='_' && p0[2]=='*') { + p0 += 3; + const char *x = base64_encode_alt(uri); + n = append(n, x); + continue; + } else { + c = *p0++; + } + if (n+2 >= buflen) { + buflen += BUFINCR; + buf = (char*) xrealloc(buf, buflen); + } + buf[n++] = c; + } + (void) append(n, ""); /* NUL-terminate; unlike a bare buf[n] = '\0' this also allocates buf when nothing was emitted and buf is still NULL */ + return buf; +} + +char *PatFile::match(const char *uri) +{ + patChain *r; + regmatch_t subs[MAXSUBPAT]; + int i; + + for (r=this->chain, i=0; r; r=r->next, ++i) { + if (!regexec(&r->patbuf, uri, MAXSUBPAT, subs, 0)) { + char *res; + if (this->replace) { + res = subst(uri, r, subs); + } else { + res = r->action.negate ?
NULL : (char*)uri; /* XX */ + } + debug(94, 8) ("patfileMatch: <%s> matched %d returning <%s>\n", uri, i, res ? res : "(null)"); /* res is NULL for negated patterns and entries without a replacement; never hand NULL to %s */ + return res; + } + } + return NULL; +} + --- squid-3.0_STABLE9/src/cache_cf.cc 2008-09-07 10:58:25.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/cache_cf.cc 2008-09-07 15:51:37.000000000 +0200 @@ -1935,6 +1935,70 @@ free_cachemgrpasswd(cachemgr_passwd ** h } } +#ifdef USE_FILTERS +static void +dump_module(StoreEntry * entry, const char *name, filter_module * list) +{ + wordlist *w; + while (list != NULL) { + storeAppendPrintf(entry, "%s %s", name, list->module); + for (w = list->params; w != NULL; w = w->next) { + storeAppendPrintf(entry, " %s", w->key); + } + if (list->access_list) + dump_acl_access(entry, " *", list->access_list); + else + storeAppendPrintf(entry, "\n"); + list = list->next; + } +} +/* Add words from the rest of the current config line to *list until the remaining text starts with '*'. Returns that remainder (NULL when nothing is left), or NULL as soon as strwordtok() yields no token. */ +static char * +parse_wordlist_stopstar(wordlist ** list) +{ + char *token; + char *t = strtok(NULL, ""); + + while (t != NULL && *t != '*') { + if ((token = strwordtok(NULL, &t)) == NULL) { + return NULL; + } + wordlistAdd(list, token); + } + strtok(t, w_space); // push back rest of line in strtok buffer + return t; +} +/* Parse one filter_module directive (module name, parameters, optional ACL after '*') and append it at the end of the list at *head. */ +static void +parse_module(filter_module ** head) +{ + filter_module *p = (filter_module*) xcalloc(1, sizeof(filter_module)); + filter_module **P; + parse_string(&p->module); + char *a = parse_wordlist_stopstar(&p->params); + if (a != NULL) { + parse_acl_access(&p->access_list); + } + p->cfg_filename = cfg_filename; + p->config_lineno = config_lineno; + for (P = head; *P; P = &(*P)->next); + *P = p; +} +/* Release every entry of the list at *head together with its module name, ACL and parameter list; leaves *head NULL. */ +static void +free_module(filter_module ** head) +{ + filter_module *p; + while ((p = *head) != NULL) { + *head = p->next; + xfree(p->module); + free_acl_access(&p->access_list); + wordlistDestroy(&p->params); + xfree(p); + } +} +#endif + static void dump_denyinfo(StoreEntry * entry, const char *name, acl_deny_info_list * var) { @@ -2875,6 +2939,10 @@ parse_http_port_option(http_port_list * s->accel = 1; } else if (strcmp(token,
"accel") == 0) { s->accel = 1; +#if USE_FILTERS + } else if (strcmp(token, "nofilter") == 0) { + s->nofilter = 1; +#endif } else if (strncmp(token, "disable-pmtu-discovery=", 23) == 0) { if (!strcasecmp(token + 23, "off")) s->disable_pmtu_discovery = DISABLE_PMTU_OFF; --- squid-3.0_STABLE9/src/cf.data.depend 2008-09-07 10:58:19.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/cf.data.depend 2008-09-07 15:51:37.000000000 +0200 @@ -19,6 +19,7 @@ delay_pool_rates delay_class denyinfo acl eol externalAclHelper auth_param +filter_module module acl hostdomain cache_peer hostdomaintype cache_peer http_header_access @@ -32,6 +33,7 @@ int kb_int64_t kb_size_t logformat +module onoff peer peer_access cache_peer acl --- squid-3.0_STABLE9/src/cf.data.pre 2008-10-08 09:37:36.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/cf.data.pre 2008-10-08 09:40:50.000000000 +0200 @@ -955,6 +955,10 @@ DOC_START name= Specifies a internal name for the port. Defaults to the port specification (port or addr:port) + nofilter Requests on this port are not filtered. + (Only available when --enable-filters + is given to the configure script.) + If you run Squid on a dual-homed machine with an internal and an external interface we recommend you to specify the internal address:port in http_port. This way Squid will only be @@ -3912,6 +3916,25 @@ COMMENT_START ----------------------------------------------------------------------------- COMMENT_END +NAME: filter_module +TYPE: module +DEFAULT: none +IFDEF: USE_FILTERS +LOC: Config.modules +DOC_START + Specify here any number of filters. + Each filter specification consists of the module name, + optional arguments to the filters, and optionally a "*" + followed by an ACL specification. + The filter is applied when the ACL _denies_, so "allow" means + "no filtering". With no ACL, the filter is always applied. + See the documentation for the individual modules. + + Only available when --enable-filters is given to the configure script. 
+filter_module redirect @DEFAULT_PREFIX@/etc/redirect +filter_module bugfinder 3 * allow to_localhost +DOC_END + NAME: digest_generation IFDEF: USE_CACHE_DIGESTS TYPE: onoff --- squid-3.0_STABLE9/src/client_side.cc 2008-10-08 09:37:41.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/client_side.cc 2008-10-08 09:40:50.000000000 +0200 @@ -2207,6 +2207,11 @@ clientProcessRequest(ConnStateData::Poin request->flags.transparent = http->flags.transparent; +#if USE_FILTERS + if (http->getConn()->port->nofilter) + request->flags.filter = 0; +#endif + #if LINUX_TPROXY request->flags.tproxy = conn->port->tproxy && need_linux_tproxy; --- squid-3.0_STABLE9/src/client_side_reply.cc 2008-09-07 10:58:25.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/client_side_reply.cc 2008-09-07 15:51:37.000000000 +0200 @@ -47,6 +47,9 @@ #if USE_SQUID_ESI #include "ESI.h" #endif +#if USE_FILTERS +#include "FilterModule.h" +#endif #include "MemObject.h" #include "ACLChecklist.h" #include "ACL.h" @@ -1780,6 +1783,14 @@ clientReplyContext::processReplyAccessRe assert (!flags.headersSent); flags.headersSent = true; +#if USE_FILTERS + if (http->request->flags.filter && + body_size > 0 && + reply->sline.status != HTTP_FORBIDDEN && + !alwaysAllowResponse(reply->sline.status)) { + moduleContentFilter(http, reply); + } +#endif StoreIOBuffer tempBuffer; char *buf = next()->readBuffer.data; char *body_buf = buf + reply->hdr_sz; --- squid-3.0_STABLE9/src/client_side_request.cc 2008-09-07 10:58:25.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/client_side_request.cc 2008-09-11 21:50:32.000000000 +0200 @@ -54,6 +54,9 @@ #include "Store.h" #include "HttpReply.h" #include "MemObject.h" +#if USE_FILTERS +#include "FilterModule.h" +#endif #include "ClientRequestContext.h" #include "SquidTime.h" #include "wordlist.h" @@ -779,6 +782,11 @@ ClientRequestContext::clientRedirectDone { HttpRequest *new_request = NULL; HttpRequest *old_request = http->request; +#if USE_FILTERS + if 
(old_request->flags.filter) { + result = moduleRedirect(result ? result : http->uri, http); + } +#endif debugs(85, 5, "clientRedirectDone: '" << http->uri << "' result=" << (result ? result : "NULL")); assert(redirect_state == REDIRECT_PENDING); redirect_state = REDIRECT_DONE; @@ -1027,6 +1035,13 @@ ClientHttpRequest::doCallouts() calloutContext->clientRedirectStart(); return; } +#if USE_FILTERS + else if (FilterModule::hasChain(FIL_REDIRECT)) { + calloutContext->redirect_state = REDIRECT_PENDING; + calloutContext->clientRedirectDone(NULL); + return; + } +#endif } if (!calloutContext->interpreted_req_hdrs) { --- squid-3.0_STABLE9/src/filters/BufferedContentFilter.cc 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/src/filters/BufferedContentFilter.cc 2008-09-07 15:51:37.000000000 +0200 @@ -0,0 +1,134 @@ +/* + * DEBUG: section 94 Filter libraries + * AUTHOR: Olaf Titz + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#include "BufferedContentFilter.h" + +#define MODNAME "BufferedContentFilter" + +BufferedContentFilter::BufferedContentFilter(FilterModule::Pointer o, + clientHttpRequest *req) + : ContentFilter(o, req), eof(0), aborted(0), rpos(0), wpos(0) { + out.init(); +} + +BufferedContentFilter::~BufferedContentFilter() { + out.clean(); +} + +void BufferedContentFilter::callback(clientStreamNode *thisNode, + ClientHttpRequest *http, HttpReply *rep, + StoreIOBuffer receivedData) { + + if (rep == NULL && receivedData.data == NULL && receivedData.length == 0) { + if (pushdown(thisNode, http, rep)) + return; + debug(94, 8) (MODNAME ": Telling recipient EOF on CALLBACK\n"); + StoreIOBuffer tmpBuf; + clientStreamCallback(thisNode, http, rep, tmpBuf); + eof = 1; + return; + } + + if (receivedData.length > 0 && + cfFilter(receivedData.data, receivedData.length) < 0) { + aborted = 1; + } + rpos += receivedData.length; + if (pushdown(thisNode, http, rep)) + return; + // need more + StoreIOBuffer tmpBuf; + tmpBuf.offset = rpos; + tmpBuf.length = INTBUFSIZ; + tmpBuf.data = readbuf; + clientStreamRead(thisNode, http, tmpBuf); +} + +bool BufferedContentFilter::pushdown(clientStreamNode *thisNode, + ClientHttpRequest *http, + HttpReply *rep) { + if (out.size == 0) { + return false; + } + char *bb = out.buf; + + StoreIOBuffer writeBuffer = thisNode->next()->readBuffer; + writeBuffer.offset = wpos; + writeBuffer.data = bb; + writeBuffer.length = out.size; + clientStreamCallback(thisNode, http, rep, writeBuffer); + + wpos += 
out.size; + // out.size = 0; //XXX should be memBufReset(&out); + out.reset(); + return true; +} + +void BufferedContentFilter::readfunc(clientStreamNode *thisNode, + ClientHttpRequest *http) { + StoreIOBuffer tmpBuf; + if (eof) { + debug(94, 8) (MODNAME ": Telling recipient EOF on READ\n"); + clientStreamCallback(thisNode, http, NULL, tmpBuf); + return; + } + if (pushdown(thisNode, http, NULL)) { + return; + } + // call up + tmpBuf.offset = rpos; + tmpBuf.length = INTBUFSIZ; + tmpBuf.data = readbuf; + clientStreamRead(thisNode, http, tmpBuf); +} + +void BufferedContentFilter::detach(clientStreamNode *thisNode, + ClientHttpRequest *http) { + + // detach + clientStreamDetach(thisNode, http); +} + +clientStream_status_t BufferedContentFilter::status(clientStreamNode *thisNode, + ClientHttpRequest *http) { + + if (eof) { + debug(94, 8) (MODNAME ": Telling recipient EOF on STATUS\n"); + return STREAM_UNPLANNED_COMPLETE; + } + if (aborted) { + debug(94, 8) (MODNAME ": Telling recipient ERROR on STATUS\n"); + return STREAM_FAILED; + } + // pass through + return clientStreamStatus(thisNode, http); +} --- squid-3.0_STABLE9/src/filters/BufferedContentFilter.h 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/src/filters/BufferedContentFilter.h 2004-12-05 00:56:37.000000000 +0100 @@ -0,0 +1,65 @@ +/* + * DEBUG: section 94 Filter libraries + * AUTHOR: Olaf Titz + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. 
Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#ifndef BUFFEREDCONTENTFILTER_H +#define BUFFEREDCONTENTFILTER_H + +#include "squid.h" +#include "FilterModule.h" + +#define INTBUFSIZ 1024 + +/*abstract*/ class BufferedContentFilter: public ContentFilter { +public: + virtual ~BufferedContentFilter(); + + CSR readfunc; + CSCB callback; + CSD detach; + CSS status; + +protected: + BufferedContentFilter(FilterModule::Pointer o, clientHttpRequest *req); + virtual int cfFilter(const char *buf, int len) = 0; + MemBuf out; + unsigned eof: 1; + unsigned aborted: 1; + +private: + bool pushdown(clientStreamNode *thisNode, + ClientHttpRequest *http, HttpReply *rep); + char readbuf[INTBUFSIZ]; + size_t rpos; + size_t wpos; +}; + +#endif --- squid-3.0_STABLE9/src/filters/ContentFilter.cc 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/src/filters/ContentFilter.cc 2008-09-07 15:51:37.000000000 +0200 @@ -0,0 +1,122 @@ +/* + * DEBUG: section 94 Filter libraries + * AUTHOR: Olaf Titz + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. 
Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. 
+ * + */ + +#include "squid.h" +#include "clientStream.h" +#include "client_side_request.h" +#include "HttpReply.h" +#include "FilterModule.h" + +ContentFilter::ContentFilter(FilterModule::Pointer o, clientHttpRequest *req) + : owner(o), request(req) { +} + +ContentFilter::~ContentFilter() { + debug(94, 5) ("~ContentFilter %p\n", this); +} + +static ContentFilter::Pointer prepareContext(clientStreamNode *thisNode, + ClientHttpRequest *http) { + assert(thisNode != NULL); + assert(cbdataReferenceValid (thisNode)); + ContentFilter::Pointer context = + dynamic_cast(thisNode->data.getRaw()); + assert(context.getRaw() != NULL); + return context; +} + +void cfProcessStream(clientStreamNode *thisNode, + ClientHttpRequest *http, + HttpReply *rep, + StoreIOBuffer receivedData) { + ContentFilter::Pointer context = prepareContext(thisNode, http); + debug(94, 9) ("cfProcessStream %p %lld %d %p\n", context.getRaw(), + receivedData.offset, receivedData.length, receivedData.data); + context->callback(thisNode, http, rep, receivedData); +} + +void cfStreamRead(clientStreamNode *thisNode, + ClientHttpRequest *http) { + ContentFilter::Pointer context = prepareContext(thisNode, http); + debug(94, 9) ("cfStreamRead %p\n", context.getRaw()); + context->readfunc(thisNode, http); +} + +void cfStreamDetach(clientStreamNode *thisNode, + ClientHttpRequest *http) { + ContentFilter::Pointer context = prepareContext(thisNode, http); + debug(94, 9) ("cfStreamDetach %p\n", context.getRaw()); + context->detach(thisNode, http); +} + +clientStream_status_t cfStreamStatus(clientStreamNode *thisNode, + ClientHttpRequest *http) { + ContentFilter::Pointer context = prepareContext(thisNode, http); + debug(94, 9) ("cfStreamStatus %p\n", context.getRaw()); + return context->status(thisNode, http); +} + +int moduleContentFilter(clientHttpRequest *http, HttpReply *rep) { + const char *typ = rep->content_type.buf(); + if (!typ) { + return 0; // ? 
+ } + char *tt = canonType(typ); + int count = 0; + for (FilterModule::Iterator i = FilterModule::getChain(FIL_CONTFILTER); + i.hasNext();) { + FilterModule::Pointer f = i.next(); + if (mtmatch(tt, f->getTrigger()) && f->matchACL(http)) { + ContentFilter::Pointer c = f->createFilter(http); + debug(94, 8) ("moduleContentFilter: pushing %p (%s) %s\n", + c.getRaw(), f->getDescription(), http->log_uri); + clientStreamInsertHead(&http->client_stream, + cfStreamRead, cfProcessStream, + cfStreamDetach, cfStreamStatus, + ClientStreamData(c.getRaw())); + ++count; + } + } + if (count > 0) { + // clear headers which would confuse clients + HttpHeader *hdr = &rep->header; + // XXX: why is hdr->mask inconsistent here?? + // the httpHeaderDelByName fixes this. + hdr->delByName("ETag"); + hdr->delById(HDR_ACCEPT_RANGES); + //httpHeaderDelById(hdr, HDR_ETAG); + hdr->delById(HDR_CONTENT_LENGTH); + hdr->delById(HDR_CONTENT_MD5); + } + xfree(tt); + return count; +} --- squid-3.0_STABLE9/src/filters/ContentFilterHtml.cc 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/src/filters/ContentFilterHtml.cc 2008-09-07 15:51:37.000000000 +0200 @@ -0,0 +1,381 @@ +/* + * DEBUG: section 94 Filter libraries + * AUTHOR: Olaf Titz + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#include "squid.h" +#include "Debug.h" +#include "FilterModule.h" +#include "ContentFilterHtml.h" + +#undef DEBUG_TAG_PARSER +#undef FIX_BROKEN_HTML +#define MODNAME "htmlfilter" + +void *ContentFilterHtml::operator new(size_t byteCount) { + assert(byteCount == sizeof(ContentFilterHtml)); + CBDATA_INIT_TYPE(ContentFilterHtml); + ContentFilter *res = cbdataAlloc(ContentFilterHtml); + cbdataReference(res); + return res; +} + +void ContentFilterHtml::operator delete(void *address) { + ContentFilter *t = static_cast(address); + cbdataFree(t); + cbdataReferenceDone(t); +} + +CBDATA_CLASS_INIT(ContentFilterHtml); + +FilterModuleHtml::FilterModuleHtml(const wordlist *args, + acl_access *access) + : FilterModule(access) { + typ = FIL_CONTFILTER; + trigger = new String("text/html"); +} + +FilterModuleHtml::~FilterModuleHtml() { +} + +ContentFilterHtml::ContentFilterHtml(FilterModule::Pointer o, + clientHttpRequest *req) + : BufferedContentFilter(o, req), inTag(T_TEXT), inBSQuot(0), eating(0) { + tag.init(); +} + +ContentFilterHtml::~ContentFilterHtml() { + tag.clean(); +} + + + +/* Insert a tag. Can be called from the module. 
*/ +void ContentFilterHtml::insertTag(int nattribs, + const char * const attribs[], + const char * const values[]) +{ + int i, len; + int *alen, *vlen; + if (eating) + return; + +#ifdef HAVE_ALLOCA + alen = (int*) alloca(2*nattribs*sizeof(int)); +#else + alen = (int*) xmalloc(2*nattribs*sizeof(int)); +#endif + vlen = alen + nattribs; + for (i=1, len=2+strlen(attribs[0]); i", 1); +#ifndef HAVE_ALLOCA + xfree(alen); +#endif +} + +#ifdef DEBUG_TAG_PARSER +static void debug_tag_parser(int nattribs, char *attribs[], char *values[]) +{ + int i; + MemBuf mb; + mb.init(); + for (i=0; i gives length/2 attributes */ + char **atts = (char**) alloca(tag.size*sizeof(char *)); +#else + char *scratch = (char*) xmalloc(tag.size); + char **atts = (char**) xcalloc(tag.size, sizeof(char *)); +#endif + char **vals = atts+tag.size/2; + int res, i, n = 0; + vpos v = V_WSP, v0 = V_WSP; + char *p = scratch+1; + +#ifdef HAVE_ALLOCA + memset(atts, 0, tag.size*sizeof(char *)); +#endif + + assert(tag.buf[0]=='<'); + xmemcpy(scratch, tag.buf, tag.size); + scratch[tag.size-1] = '\0'; + i = 1; + for (; i0) { + ++p; + v = V_VAL; + continue; + } + if (scratch[i]=='-' && scratch[i+1]=='-') { + v = V_COMM; + i+=2; + continue; + } + v = V_ATT; + continue; + case V_ATT: + if (isspace(scratch[i])) { + scratch[i] = '\0'; + atts[n++] = p; + v = V_WSP; + } else if (scratch[i]=='=') { + scratch[i] = '\0'; + atts[n++] = p; + p = scratch + i + 1; + v = V_SVAL; + } + continue; + case V_SVAL: + if (isspace(scratch[i])) + continue; + p = scratch + i; + v = V_VAL; + /* FALL THRU */ + case V_VAL: + if (isspace(scratch[i])) { + if (v0 == V_DQUOT || v0 == V_SQUOT) { + scratch[i-1] = '\0'; + vals[n-1] = p+1; + } else { + scratch[i] = '\0'; + vals[n-1] = p; + } + v0 = v = V_WSP; + } else if (scratch[i]=='\'' || scratch[i]=='"') { + v0 = v = (vpos) scratch[i]; + } + continue; + case V_DQUOT: + case V_SQUOT: + if (scratch[i]==v && scratch[i-1]!='\\') + v = V_VAL; + continue; + case V_COMM: + if (i>2 && 
isspace(scratch[i]) && + scratch[i-1]=='-' && scratch[i-2]=='-') { + scratch[i] = '\0'; + atts[n++] = p; + v = V_WSP; + } + continue; + } + } + if (v==V_ATT || v==V_COMM) { + atts[n++]=p; + } else if (v==V_VAL) { + if (v0==V_DQUOT || v0==V_SQUOT) { + scratch[i-1] = '\0'; + vals[n-1] = p+1; + } else { + vals[n-1] = p; + } + } + + if (n > 0) { +#ifdef DEBUG_TAG_PARSER + if (Debug::Levels[94]>=9) + debug_tag_parser(n, atts, vals); +#endif + res = processTag(n, atts, vals); + } else { + res = 0; + } +#ifndef HAVE_ALLOCA + xfree(scratch); + xfree(atts); +#endif + return res; +} + +int ContentFilterHtml::cfFilter(const char *buf, int len) +{ + int i; + for (i=0; ilog_uri); + return -1; +#endif + } + switch (inTag) { + case T_TEXT: + if (buf[i]=='<') { + inTag = T_STAG; + goto storeTag; + } + storeText: + if (!eating) { + out.append(buf+i, 1); + } + continue; + + case T_META: + if (inBSQuot>=0) { + if (buf[i]=='-') { + if (++inBSQuot >= 2) { + inTag = T_COMM; + inBSQuot = 0; + } + } else { + inBSQuot = -1; + } + } + if (buf[i]=='>') + inTag = T_TEXT; + goto storeText; + + case T_COMM: + if (buf[i]=='>' && inBSQuot >= 2) + inTag = T_TEXT; + if (buf[i]=='-') + ++inBSQuot; + else + inBSQuot = 0; + goto storeText; + + case T_STAG: + if (buf[i]=='!') { + /* Push back SGML meta-tags including legal HTML coments */ + if (!eating) { + out.append(" syntax */ + if (!isspace(buf[i])) + inTag = T_TAG; + /* FALL THRU */ + case T_EQU: + if ((buf[i]=='"' || buf[i]=='\'') && !inBSQuot) { + tag.append(buf+i, 1); + inTag = (tpos) buf[i]; + continue; + } + inTag = T_TAG; + /* FALL THRU */ + case T_TAG: + switch (buf[i]) { + case '>': + tag.append(">", 1); + if (!processTag0() && !eating) { + out.append(tag.buf, this->tag.size); + } + tag.reset(); + inTag = T_TEXT; + continue; + case '<': + /* syntax error, but best effort to recover - open new tag */ + tag.append(">", 1); + if (!processTag0() && !eating) { + out.append(tag.buf, tag.size); + } + tag.reset(); + inTag = T_STAG; + goto 
storeTag; + case '=': + inTag = T_EQU; + /* FALL THRU */ + } + /* FALL THRU */ + case T_DQUOT: + case T_SQUOT: + if (!inBSQuot) { + if (buf[i]=='\\') { + inBSQuot = 1; + } else if (buf[i]==inTag) { + inTag = T_TAG; + } + } + /* FALL THRU */ + storeTag: + tag.append(buf+i, 1); + inBSQuot = 0; + } + } + return 0; +} --- squid-3.0_STABLE9/src/filters/ContentFilterHtml.h 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/src/filters/ContentFilterHtml.h 2004-12-05 00:56:37.000000000 +0100 @@ -0,0 +1,88 @@ +/* + * DEBUG: section 94 Filter libraries + * AUTHOR: Olaf Titz + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. 
+ * + */ + +#ifndef CONTENTFILTERHTML_H +#define CONTENTFILTERHTML_H + +#include "MemBuf.h" +#include "FilterModule.h" +#include "BufferedContentFilter.h" + +typedef enum { /* parsing state */ + T_TEXT=0, /* in ordinary text */ + T_STAG, /* at start of tag */ + T_TAG, /* inside tag */ + T_EQU, /* inside tag after an equals sign */ + T_META, /* inside a meta-tag */ + T_COMM, /* inside a comment */ + T_DQUOT='"', /* in double-quoted string */ + T_SQUOT='\'' /* in single-quoted string */ +} tpos; + +class FilterModuleHtml: public FilterModule { +public: + virtual ~FilterModuleHtml(); + +protected: + FilterModuleHtml(const wordlist *args, acl_access *access); +}; + +class ContentFilterHtml: public BufferedContentFilter { +public: + typedef RefCount Pointer; + void * operator new(size_t byteCount); + void operator delete(void *address); + + virtual ~ContentFilterHtml(); + +private: + friend class FilterModuleHtml; + CBDATA_CLASS(ContentFilterHtml); + + int processTag0(); + int cfFilter(const char *buf, int len); + + MemBuf tag; + tpos inTag; + signed inBSQuot: 4; + +protected: + ContentFilterHtml(FilterModule::Pointer o, clientHttpRequest *req); + + void insertTag(int nattribs, + const char * const attribs[], const char * const values[]); + virtual int processTag(int nattribs, char *attribs[], char *values[]) = 0; + + unsigned eating: 4; +}; + +#endif --- squid-3.0_STABLE9/src/filters/ContentFilterSkeleton.h 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/src/filters/ContentFilterSkeleton.h 2004-12-02 22:00:54.000000000 +0100 @@ -0,0 +1,116 @@ +/* + * DEBUG: section 93 Filter modules + * AUTHOR: Olaf Titz + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. 
Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +/* + This provides the common definitions for all content filter modules. + Takes the following definitions: + MODNAME - user visible module name + FMCLASS - class name for filter module + FMCLASS_BASE - class name for filter module superclass + FMCLASS_PRIVATE - private declarations for FMCLASS + CFCLASS - class name for content filter + CFCLASS_BASE - class name for content filter superclass + CFCLASS_PRIVATE - private declarations for CFCLASS (incl. 
ctor)
+  CFHTML          - define if this is an HTML filter
+  CFPRIM          - define if the filter implements the clientStream
+                    functions (readfunc, callback, detach, status) itself
+*/
+
+#include "ContentFilterHtml.h"
+
+/*
+ * Filter module class: one instance per configured module. It creates
+ * the per-request content filter and provides the Factory that the
+ * module registry uses to instantiate it by name.
+ */
+class FMCLASS: public FMCLASS_BASE
+{
+public:
+    /* The template argument had been lost from this copy of the patch. */
+    typedef RefCount<FMCLASS> Pointer;
+    virtual ~FMCLASS();
+
+    virtual ContentFilter::Pointer createFilter(clientHttpRequest *req);
+
+    class Factory: public FilterModule::Factory {
+    public:
+        Factory(const char *name) : FilterModule::Factory(name) {}
+        FilterModule::Pointer create(const wordlist *args,
+                                     acl_access *access) {
+            return FilterModule::Pointer(new FMCLASS(args, access));
+        }
+    };
+
+private:
+    friend class Factory;
+    friend class CFCLASS;
+    FMCLASS(const wordlist *args, acl_access *access);
+    FMCLASS_PRIVATE;
+};
+
+
+/*
+ * Content filter class: one instance per filtered request. Allocation
+ * goes through cbdata (see operator new / operator delete below).
+ */
+class CFCLASS: public CFCLASS_BASE
+{
+public:
+    typedef RefCount<CFCLASS> Pointer;
+    void * operator new(size_t byteCount);
+    void operator delete(void *address);
+
+    ~CFCLASS();
+
+protected:
+#ifdef CFHTML
+    virtual int processTag(int nattribs, char *attribs[], char *values[]);
+#endif
+#ifdef CFPRIM
+    /* Member functions declared through the clientStream function
+       typedefs; the including module defines all four. */
+    virtual CSR readfunc;
+    virtual CSCB callback;
+    virtual CSD detach;
+    virtual CSS status;
+#endif
+
+private:
+    friend class FMCLASS;
+/* The extra macro level makes the preprocessor expand CFCLASS to the real
+   class name before the CBDATA_* macro receives it. */
+#define cbdata_class_(C_) CBDATA_CLASS(C_)
+    cbdata_class_(CFCLASS);
+    CFCLASS_PRIVATE;
+};
+
+#define operator_new_(C_) \
+void *C_::operator new(size_t byteCount) { \
+    assert(byteCount == sizeof(C_)); \
+    CBDATA_INIT_TYPE(C_); \
+    C_ *res = cbdataAlloc(C_); \
+    return res; \
+}
+operator_new_(CFCLASS)
+
+void CFCLASS::operator delete(void *address) {
+    ContentFilter *t = static_cast<ContentFilter *>(address);
+    cbdataFree(t);
+    /* NOTE(review): other CBDATA_CLASS users only call cbdataFree() here;
+       confirm that this second call is needed. */
+    cbdataReferenceDone(t);
+}
+
+#define cbdata_class_init_(C_) CBDATA_CLASS_INIT(C_)
+cbdata_class_init_(CFCLASS);
--- squid-3.0_STABLE9/src/filters/FM_activex.cc 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/FM_activex.cc 2008-09-07 15:51:37.000000000 +0200
@@ -0,0 +1,120 @@
+/*
+ * DEBUG: section 93 Filter modules
+ * AUTHOR: Olaf Titz
+ *
+ * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
+ *
---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. 
+ *
+ */
+#include "squid.h"
+#include "wordlist.h"
+#include "FilterModule.h"
+#include "ContentFilterHtml.h"
+#include "PatFile.h"
+
+/* Number of entries in the replacement tag arrays below. */
+#define fake_nattribs 2
+
+/* Parameters for ContentFilterSkeleton.h, which generates the class
+   declarations for this module. */
+#define MODNAME "activex"
+#define FMCLASS FilterModuleActivex
+#define FMCLASS_BASE FilterModuleHtml
+#define FMCLASS_PRIVATE \
+    PatFile::Pointer patfile;
+#define CFCLASS ContentFilterActivex
+#define CFCLASS_BASE ContentFilterHtml
+#define CFCLASS_PRIVATE \
+    ContentFilterActivex(FilterModule::Pointer o, clientHttpRequest *req); \
+    bool checkpf(const char *c);
+#define CFHTML
+
+#include "ContentFilterSkeleton.h"
+FILTER_REGISTRY(FilterModuleActivex, "activex");
+
+/* Replacement tag written in place of a removed OBJECT tag. Filled in
+   by the first FilterModuleActivex constructor call. */
+static const char *fake_attrs[fake_nattribs] = { NULL };
+static char *fake_valus[fake_nattribs] = { NULL };
+
+/*
+ * Create the "activex" module. The optional first argument names a
+ * pattern file that is consulted through checkpf().
+ */
+FilterModuleActivex::FilterModuleActivex(const wordlist *args,
+                                         acl_access *access)
+    : FilterModuleHtml(args, access) {
+    description = new String("ActiveX filter");
+    if (!fake_attrs[0]) {
+        /* One-time setup of the shared replacement tag. The value buffer
+           is allocated once; this file has no matching delete[]. */
+        fake_attrs[0] = "OBJECT";
+        fake_valus[0] = NULL;
+        fake_attrs[1] = "ACTIVEX_REMOVED_BY_PROXY";
+        fake_valus[1] = new char[128];
+        snprintf(fake_valus[1], 128, "%s (%s)", getMyHostname(), full_appname_string);
+    }
+    if (args && args->key) {
+        patfile = PatFile::Pointer(new PatFile(args->key, false));
+    }
+}
+
+FilterModuleActivex::~FilterModuleActivex() {
+}
+
+/* Create the content filter for one request, owned by this module. */
+ContentFilter::Pointer FilterModuleActivex::createFilter(clientHttpRequest *req) {
+    ContentFilter::Pointer res =
+        ContentFilter::Pointer(new ContentFilterActivex(this, req));
+    return res;
+}
+
+ContentFilterActivex::ContentFilterActivex(FilterModule::Pointer o,
+                                           clientHttpRequest *req)
+    : ContentFilterHtml(o, req) {
+}
+
+ContentFilterActivex::~ContentFilterActivex() {
+}
+
+/*
+ * Tag hook: for an OBJECT tag that is judged to be ActiveX, write the
+ * replacement tag instead and return 1; return 0 for every other tag.
+ *
+ * NOTE(review): the loop header and the start of the loop body are
+ * missing from this copy of the patch (the text between '<' and '>'
+ * appears to have been stripped during extraction). The damaged line is
+ * kept as found; restore it from the upstream patch before applying.
+ */
+int ContentFilterActivex::processTag(int nattribs,
+                                     char *attribs[], char *values[]) {
+    int i;
+
+    /* Opening OBJECT: defang ActiveX classid */
+    if (!strcasecmp(attribs[0], "OBJECT"))
+        for (i=1; ilog_uri);
+                insertTag(fake_nattribs, fake_attrs, fake_valus);
+                return 1;
+            }
+        }
+
+    return 0;
+}
+
+/*
+ * Return true when c matches the module's pattern file, false when it
+ * does not or when no pattern file was configured.
+ *
+ * NOTE(review): the dynamic_cast below has lost its template argument
+ * in this copy (presumably <FilterModuleActivex *>), and its result is
+ * used without a NULL check.
+ */
+bool ContentFilterActivex::checkpf(const char *c) {
+    FilterModuleActivex *o = dynamic_cast(owner.getRaw());
+    if (o->patfile == NULL)
+        return false;
+    /* presumably re-reads the pattern file when it has changed */
+    o->patfile->checkReload();
+    return o->patfile->match(c) != NULL;
+}
--- squid-3.0_STABLE9/src/filters/FM_bugfinder.cc 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/FM_bugfinder.cc 2008-09-07 15:51:37.000000000 +0200
@@ -0,0 +1,144 @@
+/*
+ * DEBUG: section 93 Filter modules
+ * AUTHOR: Olaf Titz
+ *
+ * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
+ * ----------------------------------------------------------
+ *
+ * Squid is the result of efforts by numerous individuals from the
+ * Internet community. Development is led by Duane Wessels of the
+ * National Laboratory for Applied Network Research and funded by the
+ * National Science Foundation. Squid is Copyrighted (C) 1998 by
+ * Duane Wessels and the University of California San Diego. Please
+ * see the COPYRIGHT file for full details. Squid incorporates
+ * software developed and/or copyrighted by other sources. Please see
+ * the CREDITS file for full details.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+#include "squid.h"
+#include "wordlist.h"
+#include "FilterModule.h"
+
+/* Number of leading body bytes needed to read GIF or PNG dimensions
+   (the PNG height ends at byte 23). */
+#define HDRBUFSIZ 24
+
+/* Parameters for ContentFilterSkeleton.h, which generates the class
+   declarations for this module. */
+#define MODNAME "bugfinder"
+#define FMCLASS FilterModuleBugfinder
+#define FMCLASS_BASE FilterModule
+#define FMCLASS_PRIVATE \
+    int thresh;
+#define CFCLASS ContentFilterBugfinder
+#define CFCLASS_BASE ContentFilter
+#define CFCLASS_PRIVATE \
+    ContentFilterBugfinder(FilterModule::Pointer o, clientHttpRequest *req); \
+    unsigned char bufr[HDRBUFSIZ]; \
+    int posin;
+#define CFPRIM
+#include "ContentFilterSkeleton.h"
+FILTER_REGISTRY(FilterModuleBugfinder, "bugfinder");
+
+/*
+ * Create the "bugfinder" module. The optional first argument is the
+ * largest width and height, in pixels, that is still reported; the
+ * default is 2. With a threshold below 1 the module logs the problem
+ * and keeps the inactive type set by the base class.
+ */
+FilterModuleBugfinder::FilterModuleBugfinder(const wordlist *args,
+                                             acl_access *access)
+    : FilterModule(access) {
+    /* Set the description first: the base class prints it in its
+       destructor, also for a module that failed to configure. */
+    description = new String("Web-Bug logger");
+    thresh = 2;
+    /* args may be NULL when the module is configured without arguments */
+    if (args && args->key) {
+        if ((thresh = atoi(args->key)) < 1) {
+            debug(93, 1) (MODNAME ": threshold size needs to be >= 1\n");
+            return;
+        }
+    }
+    typ = FIL_CONTFILTER;
+    trigger = new String("image/gif image/png");
+}
+
+FilterModuleBugfinder::~FilterModuleBugfinder() {
+}
+
+/* Create the content filter for one request, owned by this module. */
+ContentFilter::Pointer FilterModuleBugfinder::createFilter(clientHttpRequest *req) {
+    ContentFilter::Pointer res =
+        ContentFilter::Pointer(new ContentFilterBugfinder(this, req));
+    return res;
+}
+
+ContentFilterBugfinder::ContentFilterBugfinder(FilterModule::Pointer o,
+                                               clientHttpRequest *req)
+    : ContentFilter(o, req), posin(0) {
+}
+
+ContentFilterBugfinder::~ContentFilterBugfinder() {
+}
+
+/*
+ * Read a 32-bit big-endian value one byte at a time. This touches
+ * exactly four bytes and does not depend on alignment or on the size
+ * of long. Values that do not fit an int are clamped.
+ */
+static int readBE32(const unsigned char *p) {
+    unsigned int v = ((unsigned int)p[0] << 24) | ((unsigned int)p[1] << 16) |
+                     ((unsigned int)p[2] << 8) | (unsigned int)p[3];
+    return v > 0x7fffffffU ? 0x7fffffff : (int)v;
+}
+
+/*
+ * Inspect the first HDRBUFSIZ bytes of a response body and log the
+ * image when both dimensions are at most thresh.
+ *   b       - buffer holding HDRBUFSIZ bytes
+ *   thresh  - largest width/height that is reported
+ *   log_uri - request URI for the log line
+ * Data that is neither GIF nor PNG keeps the 10000x10000 default.
+ */
+static void processImageHeader(unsigned char *b, int thresh, const char *log_uri) {
+    int x = 10000, y = 10000;
+    const char *t = "";
+    if (!memcmp(b, "GIF8", 4)) {
+        t = "GIF";
+        /* logical screen size: two little-endian 16-bit values */
+        x = b[6] + (b[7]<<8);
+        y = b[8] + (b[9]<<8);
+    } else if (!memcmp(b, "\x89PNG", 4)) {
+        t = "PNG";
+        /* IHDR width and height: two big-endian 32-bit values */
+        x = readBE32(b+16);
+        y = readBE32(b+20);
+    }
+    if (x <= thresh && y <= thresh) {
+        debug(93, 0) (MODNAME ": is a %dx%d %s image: %s\n",
+                      x, y, t, log_uri);
+    }
+}
+
+/*
+ * clientStream callback: copy the first HDRBUFSIZ body bytes aside,
+ * analyze them once they are complete, and always pass the data on
+ * unchanged.
+ */
+void ContentFilterBugfinder::callback(clientStreamNode *thisNode,
+                                      ClientHttpRequest *http, HttpReply *rep,
+                                      StoreIOBuffer receivedData) {
+    debug(93, 9) ("bugfinder:callback %lld %d\n",
+                  (long long)receivedData.offset, (int)receivedData.length);
+
+    if (posin < HDRBUFSIZ && receivedData.offset < HDRBUFSIZ) {
+        // collect first HDRBUFSIZ bytes in bufr; the second clamp keeps the
+        // copy inside bufr even if offset and posin disagree
+        size_t n = XMIN(receivedData.length,
+                        (size_t)(HDRBUFSIZ - receivedData.offset));
+        n = XMIN(n, (size_t)(HDRBUFSIZ - posin));
+        if (n > 0) {
+            xmemcpy(bufr + posin, receivedData.data, n);
+            posin += (int)n;
+        }
+    }
+    if (posin == HDRBUFSIZ) {
+        debug(93, 8) ("bugfinder: got %d bytes, analyzing...\n", posin);
+        FilterModuleBugfinder *o = dynamic_cast<FilterModuleBugfinder *>(owner.getRaw());
+        processImageHeader(bufr, o->thresh, request->log_uri);
+        // step past HDRBUFSIZ so the header is analyzed only once
+        ++posin;
+    }
+
+    // pass through
+    clientStreamCallback(thisNode, http, rep, receivedData);
+}
+
+/* clientStream read function: forward the read to the next node. */
+void ContentFilterBugfinder::readfunc(clientStreamNode *thisNode,
+                                      ClientHttpRequest *http) {
+    // pass through
+    StoreIOBuffer writeBuffer = thisNode->next()->readBuffer;
+    clientStreamRead(thisNode, http, writeBuffer);
+}
+
+/* clientStream detach function. */
+void ContentFilterBugfinder::detach(clientStreamNode *thisNode,
+                                    ClientHttpRequest *http) {
+    // detach
+    clientStreamDetach(thisNode, http);
+}
+
+/* clientStream status function: report the upstream status. */
+clientStream_status_t ContentFilterBugfinder::status(clientStreamNode *thisNode,
+                                                     ClientHttpRequest *http) {
+    // pass through
+    return clientStreamStatus(thisNode, http);
+}
--- squid-3.0_STABLE9/src/filters/FM_gifanim.cc 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/FM_gifanim.cc 2008-09-07 15:51:37.000000000 +0200
@@ -0,0 +1,123 @@
+/*
+ * DEBUG: section 93 Filter modules
+ * AUTHOR: Olaf Titz
+ *
+ * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
+ * ----------------------------------------------------------
+ *
+ * Squid is the result of efforts by numerous individuals from the
+ * Internet community. Development is led by Duane Wessels of the
+ * National Laboratory for Applied Network Research and funded by the
+ * National Science Foundation. Squid is Copyrighted (C) 1998 by
+ * Duane Wessels and the University of California San Diego.
Please
+ * see the COPYRIGHT file for full details. Squid incorporates
+ * software developed and/or copyrighted by other sources. Please see
+ * the CREDITS file for full details.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+#include "squid.h"
+#include "wordlist.h"
+#include "FilterModule.h"
+
+/* Parameters for ContentFilterSkeleton.h, which generates the class
+   declarations for this module. */
+#define MODNAME "gifanim"
+#define FMCLASS FilterModuleGifanim
+#define FMCLASS_BASE FilterModule
+#define FMCLASS_PRIVATE \
+    int ncycles;
+#define CFCLASS ContentFilterGifanim
+#define CFCLASS_BASE BufferedContentFilter
+#define CFCLASS_PRIVATE \
+    ContentFilterGifanim(FilterModule::Pointer o, clientHttpRequest *req); \
+    int cfFilter(const char *buf, int len); \
+    int inHold;
+#include "ContentFilterSkeleton.h"
+FILTER_REGISTRY(FilterModuleGifanim, "gifanim");
+
+/*
+ * Create the "gifanim" module. The optional first argument is the loop
+ * count written into animated GIFs; the default is 0.
+ */
+FilterModuleGifanim::FilterModuleGifanim(const wordlist *args,
+                                         acl_access *access)
+    : FilterModule(access) {
+    if (args && args->key) {
+        ncycles = atoi(args->key);
+    } else {
+        ncycles = 0;
+    }
+    typ = FIL_CONTFILTER;
+    trigger = new String("image/gif");
+    description = new String("GIF animation breaker");
+}
+
+FilterModuleGifanim::~FilterModuleGifanim() {
+}
+
+/* Create the content filter for one request, owned by this module. */
+ContentFilter::Pointer FilterModuleGifanim::createFilter(clientHttpRequest *req) {
+    ContentFilter::Pointer res =
+        ContentFilter::Pointer(new ContentFilterGifanim(this, req));
+    return res;
+}
+
+ContentFilterGifanim::ContentFilterGifanim(FilterModule::Pointer o,
+                                           clientHttpRequest *req)
+    : BufferedContentFilter(o, req), inHold(0) {
+}
+
+ContentFilterGifanim::~ContentFilterGifanim() {
+}
+
+
+/* Start of the NETSCAPE2.0 looping extension, up to the loop count. */
+static const char magic[] = "\x21\xff\x0bNETSCAPE2.0\x03\x01";
+/* Same-length comment extension, written instead when ncycles is 0. */
+static const char nomagic[] = "\x21\xfe\x0bXXXXXXXX1.0\x03\x01";
+#define TRIGGERLEN 16   /* bytes of magic that must match */
+#define KILLLEN 3       /* bytes after magic that are replaced */
+
+
+/*
+ * Copy len bytes from buf to the output, rewriting the loop count of a
+ * NETSCAPE2.0 extension on the way. inHold is the number of input bytes
+ * currently withheld from the output and survives between calls, so a
+ * match may span buffers. Returns len, or -1 when the configured loop
+ * count is negative and an extension is found.
+ *
+ * The loop header was damaged in this copy of the patch and has been
+ * restored from the surrounding structure.
+ */
+int ContentFilterGifanim::cfFilter(const char *buf, int len)
+{
+    int i;
+    for (i=0; i<len; ++i) {
+        if (inHold >= TRIGGERLEN+KILLLEN) {
+            /* magic and the three bytes after it are held: write the
+               replacement block, then handle buf[i] as normal input */
+            char x[3] = { 0 };
+            FilterModuleGifanim *o = dynamic_cast<FilterModuleGifanim *>(owner.getRaw());
+            int ncycles = o->ncycles;
+            debug(93,3) (MODNAME ": %s: breaking animation to %d\n",
+                         request->log_uri, ncycles);
+            if (ncycles < 0)
+                return -1;
+            x[0] = ncycles & 255;
+            x[1] = ncycles >> 8;
+            out.append(ncycles ? magic : nomagic, TRIGGERLEN);
+            out.append(x, 3);
+            inHold = 0;
+        } else if (inHold >= TRIGGERLEN) {
+            /* swallow the original loop count and block terminator */
+            ++inHold;
+            continue;
+        } else if (inHold > 0) {
+            if (buf[i] == magic[inHold]) {
+                ++inHold;
+                continue;
+            }
+            /* partial match failed: release the held bytes */
+            out.append(magic, inHold);
+            inHold = 0;
+        }
+        /* Not inside a match. This is also reached for the byte that ended
+           a match or broke a partial one, so that a 0x21 in that position
+           starts a new match instead of being copied through. */
+        if (buf[i] == magic[0])
+            inHold = 1;
+        else
+            out.append(buf+i, 1);
+    }
+    return len;
+}
--- squid-3.0_STABLE9/src/filters/FM_redirect.cc 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/FM_redirect.cc 2008-09-07 15:51:37.000000000 +0200
@@ -0,0 +1,76 @@
+/*
+ * DEBUG: section 93 Filter modules
+ * AUTHOR: Olaf Titz
+ *
+ * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
+ * ----------------------------------------------------------
+ *
+ * Squid is the result of efforts by numerous individuals from the
+ * Internet community. Development is led by Duane Wessels of the
+ * National Laboratory for Applied Network Research and funded by the
+ * National Science Foundation. Squid is Copyrighted (C) 1998 by
+ * Duane Wessels and the University of California San Diego. Please
+ * see the COPYRIGHT file for full details.
Squid incorporates
+ * software developed and/or copyrighted by other sources. Please see
+ * the CREDITS file for full details.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+#include "squid.h"
+#include "wordlist.h"
+#include "FilterModule.h"
+#include "PatFile.h"
+
+/*
+ * Redirector module: looks a request URI up in a pattern file and
+ * returns what PatFile::match() yields for it.
+ */
+class FilterModuleRedir: public FilterModule {
+public:
+    virtual char *filter(clientHttpRequest *req, const void *arg);
+
+    class Factory: public FilterModule::Factory {
+    public:
+        Factory(const char *name) : FilterModule::Factory(name) {}
+        FilterModule::Pointer create(const wordlist *args,
+                                     acl_access *access) {
+            return FilterModule::Pointer(new FilterModuleRedir(args, access));
+        }
+    };
+
+private:
+    friend class Factory;
+    FilterModuleRedir(const wordlist *args, acl_access *access);
+
+    PatFile::Pointer patfile;   /* unset when no pattern file was given */
+};
+
+FILTER_REGISTRY(FilterModuleRedir, "redirect");
+
+/*
+ * Create the "redirect" module. The first argument names the pattern
+ * file. Without it the module logs the problem and keeps the inactive
+ * type set by the base class. The base constructor already stores the
+ * access list.
+ */
+FilterModuleRedir::FilterModuleRedir(const wordlist *args,
+                                     acl_access *access)
+    : FilterModule(access) {
+    /* Set the description first: the base class prints it in its
+       destructor, also for a module that failed to configure. */
+    description = new String("URI Redirector ");
+    /* args may be NULL when the module is configured without arguments */
+    if (args == NULL || args->key == NULL) {
+        debug(93, 1) ("FilterModuleRedir: no pattern file!\n");
+        return;
+    }
+    typ = FIL_REDIRECT;
+    description->append(args->key);
+    patfile = PatFile::Pointer(new PatFile(args->key, true));
+}
+
+/*
+ * Look up arg, a C string, in the pattern file and return the result of
+ * PatFile::match(). Returns NULL when no pattern file is loaded.
+ * req is not used.
+ */
+char *FilterModuleRedir::filter(clientHttpRequest *req,
+                                const void *arg) {
+    if (patfile == NULL)
+        return NULL;
+    patfile->checkReload();
+    return patfile->match((const char *)arg);
+}
--- squid-3.0_STABLE9/src/filters/FM_script.cc 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/FM_script.cc 2004-12-01 23:23:15.000000000 +0100
@@ -0,0 +1,174 @@
+/*
+ * DEBUG: section 93 Filter modules
+ * AUTHOR: Olaf Titz
+ *
+ * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
+ * ----------------------------------------------------------
+ *
+ * Squid is the result of efforts by numerous individuals from the
+ * Internet community. Development is led by Duane Wessels of the
+ * National Laboratory for Applied Network Research and funded by the
+ * National Science Foundation. Squid is Copyrighted (C) 1998 by
+ * Duane Wessels and the University of California San Diego. Please
+ * see the COPYRIGHT file for full details. Squid incorporates
+ * software developed and/or copyrighted by other sources. Please see
+ * the CREDITS file for full details.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+#include "squid.h"
+#include "FilterModule.h"
+#include "ContentFilterHtml.h"
+
+/* Parameters for ContentFilterSkeleton.h, which generates the class
+   declarations for this module. FMCLASS_PRIVATE is intentionally empty. */
+#define MODNAME "script"
+#define FMCLASS FilterModuleScript
+#define FMCLASS_BASE FilterModuleHtml
+#define FMCLASS_PRIVATE
+#define CFCLASS ContentFilterScript
+#define CFCLASS_BASE ContentFilterHtml
+#define CFCLASS_PRIVATE \
+    ContentFilterScript(clientHttpRequest *req);
+#define CFHTML
+
+#include "ContentFilterSkeleton.h"
+FILTER_REGISTRY(FilterModuleScript, "script");
+
+/* True once the two regular expressions below have been compiled. */
+static bool inited = false;
+
+/* the isJSAttrib function */
+#include "jsattrib.h"
+static regex_t jsStyleRE;
+
+/* the HTML_* REs */
+#include "jsre.h"
+static regex_t jsEntityRE;
+
+/* SUBMIT tag */
+static const char *submit_attrs[] = { "INPUT", "TYPE" };
+static const char *submit_valus[] = { NULL, "SUBMIT" };
+#define submit_nattribs 2
+
+/*
+ * Create the "script" module. The first instance compiles the shared
+ * regular expressions; a compile failure is only logged.
+ */
+FilterModuleScript::FilterModuleScript(const wordlist *args,
+                                       acl_access *access)
+    : FilterModuleHtml(args, access) {
+    description = new String("Javascript filter");
+    if (!inited) {
+        int e;
+        e = regcomp(&jsStyleRE, "text/javascript",
+                    REG_EXTENDED|REG_ICASE|REG_NOSUB);
+        if (e)
+            debug(93, 1) (MODNAME ": regcomp 1: %d\n", e);
+        e = regcomp(&jsEntityRE,
+                    "&\\{.*\\}|" HTML_mocha ":|" HTML_javascript ":|"
+                    HTML_livescript ":|" HTML_eval "\\(.*\\)",
+                    REG_EXTENDED|REG_ICASE|REG_NOSUB);
+        if (e)
+            debug(93, 1) (MODNAME ": regcomp 2: %d\n", e);
+        inited = true;
+    }
+}
+
+FilterModuleScript::~FilterModuleScript() {
+}
+
+/* Create the content filter for one request. */
+ContentFilter::Pointer FilterModuleScript::createFilter(clientHttpRequest *req) {
+    ContentFilter::Pointer res =
+        ContentFilter::Pointer(new ContentFilterScript(req));
+    return res;
+}
+
+/* NOTE(review): unlike the other modules, this filter passes NULL as its
+   owner to the base class - confirm that nothing dereferences it. */
+ContentFilterScript::ContentFilterScript(clientHttpRequest *req)
+    : ContentFilterHtml(NULL, req) {
+}
+
+ContentFilterScript::~ContentFilterScript() {
+}
+
+/* Currently known ways to specify Javascript scripts:
+
+   (Netscape)
+   (Netscape)
+   (Netscape)
+   (MSIE)
+
+   NOTE(review): the example tags that belong in front of each browser
+   name are missing from this copy of the patch.
+*/
+
+/*
+ * Tag hook. Drops SCRIPT elements and, judging by the comments, STYLE
+ * elements that carry Javascript, by setting `eating` (1 inside SCRIPT,
+ * 2 inside STYLE). For other tags it clears the attribute names that
+ * are found to carry Javascript and re-emits the tag, followed by a
+ * synthetic submit button when a removed value contained "submit()".
+ * Returns 1 when the tag was dropped or replaced, 0 when left alone.
+ *
+ * NOTE(review): both attribute loops below have lost their header and
+ * the start of their body in this copy of the patch (the text between
+ * '<' and '>' appears to have been stripped during extraction). The
+ * damaged lines are kept as found; restore them from the upstream patch
+ * before applying.
+ */
+int ContentFilterScript::processTag(int nattribs, char *attrs[], char *valus[])
+{
+    int i, mod = 0, sub = 0;
+
+    /* Opening SCRIPT: eat this tag and everything that follows */
+    if (!strcasecmp(attrs[0], "SCRIPT")) {
+        debug(93, 3) (MODNAME ": %s: removing SCRIPT\n", request->log_uri);
+        eating = 1;
+        return 1;
+    }
+
+    /* Closing SCRIPT: eat this tag, resume normal operation */
+    if (eating==1 && !strcasecmp(attrs[0], "/SCRIPT")) {
+        eating = 0;
+        return 1;
+    }
+
+    /* Opening STYLE: start eat if javascript style */
+    if (!strcasecmp(attrs[0], "STYLE"))
+        for (i=1; ilog_uri);
+                eating = 2;
+                return 1;
+            }
+
+    /* Closing STYLE: eat this tag, resume normal operation */
+    if (eating==2 && !strcasecmp(attrs[0], "/STYLE")) {
+        eating = 0;
+        return 1;
+    }
+
+    /* Remove Javascript attributes from tags */
+    for (i=1; ilog_uri, attrs[i], attrs[0]);
+            mod = 1;
+            /* If the Javascript attribute contains a submit function,
+               fake a submit button. Otherwise we may end up with
+               a non-submittable form */
+            if (valus[i] && strstr(valus[i], "submit()"))
+                sub = 1;
+            attrs[i] = NULL;
+        } else if (attrs[i] && valus[i] &&
+                   !regexec(&jsEntityRE, valus[i], 0, NULL, 0)) {
+            /* the value matches the Javascript entity/URL expression */
+            debug(93, 3) (MODNAME ": %s: removing %s from %s\n",
+                          request->log_uri, valus[i], attrs[0]);
+            if (valus[i] && strstr(valus[i], "submit()"))
+                sub = 1;
+            mod = 1;
+            attrs[i] = NULL;
+        }
+    }
+    /* nothing was removed: leave the tag alone */
+    if (!mod)
+        return 0;
+
+    /* Insert the cleaned tag */
+    insertTag(nattribs, attrs, valus);
+    if (sub) {
+        debug(93, 3) (MODNAME ": %s: faking SUBMIT\n", request->log_uri);
+        insertTag(submit_nattribs, submit_attrs, submit_valus);
+    }
+    return 1;
+}
--- squid-3.0_STABLE9/src/filters/FilterModule.cc 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/FilterModule.cc 2008-10-08 13:27:32.000000000 +0200
@@ -0,0 +1,162 @@
+/*
+ * DEBUG: section 92 Filtering module infrastructure
+ * AUTHOR: Olaf Titz
+ *
+ * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
+ * ----------------------------------------------------------
+ *
+ * Squid is the result of efforts by numerous individuals
from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. 
+ *
+ */
+
+#include "squid.h"
+#include "ACLChecklist.h"
+#include "HttpRequest.h"
+#include "FilterModule.h"
+
+/* Head of the singly linked list of every constructed Factory. */
+FilterModule::Factory *FilterModule::Factory::registry = NULL;
+
+/* Remember the module name and push this factory onto the registry. */
+FilterModule::Factory::Factory(const char *n) : name(n) {
+    next = registry;
+    registry = this;
+}
+
+/*
+ * Find the factory registered under `name`, let it create a module from
+ * `args` and `access`, and pass the result to registerModule().
+ * `config_info` only prefixes log messages (initAll() passes "file:line").
+ * An unknown name is logged at level 0 and otherwise ignored.
+ */
+void FilterModule::Factory::filterInit(const char *name,
+                                       const wordlist *args,
+                                       acl_access *access,
+                                       const char *config_info) {
+    debug(92, 4) ("creating filter: %s\n", name);
+    FilterModule::Factory *f = registry;
+    while (f) {
+        if (!strcmp(f->name, name)) {
+            FilterModule::Pointer p = f->create(args, access);
+            FilterModule::registerModule(p, config_info);
+            return;
+        }
+        f = f->next;
+    }
+    debug(92, 0) ("%s: unknown filter type, ignoring: '%s'\n",
+                  config_info, name);
+}
+
+/*
+ * Base constructor: the module starts as FIL_DUMMY with no description and
+ * no trigger; derived classes are expected to fill these in themselves.
+ */
+FilterModule::FilterModule(acl_access *access)
+    : typ(FIL_DUMMY), description(NULL), trigger(NULL), access_list(access) {
+}
+
+FilterModule::~FilterModule() {
+    // description stays NULL unless a derived class set it, so go through
+    // the NULL-safe accessor instead of dereferencing it directly.
+    debug(92, 5) ("~FilterModule %p %s\n", this, getDescription());
+    delete description;
+    delete trigger;
+    // do not delete access_list here, it will be freed by free_module()
+}
+
+/*
+ * Default implementation for modules that do not override filter():
+ * reports the offending module through fatalf().
+ */
+char *FilterModule::filter(clientHttpRequest *req, const void *arg) {
+    fatalf("This module does not correctly implement FilterModule::filter: %p %s", this, getDescription());
+    return NULL;
+}
+
+/*
+ * Default implementation for modules that do not override createFilter():
+ * reports the offending module through fatalf().
+ */
+ContentFilter::Pointer FilterModule::createFilter(clientHttpRequest *http) {
+    fatalf("This module does not correctly implement FilterModule::createFilter: %p %s", this, getDescription());
+    return ContentFilter::Pointer(NULL);
+}
+
+/*
+ * Decide whether this module applies to the request.
+ * Returns true when no access list is configured, or when the fast ACL
+ * check over the module's access list returns 0.
+ */
+bool FilterModule::matchACL(clientHttpRequest *http) {
+    if (access_list == NULL)
+        return true;
+    debug(92, 8) ("matchACL: checking ACL for %s\n", getDescription());
+    ACLChecklist ch;
+    ch.src_addr = http->request->client_addr;
+    ch.my_addr = http->request->my_addr;
+    ch.my_port = http->request->my_port;
+    ch.request = HTTPMSGLOCK(http->request);
+    ch.accessList = cbdataReference(access_list);
+    int r = ch.fastCheck();
+    debug(92, 8) ("matchACL: %s filter\n", (r == 0) ? "WILL" : "will NOT");
+    return (r == 0);
+}
+
+/* Description text, or "" when none has been set. */
+const char *FilterModule::getDescription() const {
+    return description == NULL ? "" : description->buf();
+}
+
+/* Trigger text, or "" when none has been set. */
+const char *FilterModule::getTrigger() const {
+    return trigger == NULL ? "" : trigger->buf();
+}
+
+
+/* One module chain per filter type, indexed by squidFilterType. */
+FilterModule::Pointer FilterModule::chain[FIL_END];
+
+/*
+ * Append `m` to the chain for its type. A missing module, or one whose
+ * type is outside (FIL_DUMMY, FIL_END), is logged at level 1 and skipped.
+ * `config_info` only prefixes the log message.
+ */
+void FilterModule::registerModule(FilterModule::Pointer m,
+                                  const char *config_info) {
+    if (m == NULL || m->typ <= FIL_DUMMY || m->typ >= FIL_END) {
+        debug(92, 1) ("%s: registerModule: invalid type or initialization failed, skipping\n", config_info);
+        return;
+    }
+    debug(92, 5) ("registerModule %p %s\n", m.getRaw(), m->getDescription());
+    Pointer p = chain[m->typ];
+    if (p == NULL) {
+        chain[m->typ] = m;
+    } else {
+        while (p->next != NULL) p = p->next;
+        p->next = m;
+    }
+    m->next = NULL;
+}
+
+/* True while the cursor still points at a module. */
+bool FilterModule::Iterator::hasNext() {
+    return p != NULL;
+}
+
+/*
+ * Return the module under the cursor and advance. When the chain is
+ * already exhausted the cursor stays put and a NULL Pointer is returned.
+ */
+FilterModule::Pointer FilterModule::Iterator::next() {
+    Pointer x = p;
+    if (p != NULL)
+        p = p->next;
+    return x;
+}
+
+/* Iterator positioned on the first module of the chain for `typ`. */
+FilterModule::Iterator FilterModule::getChain(squidFilterType typ) {
+    return Iterator(chain[typ]);
+}
+
+/* True when at least one module is registered for `typ`. */
+bool FilterModule::hasChain(squidFilterType typ) {
+    return chain[typ] != NULL;
+}
+
+/*
+ * Instantiate every module listed in Config.modules, labelling each with
+ * "cfg_filename:config_lineno" for diagnostics.
+ */
+void FilterModule::initAll() {
+    filter_module *list;
+    for (list = Config.modules; list; list = list->next) {
+        char buf[1024];
+        snprintf(buf, sizeof(buf), "%s:%d",
+                 list->cfg_filename, list->config_lineno);
+        Factory::filterInit(list->module, list->params, list->access_list, buf);
+    }
+}
+
+/*
+ * Empty every chain by repeatedly replacing its head with the next module.
+ * Presumably dropping the last Pointer reference releases the module --
+ * TODO confirm against the Pointer type in FilterModule.h.
+ */
+void FilterModule::destroyAll() {
+    for (int i = FIL_DUMMY; i < FIL_END; ++i) {
+        while (chain[i] != NULL) {
+            chain[i] = chain[i]->next;
+        }
+    }
+}
+
--- squid-3.0_STABLE9/src/filters/Makefile.am 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/Makefile.am 2004-11-29 22:31:02.000000000 +0100
@@ -0,0 +1,38 @@
+# Makefile for filter modules in the Squid Object Cache server
+#
+# $Id: Makefile.am,v 1.9 2004-11-29 21:31:02 olaf Exp $
+#
+AUTOMAKE_OPTIONS = 
subdir-objects
+AM_CFLAGS = @SQUID_CFLAGS@
+AM_CXXFLAGS = @SQUID_CXXFLAGS@
+
+noinst_LIBRARIES = libfilters.a
+
+# Core filter sources; they are only compiled when ENABLE_FILTERS is set.
+FILTER_ALL_SOURCE = \
+	filter.cc \
+	FilterModule.cc \
+	ContentFilter.cc \
+	BufferedContentFilter.cc \
+	ContentFilterHtml.cc
+
+if ENABLE_FILTERS
+libfilters_a_SOURCES = $(FILTER_ALL_SOURCE)
+else
+libfilters_a_SOURCES =
+endif
+
+INCLUDES = -I. -I$(top_builddir)/include -I$(top_srcdir)/include \
+	-I$(top_srcdir)/src/
+
+# This is known to work only with gperf version 2.7.
+# We make an extra rule here instead of a direct dependency
+# in order to avoid unnecessary/accidental remake.
+# gperf reads the keyword list from stdin and writes the generated table to
+# stdout, so both must be redirected; without the redirections gperf would
+# treat jsattrib.h.new as its input file and print the table to the terminal.
+jsattrib.h.new: jsattrib.gperf
+	gperf -k'1,5,8' -LC++ -ZJSHash -o -l -C -NisJSAttrib <jsattrib.gperf >jsattrib.h.new
+
+# Regenerate the HTML_* keyword regexes used by the script filter.
+jsre.h: makejsre
+	$(PERL) makejsre mocha javascript livescript eval >jsre.h
+
--- squid-3.0_STABLE9/src/filters/filter.cc 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/filter.cc 2008-09-11 21:50:32.000000000 +0200
@@ -0,0 +1,99 @@
+/*
+ * DEBUG: section 92 Filter module infrastructure
+ * AUTHOR: Olaf Titz
+ *
+ * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
+ * ----------------------------------------------------------
+ *
+ * Squid is the result of efforts by numerous individuals from the
+ * Internet community. Development is led by Duane Wessels of the
+ * National Laboratory for Applied Network Research and funded by the
+ * National Science Foundation. Squid is Copyrighted (C) 1998 by
+ * Duane Wessels and the University of California San Diego. Please
+ * see the COPYRIGHT file for full details. Squid incorporates
+ * software developed and/or copyrighted by other sources. Please see
+ * the CREDITS file for full details.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "squid.h"
+#include "FilterModule.h"
+
+/*
+ * Return a newly xstrdup()ed copy of content type `t`, lower-cased and cut
+ * off at the first ';' (i.e. without parameters). Returns NULL when `t`
+ * is NULL. The copy presumably has to be released by the caller -- TODO
+ * confirm against the call sites.
+ */
+char *canonType(const char *t)
+{
+    char *x, *p;
+    if (!t)
+        return NULL;
+    p = x = xstrdup(t);
+    while (*p) {
+        if (*p==';') {
+            *p='\0';
+            return x;
+        }
+        /* tolower() is undefined for negative values other than EOF, so
+           bytes above 0x7f must be passed as unsigned char. */
+        *p = tolower((unsigned char)*p);
+        ++p;
+    }
+    return x;
+}
+
+/*
+ * Match content type `ct` against `tr`, a list of patterns separated by
+ * single spaces. Returns 1 on the first matching pattern, 0 otherwise.
+ * A pattern may start with the two characters "*" "/" to accept any major
+ * type, and a '*' elsewhere accepts the rest of `ct`. Without a '*', the
+ * pattern has to cover `ct` completely.
+ */
+int mtmatch(const char *ct, const char *tr)
+{
+    while (*tr) {
+        const char *c = ct;
+        const char *t = tr;
+        if (t[0] == '*' && t[1] == '/') {
+            /* wildcard major type: skip ct up to and including its '/' */
+            for (; *c && *c != '/'; ++c);
+            if (*c)
+                ++c;
+            t += 2;
+        }
+        while (*c) {
+            if (*t == '*')
+                return 1;
+            if (*c != *t) {
+                /* end of the whole pattern list: nothing left to try */
+                if (!*t)
+                    return 0;
+                break;
+            }
+            ++c;
+            ++t;
+        }
+        /* Only a fully consumed ct may match at the end of a pattern.
+           Without the !*c test, a pattern that is merely a prefix of ct
+           matched whenever another pattern followed it, yet not when it
+           was the last one in the list. */
+        if (!*c && (!*t || *t == ' '))
+            return 1;
+        /* advance to the start of the next pattern */
+        for (tr = t; *tr && *tr != ' '; ++tr);
+        if (*tr)
+            ++tr;
+    }
+    return 0;
+}
+
+/*
+ * Run `uri` through every FIL_REDIRECT module whose ACL matches `http`.
+ * Each non-NULL result of filter() becomes the input of the next module.
+ * Returns the final string; when no module produced a replacement this is
+ * the caller's own `uri` with const cast away.
+ * NOTE(review): who owns the strings returned by filter() is not visible
+ * here -- confirm before freeing or caching the result.
+ */
+char *moduleRedirect(const char *uri, clientHttpRequest *http)
+{
+    const char *res = uri, *tmp;
+    for (FilterModule::Iterator i = FilterModule::getChain(FIL_REDIRECT);
+         i.hasNext();) {
+        FilterModule::Pointer f = i.next();
+        debug(92, 9) ("moduleRedirect: checking %p\n", f.getRaw());
+        if (f->matchACL(http)) {
+            tmp = f->filter(http, res);
+            if (tmp)
+                res = tmp;
+        }
+    }
+    return (char *)res; /* XX */
+}
+
--- squid-3.0_STABLE9/src/filters/jsattrib.gperf 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/jsattrib.gperf 2004-11-28 19:39:27.000000000 +0100
@@ -0,0 +1,24 @@
+%{
+/* gperf output needs postprocessing for case-insensitivity! */
+%}
+# gperf defines const char *isJSAttrib(const char *str, int len);
+# to return NULL iff str is not one of the following words.
+# Words must be in lower case.
+onload
+onunload
+onclick
+ondblclick
+onmousedown
+onmouseup
+onmouseover
+onmousemove
+onmouseout
+onfocus
+onblur
+onkeypress
+onkeydown
+onkeyup
+onsubmit
+onreset
+onselect
+onchange
--- squid-3.0_STABLE9/src/filters/jsattrib.h 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/jsattrib.h 2004-11-28 19:39:27.000000000 +0100
@@ -0,0 +1,122 @@
+/* C++ code produced by gperf version 2.7.2 */
+/* Command-line: gperf -k1,5,8 -LC++ -ZJSHash -o -l -C -NisJSAttrib */
+/* gperf output needs postprocessing for case-insensitivity! */
+
+#define TOTAL_KEYWORDS 18
+#define MIN_WORD_LENGTH 6
+#define MAX_WORD_LENGTH 11
+#define MIN_HASH_VALUE 6
+#define MAX_HASH_VALUE 33
+/* maximum key range = 28, duplicates = 0 */
+
+/*
+ * Perfect-hash lookup for the Javascript event-handler attribute names
+ * stored in wordlist[] below (onload, onclick, ...). Both the hash and the
+ * final comparison ignore case (tolower() / strcasecmp()).
+ */
+class JSHash
+{
+private:
+    static inline unsigned int hash (const char *str, unsigned int len);
+public:
+    static const char *isJSAttrib (const char *str, unsigned int len);
+};
+
+/*
+ * Hash value = len plus the asso_values[] entries of the lower-cased
+ * characters at index 7 (len 8 or the default case), index 4 (additionally
+ * for len 5..7) and index 0. The switch cases fall through on purpose.
+ * isJSAttrib() only calls this with len in 6..11, so str[4] and str[0] are
+ * always present and str[7] is read only for len >= 8.
+ */
+inline unsigned int
+JSHash::hash (register const char *str, register unsigned int len)
+{
+    static const unsigned char asso_values[] =
+    {
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 20, 10, 15,
+        5, 5, 34, 34, 34, 10, 34, 34, 0, 10,
+        34, 0, 34, 34, 34, 5, 0, 0, 34, 5,
+        34, 0, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+        34, 34, 34, 34, 34, 34
+    };
+    register int hval = len;
+
+    switch (hval)
+    {
+    default:
+    case 8:
+        hval += asso_values[tolower((unsigned char)str[7])];
+    case 7:
+    case 6:
+    case 5:
+        hval += asso_values[tolower((unsigned char)str[4])];
+    case 4:
+    case 3:
+    case 2:
+    case 1:
+        hval += asso_values[tolower((unsigned char)str[0])];
+        break;
+    }
+    return hval;
+}
+
+/*
+ * Return the table entry (lower-case spelling) when `str` equals one of
+ * the keywords ignoring case, and 0 otherwise. `len` must lie within
+ * MIN_WORD_LENGTH..MAX_WORD_LENGTH and equal the length recorded for the
+ * hash slot before the strings are compared.
+ * The comparison uses strcasecmp(), so `str` is expected to be
+ * NUL-terminated with `len` equal to its length -- assumption, confirm
+ * against the callers.
+ */
+const char *
+JSHash::isJSAttrib (register const char *str, register unsigned int len)
+{
+    /* lengthtable[k] is the length of wordlist[k]; 0 marks an empty slot. */
+    static const unsigned char lengthtable[] =
+    {
+        0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11, 7, 8,
+        9, 10, 11, 7, 8, 0, 10, 11, 7, 0, 0, 0, 6, 0,
+        0, 0, 0, 0, 0, 8
+    };
+    static const char * const wordlist[] =
+    {
+        "", "", "", "", "", "",
+        "onblur",
+        "onkeyup",
+        "onselect",
+        "onmouseup",
+        "onmouseout",
+        "onmouseover",
+        "onreset",
+        "onunload",
+        "onkeydown",
+        "onkeypress",
+        "onmousedown",
+        "onclick",
+        "onsubmit",
+        "",
+        "ondblclick",
+        "onmousemove",
+        "onfocus",
+        "", "", "",
+        "onload",
+        "", "", "", "", "", "",
+        "onchange"
+    };
+
+    if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+        register int key = hash (str, len);
+
+        if (key <= MAX_HASH_VALUE && key >= 0)
+            if (len == lengthtable[key])
+            {
+                register const char *s = wordlist[key];
+
+                if (!strcasecmp(str, s))
+                    return s;
+            }
+    }
+    return 0;
+}
--- squid-3.0_STABLE9/src/filters/jsre.h 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/jsre.h 2004-11-28 19:39:27.000000000 +0100
@@ -0,0 +1,5 @@
+/* This file was generated by makejsre - do not edit */
+#define HTML_mocha "(m|&#(77|109|x[46]d);?|%[46]d)(o|&#(79|111|x[46]f);?|%[46]f)(c|&#(67|99|x[46]3);?|%[46]3)(h|&#(72|104|x[46]8);?|%[46]8)(a|&#(65|97|x[46]1);?|%[46]1)"
+#define HTML_javascript 
"(j|&#(74|106|x[46]a);?|%[46]a)(a|&#(65|97|x[46]1);?|%[46]1)(v|&#(86|118|x[57]6);?|%[57]6)(a|&#(65|97|x[46]1);?|%[46]1)(s|&#(83|115|x[57]3);?|%[57]3)(c|&#(67|99|x[46]3);?|%[46]3)(r|&#(82|114|x[57]2);?|%[57]2)(i|&#(73|105|x[46]9);?|%[46]9)(p|&#(80|112|x[57]0);?|%[57]0)(t|&#(84|116|x[57]4);?|%[57]4)" +#define HTML_livescript "(l|&#(76|108|x[46]c);?|%[46]c)(i|&#(73|105|x[46]9);?|%[46]9)(v|&#(86|118|x[57]6);?|%[57]6)(e|&#(69|101|x[46]5);?|%[46]5)(s|&#(83|115|x[57]3);?|%[57]3)(c|&#(67|99|x[46]3);?|%[46]3)(r|&#(82|114|x[57]2);?|%[57]2)(i|&#(73|105|x[46]9);?|%[46]9)(p|&#(80|112|x[57]0);?|%[57]0)(t|&#(84|116|x[57]4);?|%[57]4)" +#define HTML_eval "(e|&#(69|101|x[46]5);?|%[46]5)(v|&#(86|118|x[57]6);?|%[57]6)(a|&#(65|97|x[46]1);?|%[46]1)(l|&#(76|108|x[46]c);?|%[46]c)" --- squid-3.0_STABLE9/src/filters/makejsre 1970-01-01 01:00:00.000000000 +0100 +++ squid-3.0_STABLE9+filter0.2/src/filters/makejsre 2004-11-28 19:39:27.000000000 +0100 @@ -0,0 +1,25 @@ +#!/usr/bin/perl + +# For all command line arguments, make REs (POSIX extended) out of them +# where every character is matched by +# - itself; +# - the &#DEC; form, +# - the &#xHEX; form, +# - each with the semicola optional; +# - the %HEX form. +# - DEC and HEX forms match the character and its upcase equivalent. +# Hopefully this catches all forms in which those keywords can be +# expressed in HTML. (The whole RE is treated as case-insensitive.) 
+
+print "/* This file was generated by makejsre - do not edit */\n";
+# One "#define HTML_<word>" line is printed per command line argument.
+foreach (@ARGV) {
+    tr/A-Z/a-z/; # we need lowercase (and ASCII charset) below
+    $x=$_;
+    # Replace every character by an alternation of: the character itself,
+    # its decimal entity (upper-case code = ord-32, or lower-case code),
+    # its hex entity, and its %HEX escape. The high hex digit is emitted as
+    # a two-digit class [<ord/16 - 2><ord/16>]; the divisions are fractional
+    # and only their integer part is printed by %x.
+    s~.~sprintf(
+        "(%s|&#(%d|%d|x[%x%x]%x);?|%%[%x%x]%x)",
+        $&, ord($&)-32, ord($&),
+        (ord($&)/16)-2, ord($&)/16, ord($&)&15,
+        (ord($&)/16)-2, ord($&)/16, ord($&)&15
+    )~ge;
+    printf "#define HTML_%s \"%s\"\n", $x, $_;
+}
--- squid-3.0_STABLE9/src/filters/redirect.sample 1970-01-01 01:00:00.000000000 +0100
+++ squid-3.0_STABLE9+filter0.2/src/filters/redirect.sample 2004-11-04 12:17:49.000000000 +0100
@@ -0,0 +1,35 @@
+# This is an example on how to configure the redirect.so module.
+# Each line consists of either one field, or two fields separated by TAB.
+# One field means: redirect to the default replacement pattern.
+# First field being a single "!" means: define the default replacement
+# pattern. Two fields means: replace with second field.
+# Patterns with a - in front are case-insensitive.
+
+### This shows how to reject ad banners:
+# Define the replacement. In my case this is a 1x1 GIF.
+# Put this line first.
+! http://127.0.0.1/dummy.gif
+
+# Ad banner patterns we don't want to see.
+-http://ad\.doubleclick\.net(:80)?(/.*)?$
+-http://adforce\.imgis\.com(:80)?(/.*)?$
+-/advert(i[sz](e(ments?)?|ing))?/
+-/banners?/
+
+### The replacement URL below returns error 404 on my server.
+# This is to guard against special Netscape stupidities.
+/netscape-related.html$ http://127.0.0.1/404
+http://javascript-of-unknown-origin\.netscape\.com http://127.0.0.1/404
+# ...and Microsoft stupidities.
+/favicon.ico$ http://127.0.0.1/404
+
+### This example shows how to redirect requests to a nearer mirror.
+-http://www.xfree86.org(:80)?(/.*)?$ http://www.uni-paderborn.de/mirrors/xfree86\2
+
+# Note the common pattern
+# (:80)?(/.*)?$
+# at the end of the URLs. This should catch all combinations of URLs with
+# or without default port number, and with or without the trailing slash
+# if the path is null. 
\2 is the whole path component, including +# leading slash. + --- squid-3.0_STABLE9/src/http.cc 2008-09-07 10:58:23.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/http.cc 2008-09-07 15:51:37.000000000 +0200 @@ -1382,6 +1382,31 @@ HttpStateData::httpBuildRequestHeader(Ht } } +#if USE_FILTERS + /* Remove the NOFILTER hack from the Host header. + FIXME: are there other headers to take care of? + */ + { + String str = hdr_in->getStrOrList(HDR_HOST); + if (str.size() > 9) { + const char *x = str.buf() + str.size() - 9; + if (!strcasecmp(x, ".nofilter")) { + const char *h = getMyHostname(); + int hl = strlen(h); + x -= hl + 1; + if (str.size() > hl + 10 && + *x == '.' && + !strncasecmp(x + 1, h, hl)) { + str.cutPointer(x); + hdr_out->delById(HDR_HOST); + hdr_out->putStr(HDR_HOST, str.buf()); + } + } + } + str.clean(); + } +#endif + /* append Authorization if known in URL, not in header and going direct */ if (!hdr_out->has(HDR_AUTHORIZATION)) { if (!request->flags.proxying && *request->login) { --- squid-3.0_STABLE9/src/main.cc 2008-09-07 10:58:22.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/main.cc 2008-09-07 11:36:20.000000000 +0200 @@ -51,6 +51,9 @@ #include "ACL.h" #include "htcp.h" #include "StoreFileSystem.h" +#if USE_FILTERS +#include "FilterModule.h" +#endif #include "DiskIO/DiskIOModule.h" #include "comm.h" #if USE_EPOLL @@ -624,6 +627,9 @@ mainReconfigure(void) #endif redirectShutdown(); +#if USE_FILTERS + FilterModule::destroyAll(); +#endif authenticateShutdown(); externalAclShutdown(); storeDirCloseSwapLogs(); @@ -655,6 +661,9 @@ mainReconfigure(void) idnsInit(); #endif +#if USE_FILTERS + FilterModule::initAll(); +#endif redirectInit(); authenticateInit(&Config.authConfiguration); externalAclInit(); @@ -852,6 +861,10 @@ mainInitialize(void) #endif +#if USE_FILTERS + FilterModule::initAll(); +#endif + redirectInit(); authenticateInit(&Config.authConfiguration); @@ -1628,6 +1641,9 @@ SquidShutdown() #endif redirectShutdown(); +#if USE_FILTERS + 
FilterModule::destroyAll(); +#endif externalAclShutdown(); icpConnectionClose(); #if USE_HTCP --- squid-3.0_STABLE9/src/structs.h 2008-09-07 10:58:20.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/structs.h 2008-09-07 11:36:20.000000000 +0200 @@ -148,6 +148,11 @@ unsigned int accel: unsigned int vhost: 1; /* uses host header */ +#if USE_FILTERS +unsigned int nofilter: + 1; /* don't filter */ +#endif + int vport; /* virtual port support, -1 for dynamic, >0 static*/ int disable_pmtu_discovery; #if LINUX_TPROXY @@ -683,6 +688,10 @@ struct _SquidConfig header_mangler reply_header_access[HDR_ENUM_END]; char *coredump_dir; char *chroot_dir; +#ifdef USE_FILTERS + filter_module *modules; +#endif + #if USE_CACHE_DIGESTS struct @@ -1244,7 +1253,9 @@ struct request_flags #if LINUX_TPROXY tproxy = 0; #endif - +#if USE_FILTERS + filter = 1; +#endif } unsigned int range: @@ -1294,10 +1305,16 @@ unsigned int accelerated: unsigned int transparent: 1; +#if USE_FILTERS +unsigned int filter: + 1; +#endif + #if LINUX_TPROXY unsigned int tproxy: 1; /* spoof client ip using tproxy */ #endif + unsigned int internal: 1; @@ -1691,4 +1708,15 @@ struct _customlog customlog_type type; }; +#ifdef USE_FILTERS +struct _filter_module { + char *module; + acl_access *access_list; + wordlist *params; + const char *cfg_filename; + int config_lineno; + filter_module *next; +}; +#endif + #endif /* SQUID_STRUCTS_H */ --- squid-3.0_STABLE9/src/typedefs.h 2008-09-07 10:58:20.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/typedefs.h 2008-09-07 11:36:20.000000000 +0200 @@ -208,6 +208,10 @@ typedef variable_list *(oid_ParseFn) (va typedef struct _snmp_request_t snmp_request_t; #endif +#if USE_FILTERS +typedef struct _filter_module filter_module; +#endif + typedef void FREE(void *); typedef void CBDUNL(void *); typedef void FOCB(void *, int fd, int errcode); --- squid-3.0_STABLE9/src/url.cc 2008-09-07 10:58:20.000000000 +0200 +++ squid-3.0_STABLE9+filter0.2/src/url.cc 2008-09-07 11:36:20.000000000 
+0200 @@ -193,6 +193,9 @@ urlParse(method_t method, char *url, Htt int port; protocol_t protocol = PROTO_NONE; int l; +#if USE_FILTERS + int f = 1; +#endif proto[0] = host[0] = urlpath[0] = login[0] = '\0'; if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) { @@ -270,6 +273,29 @@ urlParse(method_t method, char *url, Htt if (Config.appendDomain && !strchr(host, '.')) strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1); +#if USE_FILTERS + /* check if hostname ends in ("." myhost ".nofilter") */ + if ((l = strlen(host)) > 9) { + for (t = host + l - 9, l = 8; l > 0; l--) + if (t[l] != ".nofilter"[l]) + break; + if (!l) { + const char *h = getMyHostname(); + int k = strlen(host) - 10; + l = strlen(h) - 1; + if (k > l) { + for (; l >= 0; k--, l--) + if (host[k] != xtolower(h[l])) + break; + if (l == -1 && host[k] == '.') { + host[k] = '\0'; + f = 0; + } + } + } + } +#endif + if (port < 1 || port > 65535) { debugs(23, 3, "urlParse: Invalid port '" << port << "'"); return NULL; @@ -329,6 +355,9 @@ urlParse(method_t method, char *url, Htt xstrncpy(request->host, host, SQUIDHOSTNAMELEN); xstrncpy(request->login, login, MAX_LOGIN_SZ); request->port = (u_short) port; +#if USE_FILTERS + request->flags.filter = f; +#endif return request; }