#!/usr/bin/perl -wT
# -*- Mode: perl; tab-width: 4; indent-tabs-mode: nil; -*-
#
# data: The data: URI kitchen
#
# Copyright (c) 2002 by Ian Hickson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

use strict;
use CGI;
use CGI::Carp;
use HTML::Entities;
use MIME::Base64;
use URI::Escape;
use LWP::UserAgent;

# collect data from user
my $query = CGI->new();
my $type = $query->param('type') || '';
my $base64 = $query->param('base64') || '0';
my $content = $query->param('content') || '';
my $typeFromFile = $query->param('typeFromFile') || '0';
my $file = $query->upload('file');
my $uri = $query->param('uri') || undef;
my $ignoreErrors = $query->param('ignoreErrors') || 0;

if ((defined $file ? -s $file : length $content) > 8*1024) {
    print <<end;
Status: 400 Bad User, Bad
Content-Type: text/plain

8KB upload limit exceeded
end
    exit;
}

if ($file and $typeFromFile) {
    my $filetype = $query->uploadInfo($query->param('file'))->{'Content-Type'};
    $type = $filetype if defined $filetype;
}

$type =~ m/^ *([-+_a-zA-Z0-9]+\/[-+_a-zA-Z0-9]+(?:;[-+_=:*; a-zA-Z0-9]*)?) *$/;
$type = $1;

if ($type) {
    if (defined($file)) {
        local $/ = undef;
        $content = <$file>;
    } elsif (defined($uri)) {
        my $ua = LWP::UserAgent->new();
        $ua->protocols_allowed(['http', 'https']);
        my $request = HTTP::Request->new(GET => $uri);
        my $response = $ua->request($request);
        if ($response->is_error && not $ignoreErrors) {
            my $message = $response->status_line;
            foreach ($uri, $type, $base64, $message) {
                $_ = encode_entities($_);
            }
            print <<end;
Status: 502 Bad Gateway
Content-Type: text/html;charset=utf-8

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN">
<html lang="en">
 <head>
  <title>The data: URI kitchen: Error</title>
 </head>
 <body>
  <h1>The data: URI kitchen: Error</h1>
  <p>The URI you provided:</p>
  <blockquote><pre><a href="$uri">$uri</a></pre></blockquote>
  <p>...is not working:</p>
  <blockquote><pre>$message</pre></blockquote>
  <form action="" method=post>
   <p>
    <input type="hidden" name="type" value="$type">
    <input type="hidden" name="base64" value="$base64">
    <input type="hidden" name="ignoreErrors" value="1">
    <input type="hidden" name="uri" value="$uri">
    <input type="submit" value="Try again and ignore errors">
   </p>
  </form>
 </body>
</html>
end
            exit;
        }
        $content = $response->content;
        $type = $response->content_type || $type;
    }

    #savelog($content, $type);

    my $result = "data:$type";
    if ($base64) {
        $result .= ';base64';
        $content = encode_base64($content, '');
    }
    $result .= ',' . uri_escape($content);
    # warn sprintf("Created a %d byte data: URI: %s\n", length($result), $result);
    my $resultEscaped = encode_entities($result);
    

    # Apache (at least the version this script was originally written
    # for) uses, it appears, an 8kb buffer when parsing headers
    # generated by scripts.
    #
    # This means that the longest URI that can be successfully sent
    # back using Location: headers is 8kb less the trailing CRLF less
    # the leading "Location: ", namely 8 * 1024 - 12 bytes.
    #
    # If it is longer than that, we'll just give the user a link.

    my $comment = '';
    if (length($result) < 8 * 1024 - 12) {
        print "Status: 303 See Other\nLocation: $result\n";
        $comment = "  <p>(Your browser should have redirected you automatically. If you see this it probably has a bug.)</p>\n";
    } else {
        print "Status: 200 OK\n";
    }
    print <<end;
Content-Type: text/html;charset=utf-8

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN">
<html lang="en">
 <head>
  <title>The data: URI kitchen: Results</title>
 </head>
 <body>
$comment  <p><a href="$resultEscaped"><code>$resultEscaped</code></a></p>
 </body>
</html>
end
} else {
    my $default = encode_entities(<<end);
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN">
<html lang="en">
 <head>
  <title>Test</title>
  <style type="text/css">
  </style>
 </head>
 <body>
  <p></p>
 </body>
</html>
end
    print <<end;
Content-Type: text/html;charset=utf-8

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN">
<html lang="en">
 <head>
  <title>The data: URI kitchen</title>
  <style type="text/css">
    textarea { display: block; }
    p { margin: 1em 0 0 0; }
    p + p.note { margin: 0; }
  </style>
 </head>
 <body>
  <form action="data" method="post" enctype="multipart/form-data">
   <h1>The data: URI kitchen</h1>
   <p>
    Type:
    <input type="text" name="type" value="text/html;charset=utf-8">
    <label><input type="checkbox" name="base64" value="1"> base64 </label>
<textarea rows="12" cols="80" name="content">$default</textarea>
   </p>
   <p>
    <label>Alternatively, give an HTTP URI to use as input: <input type="text" name="uri" size="70"></label>
   </p>
   <p class="note">If you use the URI option, the MIME type will be taken from the remote site, not the type field above.</p>
   <p>
    <label>Alternatively, upload a file: <input type="file" name="file"></label>
    <label><input type="checkbox" name="typeFromFile" checked="checked"> Trust UA-provided MIME type, if any.</label>
   </p>
   <p class="note">If you don't trust the UA-provided type, then the type you provide in the text field at the top will be used instead.</p>
   <p>
    <input type="submit" value="Generate">
   </p>
  </form>
  <p>See <a href="http://www.ietf.org/rfc/rfc2397">RFC 2387</a>. Privacy policy: All activity is logged, but only serious abuse will be publicised.</p>
  <pre>HTML4.0 DOCTYPE: &lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN"&gt;
XHTML1 namespace: http://www.w3.org/1999/xhtml</pre>
 </body>
</html>
end
}

sub savelog {
    my($file, $type) = @_;
    my $directory = '/home/ianh/hixie.ch/software/utilities/cgi/data/files/';
    opendir(DIRECTORY, $directory) or return;
    my $filename = 1;
    foreach my $file (grep { /^file-[0-9]+-/ } readdir(DIRECTORY)) {
        $file =~ m/^file-([0-9]+)-/os;
        if ($filename <= $1) {
            $filename = $1 + 1;
        }
    }
    my $note = '';
    $filename = "file-$filename-";
    if ($type =~ m|^image/p?jpeg$|osi) {
        $filename .= '.jpeg';
    } elsif ($type =~ m|^image/gif$|osi) {
        $filename .= '.gif';
    } elsif ($type =~ m|^image/(?:x-)?png$|osi) {
        $filename .= '.png';
    } elsif ($type =~ m|^image/bmp$|osi) {
        $filename .= '.bmp';
    } elsif ($type =~ m|^image/(?:x-)?icon?$|osi) {
        $filename .= '.ico';
    } elsif ($type =~ m|^image/svg+xml(?: *;.*)?$|osi) {
        $filename .= '.svg';
    } elsif ($type =~ m|^audio/wav$|osi) {
        $filename .= '.wav';
    } elsif ($type =~ m|^audio/mpeg$|osi) {
        $filename .= '.mp3';
    } elsif ($type =~ m|^video/mpeg$|osi) {
        $filename .= '.mpeg';
    } elsif ($type =~ m|^text/html(?: *;.*)?$|osi) {
        $filename .= '.html';
    } elsif ($type =~ m|^text/css(?: *;.*)?$|osi) {
        $filename .= '.css';
    } elsif ($type =~ m|^text/plain(?: *;.*)?$|osi) {
        $filename .= '.txt';
    } elsif ($type =~ m|^text/javascript(?: *;.*)?$|osi) {
        $filename .= '.js';
    } elsif ($type =~ m|^text/xml(?: *;.*)?$|osi) {
        $filename .= '.xml';
    } elsif ($type =~ m|^application/pdf$|osi) {
        $filename .= '.pdf';
    } elsif ($type =~ m|^application/rtf$|osi) {
        $filename .= '.rtf';
    } elsif ($type =~ m|^application/xml(?: *;.*)?$|osi) {
        $filename .= '.xml';
    } elsif ($type =~ m|^application/octet-stream$|osi) {
        $filename .= '.bin';
    } elsif ($type =~ m|^application/(?:x-)?zip(?:-compressed)?$|osi) {
        $filename .= '.zip';
    } elsif ($type =~ m|^application/(?:x-)?(?:font-)?ttf$|osi) {
        $filename .= '.ttf';
    } elsif ($type =~ m|^application/x-shockwave-flash$|osi) {
        $filename .= '.swf';
    } elsif ($type =~ m|^application/msword$|osi) {
        $filename .= '.doc';
    } elsif ($type =~ m|^font/(?:x-)?(?:font-)?ttf$|osi) {
        $filename .= '.ttf';
    } else {
        $note = "# unknown type: $type\n";
    }
    closedir(DIRECTORY);
    open(OUTPUT, ">$directory$filename") or return;
    print OUTPUT $file;
    close(OUTPUT);
    open(HTACCESS, ">>${directory}.htaccess") or return;
    $ENV{REMOTE_ADDR} =~ m/^([^\n\r]*)$/gos;
    my $ip = $1;
    $ENV{HTTP_USER_AGENT} =~ m/^([^\n\r]*)$/gos;
    my $ua = $1;
    print HTACCESS "# IP: $ip\n# User-Agent: $ua\n$note<files $filename>\n  ForceType $type\n</files>\nAddDescription \"$type from $ip\" $filename\n\n";
    close(HTACCESS);
}
