/*
    $Revision: 15 $
    $Date: 3/11/04 21:51 $
    Copyright © 1999-2007, FSL Technologies Limited.
    Contact "http://fost.3.felspar.com".
*/


#include "stdafx.h"
#include "FOST.internet.hpp"
#include "FOST.com.hpp"


using namespace FSLib;
using namespace FSLib::Exceptions;


namespace {

    Revision c_revision( L"$Archive: /FOST.3/F3Util/http.cpp $", __DATE__, L"$Revision: 15 $", L"$Date: 3/11/04 21:51 $" );
    const Setting c_userAgent( L"$Archive: /FOST.3/F3Util/http.cpp $", L"HTTP", L"UserAgent", L"FSLTech/0.0 ", true );

    // Fixed texts returned by the message() member of each exception type below.
    const wchar_t *NotHttpMsg = L"Scheme not recognised as a HTTP protocol";
    const wchar_t *BadRequestMsg = L"Request method not recognised";
    const wchar_t *Base64Msg = L"Base 64 encoding error";
    const wchar_t *HeaderMsg = L"HTTP invalid header error";
    const wchar_t *TransportMsg = L"TCP/SSL Transport problem";
    const wchar_t *BodyMsg = L"HTTP protocol error while fetching body";

    // The two schemes that UserAgent::create_connection() knows how to open.
    const SchemePort c_http( L"http", 80 );
    const SchemePort c_https( L"https", 443 );

}


/*
    Exceptions
*/


// Records the offending scheme's monicker and port in the exception information.
inline NotHttp::NotHttp( const Scheme &s ) {
    m_info << "Scheme: " << s.monicker() << " on port " << s.port() << std::endl;
}
inline const wchar_t *const NotHttp::message() const {
    return NotHttpMsg;
}


inline BadRequest::BadRequest( const FSLib::wstring &m )
: Exception( m ) {
}
inline const wchar_t *const BadRequest::message() const {
    return BadRequestMsg;
}


// Records the description together with the offending numeric value.
inline Base64::Base64( const FSLib::wstring &m, int value ) {
    m_info << m << " : " << value << std::endl;
}
inline const wchar_t *const Base64::message() const {
    return Base64Msg;
}


inline Header::Header( const FSLib::wstring &m )
: Protocol( m ) {
}
inline const wchar_t *const Header::message() const {
    return HeaderMsg;
}


inline Body::Body( const FSLib::wstring &m )
: Protocol( m ) {
}
inline const wchar_t *const Body::message() const {
    return BodyMsg;
}


inline Transport::Transport( const FSLib::wstring &m )
: Protocol( m ) {
}
inline const wchar_t *const Transport::message() const {
    return TransportMsg;
}


/*
    Http class
*/


// Every constructor builds the request and then opens the connection straight away,
// so each of them can throw whatever create_connection() throws.
inline UserAgent::UserAgent( const FSLib::wstring &a_method, const Url &a_url )
: m_url( a_url ), m_request( a_method, a_url ), m_reply(), m_certificate() {
    create_connection();
}
inline UserAgent::UserAgent( const FSLib::wstring &a_method, const Url &a_url, const FSLib::wstring &a_certificate )
: m_url( a_url ), m_request( a_method, a_url ), m_reply(), m_certificate( a_certificate ) {
    create_connection();
}
// The two constructors without a method default to GET.
inline UserAgent::UserAgent( const Url &a_url )
: m_url( a_url ), m_request( L"GET", a_url ), m_reply(), m_certificate() {
    create_connection();
}
inline UserAgent::UserAgent( const Url &a_url, const FSLib::wstring &a_certificate )
: m_url( a_url ), m_request( L"GET", a_url ), m_reply(), m_certificate( a_certificate ) {
    create_connection();
}


// Sends the request and reads the reply, skipping over any 1xx replies.
// A Transport exception causes a reconnect and another attempt; once the retry
// counter has passed 3 the exception is propagated to the caller instead.
inline void UserAgent::go() {
    if ( m_connection->bad() || m_connection->eof() ) {
        create_connection();
    }
    for ( int retry = 0; ; retry++ ) {
        try {
            m_request.print( *m_connection );
            do {
                m_reply.fetch( *m_connection );
                // if( (m_reply.header( "Connection" )==std::string("close")) || ((m_reply.statusHex()>>8)>=4) ) {
                // Want to check if there's a Connection: close header, too, but this throws a mysterious exception...
                // (Reply::header() throws Header whenever the requested header is absent, which is
                // the likely cause; a find() on headers() would be needed to test for it safely.)
                if ( ( m_reply.statusHex() >> 8 ) >= 4 ) {
                    create_connection();
                }
            } while ( ( m_reply.statusHex() >> 8 ) == 1 ); // Loop through any continues.
            break;
        } catch ( FSLib::Exceptions::Transport & ) {
            // If we're under the retry limit, then we'll try again. Otherwise, throw the whole thing.
            // Note that the test is "retry > 3", so up to five attempts are made in total.
            if ( retry > 3 ) {
                // Re-throw the original object; "throw e" would copy it and slice any derived type.
                throw;
            } else {
                create_connection();
            }
        }
    }
}


// Replaces m_connection with a fresh stream for m_url: an SSLStream for https (passing the
// certificate when one was supplied) or a TCPStream for http. Any other scheme throws NotHttp.
inline void UserAgent::create_connection() {
    if ( m_url.scheme().monicker() == c_https.monicker() ) {
        if ( m_certificate.length() == 0 ) {
            m_connection = boost::shared_ptr< std::iostream >( new SSLStream( m_url.host().address(), m_url.port() ) );
            //(*mylog)<<"Using SSL with no certificate.\n";
        } else {
            m_connection = boost::shared_ptr< std::iostream >( new SSLStream( m_url.host().address(), m_url.port(), m_certificate ) );
            //(*mylog)<<"Using SSL with certificate "<<m_certificate<<"\n";
        }
    // NOTE(review): the text between the log comment above and "( new TCPStream" was lost from the
    // copy of this file that was reviewed. The c_http comparison below is reconstructed from the
    // c_http constant and the surviving TCPStream construction - confirm against source control.
    } else if ( m_url.scheme().monicker() == c_http.monicker() ) {
        m_connection = boost::shared_ptr< std::iostream >( new TCPStream( m_url.host().address(), m_url.port() ) );
    } else {
        throw NotHttp( m_url.scheme() );
    }
}


inline const UserAgent::Reply &UserAgent::reply() const {
    return m_reply;
}


inline UserAgent::Request &UserAgent::request() {
    // Cannot be const - need to add headers.
    return m_request;
}


/*
    UserAgent::Request
*/


// Stores the method and URL and immediately adds the standard headers.
inline UserAgent::Request::Request( const FSLib::wstring &a_method, const Url &a_url )
: m_method( a_method ), m_url( a_url ), m_header(), m_post_values() {
    addStdHeaders();
}


inline void UserAgent::Request::addHeader( const FSLib::wstring a_header, const FSLib::wstring a_val ) {
    m_header.insert( std::make_pair( a_header, a_val ) );
}


inline void UserAgent::Request::addValue( const FSLib::wstring a_var, const FSLib::wstring a_val ) {
    m_post_values.insert( std::make_pair( a_var, a_val ) );
}


// Adds User-Agent (from the HTTP/UserAgent setting) and Host. When the URL carries a user name
// an "Authorization: Basic" header is added too, built from "user:password" (the password is
// left empty when the URL has none).
inline void UserAgent::Request::addStdHeaders() {
    // addHeader( "Connection", "close" ); // Useful to force closure of connection after each transaction.
    addHeader( L"User-Agent", Setting::value( L"HTTP", L"UserAgent" ) );
    addHeader( L"Host", m_url.host().hostName() );
    if ( !m_url.user().isnull() ) {
        string password;
        if ( !m_url.password().isnull() ) {
            password = narrow( m_url.password().value() );
        }
        addHeader( L"Authorization", L"Basic " + widen( base64_encode( narrow( m_url.user().value() ) + ":" + password ) ) );
    }
}


inline const Url &UserAgent::Request::url() const {
    return m_url;
}


inline const UserAgent::Request::t_headers &UserAgent::Request::headers() const {
    return m_header;
}


inline const UserAgent::Request::t_values &UserAgent::Request::values() const {
    return m_post_values;
}


inline const FSLib::wstring &UserAgent::Request::method() const {
    return m_method;
}


// Writes the whole request (request line, headers and, for POST, the form body) to a_output
// and flushes it. Only GET and POST are accepted; anything else throws BadRequest.
// The added values are URL encoded: for GET they are appended to the path after a '?',
// for POST they become the body and Content-Length/Content-Type headers are added.
// Throws Transport when the stream reports bad() or eof() at any of the three check points.
inline void UserAgent::Request::print( std::ostream &a_output ) {
    if ( m_method != L"GET" && m_method != L"POST" ) {
        throw BadRequest( m_method + L" is an unknown method" );
    }

    // Actual request line.
    a_output << narrow( m_method ) << ' ' << narrow( m_url.pathSpec() );
    // Test the transport layer.
    if ( a_output.bad() || a_output.eof() ) {
        throw Transport( m_url.scheme().monicker() + L" transport layer failed immediately." );
    }

    FSLib::string data = url_encode_variables();
    if ( data.length() > 0 ) {
        if ( m_method == L"GET" ) {
            a_output << '?' << data;
        } else {
            addHeader( L"Content-Length", toString( static_cast< unsigned __int64 >( data.length() ) ) );
            addHeader( L"Content-Type", L"application/x-www-form-urlencoded" );
        }
    }
    // Does this not assume that the output mode is binary? (kgs)
    a_output << " HTTP/1.1\r\n";

    // Now headers...
    for ( t_headers::const_iterator it( headers().begin() ); it != headers().end(); ++it ) {
        a_output << narrow( (*it).first ) << ": " << narrow( (*it).second ) << "\r\n";
    }
    a_output << "\r\n";
    if ( a_output.bad() || a_output.eof() ) {
        throw Transport( m_url.scheme().monicker() + L" transport layer failed during request header." );
    }

    // Now we should, if we're a POST, do the body, too.
    if ( m_method == L"POST" && data.length() > 0 ) {
        a_output << data.c_str();
    }
    if ( a_output.bad() || a_output.eof() ) {
        throw Transport( m_url.scheme().monicker() + L" transport layer failed during request body." );
    }
    a_output.flush();
}


// Returns the three character escape "%XY" for ch, where XY is the value of ch as two
// upper case hexadecimal digits.
inline FSLib::string UserAgent::Request::url_encode_to_hex( const unsigned char ch ) const {
    static const char hexDigits[] = "0123456789ABCDEF";
    FSLib::string res( "%" );
    res += hexDigits[ ch >> 4 ];
    res += hexDigits[ ch & 0x0F ];
    return res;
}


// Returns a copy of data in which ASCII letters and digits are kept, each space becomes '+'
// and every other character is replaced by its %XY escape.
inline FSLib::string UserAgent::Request::url_encode( const FSLib::string &data ) const {
    FSLib::string result( data );
    // NOTE(review): the loop header and the start of the first condition were lost from the copy
    // of this file that was reviewed; "i < result.length()" and the leading "result[i] >= 'A'"
    // are reconstructed from the surviving remainder of the condition - confirm against source control.
    // The index is a narrow string size_type (the original declared it as FSLib::wstring::size_type).
    for ( std::string::size_type i = 0; i < result.length(); i++ ) {
        if ( ( ( result[i] >= 'A' ) && ( result[i] <= 'Z' ) )
                || ( ( result[i] >= 'a' ) && ( result[i] <= 'z' ) )
                || ( ( result[i] >= '0' ) && ( result[i] <= '9' ) ) ) {
            continue; // Do nothing if we're looking at a letter or a number.
        } else if ( result[i] == ' ' ) {
            result[i] = '+'; // Change any spaces to +.
        } else {
            result.replace( i, 1, url_encode_to_hex( result[i] ) ); // Change anything else to %{0-9A-F}{0-9A-F} notation.
            i += 2; // Skip the newly added bits.
        }
    }
    return result;
}


// Joins every added value into "name=value" pairs separated by '&', URL encoding both the
// name and the value. Returns an empty string when no values have been added.
inline FSLib::string UserAgent::Request::url_encode_variables() const {
    FSLib::string result( "" );
    for ( t_values::const_iterator P( m_post_values.begin() ); P != m_post_values.end(); ++P ) {
        if ( P != m_post_values.begin() ) {
            result += '&';
        }
        result += url_encode( narrow( (*P).first ) );
        result += '=';
        result += url_encode( narrow( (*P).second ) );
    }
    return result;
}


/*
    From RFC2045:

                    Table 1: The Base64 Alphabet

     Value Encoding  Value Encoding  Value Encoding  Value Encoding
         0 A            17 R            34 i            51 z
         1 B            18 S            35 j            52 0
         2 C            19 T            36 k            53 1
         3 D            20 U            37 l            54 2
         4 E            21 V            38 m            55 3
         5 F            22 W            39 n            56 4
         6 G            23 X            40 o            57 5
         7 H            24 Y            41 p            58 6
         8 I            25 Z            42 q            59 7
         9 J            26 a            43 r            60 8
        10 K            27 b            44 s            61 9
        11 L            28 c            45 t            62 +
        12 M            29 d            46 u            63 /
        13 N            30 e            47 v
        14 O            31 f            48 w         (pad) =
        15 P            32 g            49 x
        16 Q            33 h            50 y
*/


// For the following, bits MUST be less than 64. In other words, 6 bits only.
// Returns the Base64 alphabet character for the value; throws Base64 for anything above 63.
inline char UserAgent::Request::base64_encode_6bits( const unsigned short int bits ) const {
    static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    if ( bits > 63 ) {
        throw Base64( L"Too many bits", bits );
    }
    return alphabet[ bits ];
}


// Should be passed 3 bytes *or less*. If passed less, then we pad with '='.
// Returns the four character Base64 group for the bytes, or an empty string for empty input.
// Throws Base64 when more than three bytes are passed.
inline FSLib::string UserAgent::Request::base64_encode_3bytes( const FSLib::string &data ) const {
    if ( data.length() > 3 ) {
        throw Base64( L"Too many bytes", int( data.length() ) );
    }
    std::string ret( "" );
    if ( data.length() == 0 ) {
        // Nothing to encode. Without this guard data[0] would be read from an empty string
        // and a spurious "AA==" group would be produced.
        return ret;
    }
    unsigned short int bits = 0;
    // Okay, what surprised me (and got me) is that we deal with bits in MSB->LSB order.

    // Stage one. Extract the high bits of the first byte.
    bits = ( data[0] & 0xFC ) >> 2; // 0xFC should be 1111.1100 (Dots separate nibbles.)
    ret += base64_encode_6bits( bits );

    // Stage two. Bottom two bits of the first byte, top four of the second.
    bits = ( data[0] & 0x03 ) << 4; // 0x03 should be 0000.0011
    if ( data.length() > 1 ) {
        bits += ( data[1] & 0xF0 ) >> 4; // 0xF0 should be 1111.0000 (And if I've got this wrong... :-) )
    }
    ret += base64_encode_6bits( bits );

    // Stage three. Bottom four of [1], top two of [2].
    if ( data.length() > 1 ) {
        bits = ( data[1] & 0x0F ) << 2; // 0000.1111
        if ( data.length() > 2 ) {
            bits += ( data[2] & 0xC0 ) >> 6; // 1100.0000
        }
        ret += base64_encode_6bits( bits );
    } else {
        ret += '=';
    }

    // Stage four. Bottom six bits of [2].
    if ( data.length() > 2 ) {
        bits = ( data[2] & 0x3F );
        ret += base64_encode_6bits( bits );
    } else {
        ret += '=';
    }

    return ret;
}


// Returns the Base64 encoding of data, produced three input bytes at a time.
inline FSLib::string UserAgent::Request::base64_encode( const FSLib::string &data ) const {
    FSLib::string ret( "" );
    // The bound must be "<" and not "<=": with "<=" an input whose length is a multiple of three
    // (including the empty string) gets one extra group encoded from an empty substring.
    for ( std::string::size_type i = 0; i < data.length(); i += 3 ) {
        ret += base64_encode_3bytes( FSLib::string( data, i, 3 ) );
        // At first glance, this looks like an overflow, but the third argument to the constructor, if
        // 'too big', is taken to mean the 'rest' of the string. Given that the calculation is already being
        // done, we save some tautological processing here...
    }
    return ret;
}


/*
    UserAgent::Reply
*/


inline UserAgent::Reply::Reply()
: m_body(), m_status_line(), m_header(), m_status( 0 ) {
    // Nowt.
}


// Reads characters up to and excluding the next '\n', dropping every '\r' on the way.
// Returns the line together with the stream's good() state after the read.
inline std::pair< bool, FSLib::string > UserAgent::Reply::get_line( std::istream &connection ) const {
    FSLib::string line;
    while ( connection.good() ) {
        char c;
        if ( !connection.get( c ) ) {
            // End of stream or read failure; c was not set, so it must not be examined.
            break;
        } else if ( c == '\r' ) {
            continue;
        } else if ( c == '\n' ) {
            break;
        } else {
            line += c;
        }
    }
    return make_pair( connection.good(), line );
}


// Parses a chunk-size line as hexadecimal. Spaces are ignored and parsing stops at the first
// ';' (start of chunk options). Any other non-hex character throws Body.
inline int UserAgent::Reply::unHex( const FSLib::string &text ) const {
    int ret = 0;
    // NOTE(review): the loop header and the decimal digit branch were lost from the copy of this
    // file that was reviewed; they are reconstructed here to match the surviving A-F and a-f
    // branches - confirm against source control.
    for ( unsigned int i = 0; i < text.length(); i++ ) {
        if ( ( text[i] >= '0' ) && ( text[i] <= '9' ) ) {
            ret += ( text[i] - '0' );
        } else if ( ( text[i] >= 'A' ) && ( text[i] <= 'F' ) ) {
            ret += ( text[i] - 'A' + 10 );
        } else if ( ( text[i] >= 'a' ) && ( text[i] <= 'f' ) ) {
            ret += ( text[i] - 'a' + 10 );
        } else if ( text[i] == ' ' ) {
            // Should NEVER happen, but some implementations apparently pad the RHS with spaces.
            continue; // Ignore them. (Skipping the shift.)
        } else if ( text[i] == ';' ) {
            // End of chunk-size, beginning of options.
            // We don't understand any options - none are defined, as far as I can tell from 2068.
            break; // Quit the loop. (Skipping the shift.)
        } else {
            throw Body( L"Chunk size contains non-hex characters: " + widen( text ) );
        }
        ret <<= 4; // Make room for the next digit.
    }
    ret >>= 4; // Undo the shift made after the final digit.
    return ret;
}


// Reads the first line of the reply. When it starts with "HTTP" the three digit status code
// is stored in m_status and false is returned. Otherwise the line is taken to be the start of
// a HTTP/0.9 body, appended to m_body, and true is returned.
// Throws Transport when the stream is at eof() or bad() after the read, and Header when the
// status line is too short to hold a status code.
inline bool UserAgent::Reply::fetchStatus( std::istream &input ) {
    bool http09( false );
    if ( input.good() ) {
        std::pair< bool, string > firstLine( get_line( input ) );
        m_status_line = widen( firstLine.second );
        if ( input.eof() || input.bad() ) {
            throw Transport( L"Transport layer failed during reply." );
        }
        if ( 0 == m_status_line.find( L"HTTP" ) ) {
            // Okay, we're toasting, we have HTTP.
            // Wish I had regular expressions. Then it'd just be m_status_line =~ /^HTTP\/([0-9]\.[0-9])\s([0-9][0-9][0-9])\s(.*)$/
            // At least, in Perl.
            // Here, we pick the string at 9, for 3.
            // HTTP/1.1 200 ok
            // 0123456789AB
            if ( m_status_line.length() < 12 ) {
                // Without this check substr() either throws std::out_of_range (fewer than nine
                // characters) or silently yields a truncated status code.
                throw Header( L"Status line too short: " + m_status_line );
            }
            m_status = _wtoi( m_status_line.substr( 9, 3 ).c_str() );
        } else {
            m_body += firstLine.second;
            http09 = true; // If we don't get the expected "HTTP", then we're probably talking HTTP/0.9
        }
    }
    return http09;
}


// Reads header lines into m_header until an empty line (or the stream stops being good()).
// A non-empty line without ": " throws Header, unless it starts with "HTTP", in which case
// it is skipped.
inline void UserAgent::Reply::fetchHeaders( std::istream &input ) {
    while ( input.good() ) {
        std::pair< bool, FSLib::string > line( get_line( input ) );
        std::string::size_type s( line.second.find( ": " ) );
        if ( s != std::string::npos ) {
            // Break the line up into a pair. pair.first is everything before the ": ",
            // pair.second is everything after the ": ".
            // The split is done on the narrow string, where the offset was found, and each half
            // is widened on its own so the offset is never applied to the converted text.
            t_headers::value_type P(
                FSLib::wstring( widen( line.second.substr( 0, s ) ) ),
                FSLib::wstring( widen( line.second.substr( s + 2 ) ) ) );
            m_header.insert( P );
        } else if ( line.second != "" ) {
            if ( line.second.substr( 0, 4 ) != "HTTP" ) {
                // Observed cases where an extra status line is sent through. I suspect this is illegal, really.
                throw Header( L"Invalid header received: " + widen( line.second ) );
            }
        } else {
            break;
        }
    }
}


// Appends everything that can still be read from input to m_body.
inline void UserAgent::Reply::fetchBody( std::istream &input ) {
    char c;
    // Only append when the read succeeded, so no unset character is added on a failed read.
    while ( input.get( c ) ) {
        m_body += c;
    }
}


// Appends exactly length characters from input to m_body; throws Body when a read fails first.
inline void UserAgent::Reply::fetchBody( std::istream &input, int length ) {
    char c;
    while ( input.good() && length != 0 ) {
        if ( !input.get( c ) ) {
            throw Body( L"Unexpected closure of connection during Body." );
            // If we have a Content-Length, and we get an EOF before we've read all the characters,
            // then that's a problem. The body we now have is unlikely to be complete.
        }
        --length;
        m_body += c;
    }
}


// Reads a chunked body: a hexadecimal size line followed by that many characters and a CRLF,
// repeated until a size of zero, and then any trailing headers.
inline void UserAgent::Reply::fetchBodyChunked( std::istream &input ) {
    if ( input.good() ) {
        int content_length;
        for ( ;; ) {
            FSLib::string header( get_line( input ).second );
            if ( input.eof() ) {
                throw Body( L"Unexpected closure of connection during chunk fetch." );
            }
            content_length = unHex( header );
            if ( content_length == 0 ) {
                break;
            }
            fetchBody( input, content_length );
            get_line( input ); // Remove and discard trailing CRLF pair. (Why *did* they put that in the standard?)
        }
        // Chunks loaded in, now read in any extra headers.
        fetchHeaders( input );
    }
}


// Clears the previous reply and reads a complete new one: status line, headers and then the
// body, which is read chunked, by Content-Length, or to the end of the stream.
inline void UserAgent::Reply::fetch( std::istream &input ) {
    m_body = "";
    m_status = 99;
    m_header.erase( m_header.begin(), m_header.end() );
    m_status_line = L"";

    // Get status line.
    bool http09 = fetchStatus( input );
    // Get headers.
    if ( !http09 ) {
        fetchHeaders( input );
    }
    // Get body.
    if ( ( statusHex() >> 8 ) == 1 ) {
        return; // 1** responses contain no body. Others don't either, for example 204, but it's easier not to worry.
    }

    // Use find() and not operator[]: operator[] would insert an empty "Transfer-Encoding"
    // entry into the headers of every reply that did not send one.
    t_headers::const_iterator encoding( m_header.find( L"Transfer-Encoding" ) );
    bool chunked = ( encoding != m_header.end() && (*encoding).second == L"chunked" );
    if ( !chunked ) {
        if ( input.good() ) {
            t_headers::const_iterator length( m_header.find( L"Content-Length" ) );
            if ( length == m_header.end() ) {
                fetchBody( input );
            } else {
                fetchBody( input, _wtoi( (*length).second.c_str() ) );
            }
        }
    } else /* if(chunked) */ {
        fetchBodyChunked( input );
    }
}


// Returns the value of the named reply header; throws Header when the reply did not include it.
inline const FSLib::wstring &UserAgent::Reply::header( const FSLib::wstring &a_header ) const {
    t_headers::const_iterator found( m_header.find( a_header ) );
    if ( found != m_header.end() ) {
        return (*found).second;
    } else {
        throw Header( L"Header not provided by remote host: " + a_header );
    }
}


inline const UserAgent::Reply::t_headers &UserAgent::Reply::headers() const {
    return m_header;
}


inline const FSLib::string &UserAgent::Reply::body() const {
    return m_body;
}


inline int UserAgent::Reply::status() const {
    return m_status;
}


// This takes the status, say 200, and returns a hex number, in this case 0x200, whose representation is the same.
// This makes certain tests rather easier, since we can test for general failure with (hex&0xF00)==0x500 rather than
// (dec>=500)&&(dec<600)
inline int UserAgent::Reply::statusHex() const {
    int hex = 0;
    int temp = m_status;
    hex += ( temp / 100 ) << 8;  // Hundreds go into the third hex digit.
    temp -= ( ( temp / 100 ) * 100 );
    hex += ( temp / 10 ) << 4;   // Tens go into the second hex digit.
    temp -= ( ( temp / 10 ) * 10 );
    hex += temp;                 // Units go into the lowest hex digit.
    return hex;
}


/*
    FSLib::Isapi::Response
*/


// Builds the response around the server input, taking the query values from its QUERY_STRING.
// The status starts as "200 OK" and a default "Content-Type: text/html; charset=utf-8" header is added.
Isapi::Response::Response( const Input &s )
: server( s ), cookies( s ), query( variant_cast< string >( s.value( L"QUERY_STRING" ) ) ), m_preamble( false ), status( "200 OK" ) {
    headers.add( L"Content-Type", L"text/html" ).subvalue( L"charset", L"utf-8" );
}


Isapi::Response::~Response() {
}


namespace {
    // Renders the headers as "Name: value; sub=value" lines, each terminated with CRLF.
    string headers( const Headers &headers ) {
        string tosend;
        for ( Headers::const_iterator it( headers.begin() ); it != headers.end(); ++it ) {
            tosend += narrow( (*it).first + L": " + (*it).second.value() );
            for ( Headers::Value::const_iterator val( (*it).second.begin() ); val != (*it).second.end(); ++val ) {
                tosend += narrow( L"; " + (*val).first + L"=" + (*val).second );
            }
            tosend += "\r\n";
        }
        return tosend;
    }
}


// Looks the name up in the query string, then the cookies, then the server values, and returns
// the first non-zero result. Returns 0 when none of them has it.
size_t Isapi::Response::exists( const wstring &name, const Nullable< wstring > &partition ) const {
    // Each source is asked once (the original asked every source twice on a hit).
    size_t found = query.exists( name, partition );
    if ( found ) {
        return found;
    }
    found = cookies.exists( name, partition );
    if ( found ) {
        return found;
    }
    found = server.exists( name, partition );
    if ( found ) {
        return found;
    }
    return 0;
}


// Returns the value from the first of query string, cookies and server values that has the name.
// Throws FSLib::Exceptions::Field when none of them has it.
_variant_t Isapi::Response::value( const wstring &name, const Nullable< size_t > &num, const FSLib::Nullable< FSLib::wstring > &partition ) const {
    if ( query.exists( name, partition ) ) {
        return query.value( name, num, partition );
    }
    if ( cookies.exists( name, partition ) ) {
        return cookies.value( name, num, partition );
    }
    if ( server.exists( name, partition ) ) {
        return server.value( name, num, partition );
    }
    throw FSLib::Exceptions::Field( L"Value not found in this request", L"Name: " + name + L"\nNumber: " + toString( num.value( 0 ) ) + L"\nPartition: " + partition.value( wstring() ) );
}


// Sends a piece of the response body, sending the headers first if they have not gone out yet.
void Isapi::Response::partial( const FSLib::string &str ) {
    if ( !m_preamble ) {
        m_preamble = true;
        send_headers( ::headers( headers ) );
    }
    send_data( reinterpret_cast< const utf8 * >( str.c_str() ), str.length() );
}


// Sends the whole response in one go with a Content-Length header.
// Throws FSLib::Exceptions::Header when the headers have already been sent.
void Isapi::Response::complete( const FSLib::string &str ) {
    if ( m_preamble ) {
        throw FSLib::Exceptions::Header( L"Data already written to client" );
    }
    m_preamble = true;
    headers.add( L"Content-Length", toString( str.length() ) );
    send_headers( ::headers( headers ) );
    // m_preamble is already set, so partial() only sends the data.
    partial( str );
}


void Isapi::Response::send_data( const FSLib::string &str ) const {
    send_data( reinterpret_cast< const utf8 * >( str.c_str() ), str.length(), true );
}


/*
    FSLib::Isapi::Response::Headers
*/


// Always throws NotImplemented: response headers are only ever written, never parsed.
std::pair< FSLib::wstring, FSLib::Headers::Value > Isapi::Response::Headers::value( const wstring &/*name*/, const wstring &/*value*/ ) {
    throw FSLib::Exceptions::NotImplemented( L"HTTP response headers can only be written, not parsed" );
}