/*
    $Revision: 15 $
    $Date: 16/02/06 18:30 $
    Copyright © 1999-2007, FSL Technologies Limited.
    Contact "http://fost.3.felspar.com".
*/


#include "stdafx.h"
#include "FOST.internet.hpp"


using namespace std;
using namespace FSLib;
using namespace FSLib::Exceptions;


namespace {
    // Source control revision record for this translation unit.
    Revision c_revision( L"$Archive: /FOST.3/F3Util/url.cpp $", __DATE__, L"$Revision: 15 $", L"$Date: 16/02/06 18:30 $" );

    // Text returned by RelativePath::message().
    const wchar_t RelativePathMsg[] = L"Relative path error";
}


/*
    Exceptions
*/


/*
 * Records the error text together with the base and relative path names
 * in the exception's m_info stream.
 */
RelativePath::RelativePath( const FSLib::wstring &base, const FSLib::wstring &rel, const FSLib::wstring &error ) {
    m_info << error << endl
        << L"Base comes from : " << base << endl
        << L"Relative pathname : " << rel << endl;
}


/*
 * Returns the fixed message text for this exception type.
 */
const wchar_t * const RelativePath::message() const {
    return RelativePathMsg;
}


/*
    FSLib::QueryString
*/


/*
 * Builds the query string from str, which is either a plain path name
 * (e_pathname, gets encoded) or an already encoded URL (e_encoded, gets
 * validated). Any other value of form leaves the query string untouched.
 */
FSLib::QueryString::QueryString( const t_form form, const FSLib::wstring &str ) {
    switch ( form ) {
    case e_pathname:
        readPathname( str );
        break;
    case e_encoded:
        readEncoded( str );
        break;
    }
}


namespace {


    /*
     * Converts a value in the range 0x0 to 0xf to its upper case hex digit.
     * Throws OutOfRange< utf8 > for anything larger.
     */
    wchar_t digit( utf8 dig ) {
        if ( dig < 0x0a ) return dig + L'0';
        if ( dig < 0x10 ) return dig + L'A' - 0x0a;
        throw FSLib::Exceptions::OutOfRange< utf8 >( L"Number to convert to hex digit is too big", 0, 0x10, dig );
    }


    /*
     * Returns the three character escape "%XY" for ch, where X is the
     * high nibble and Y the low nibble.
     */
    FSLib::wstring hex( utf8 ch ) {
        wchar_t num[ 4 ];
        num[ 0 ] = '%';
        num[ 1 ] = digit( ( ch & 0xf0 ) >> 4 );
        num[ 2 ] = digit( ch & 0x0f );
        num[ 3 ] = 0;
        return FSLib::wstring( num );
    }


    // Characters that encode() copies through without escaping.
    const FSLib::string g_url_allowed( ".:/\\_-0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" );


    /*
     * Narrows str and escapes every resulting character that is not in
     * g_url_allowed as "%XY"; the allowed ones are copied unchanged.
     * NOTE(review): narrow() presumably produces UTF-8 - confirm.
     */
    FSLib::wstring encode( const FSLib::wstring &str ) {
        FSLib::string narrowed( narrow( str ) );
        FSLib::wstring encoded;
        for ( FSLib::string::const_iterator it( narrowed.begin() ); it != narrowed.end(); ++it ) {
            // g_url_allowed is a narrow string, so compare against the narrow npos.
            if ( g_url_allowed.find( *it ) == std::string::npos )
                encoded += hex( *it );
            else
                encoded += utf32( *it );
        }
        return encoded;
    }


}


/*
 * Replaces the query string with the encoded form of path.
 */
void FSLib::QueryString::readPathname( const FSLib::wstring &path ) {
    m_get = encode( path );
}


/*
 * Appends an already encoded URL to the query string. Spaces become "%20";
 * any character below 0x20 or above 0x7f raises a ParseError.
 */
void FSLib::QueryString::readEncoded( const FSLib::wstring &str ) {
    for ( FSLib::wstring::const_iterator it( str.begin() ); it != str.end(); ++it ) {
        if ( *it == L' ' )
            m_get += L"%20";
        else if ( *it < 0x20 || *it > 0x7f )
            throw FSLib::Exceptions::ParseError( L"The encoded URL contains an invalid character (" + str + L")" );
        else
            m_get += *it;
    }
}


/*
 * Appends "name=value" to the query string. The first parameter is
 * introduced with '?', later ones with '&'. A null value appends "name="
 * only. Both name and value are encoded.
 */
void FSLib::QueryString::append( const FSLib::wstring &name, const Nullable< FSLib::wstring > &value ) {
    if ( m_get.find( L"?" ) == std::wstring::npos )
        m_get += L"?" + encode( name ) + L"=";
    else
        m_get += L"&" + encode( name ) + L"=";
    if ( !value.isnull() )
        m_get += encode( value.value() );
}


/*
 * Returns the path and query built so far.
 */
FSLib::wstring FSLib::QueryString::pathspec() const {
    return m_get;
}


/*
    URL
*/


/*
 * Default URL: scheme "http", host "localhost", port 80.
 * The path spec is not set here.
 */
inline Url::Url()
: m_scheme( L"http" ), m_host( L"localhost" ), m_port( 80 ) {
}


/*
 * URL without credentials. A port below 1 selects the scheme's own port.
 * The path spec is not set here.
 */
inline Url::Url( const Scheme &scheme, const Host &host, const t_port port )
: m_scheme( scheme ), m_host( host ), m_username(), m_password(), m_port( port ) {
    if ( m_port < 1 ) {
        m_port = m_scheme.port();
    }
}


/*
 * URL with optional credentials and a path spec of "/".
 * A port below 1 selects the scheme's own port.
 */
inline Url::Url( const Scheme &scheme, const Host &host, const Nullable< FSLib::wstring > &user, const Nullable< FSLib::wstring > &pword, const t_port port )
: m_scheme( scheme ), m_host( host ), m_username( user ), m_password( pword ), m_port( port ), m_pathSpec( L"/" ) {
    if ( m_port < 1 ) {
        m_port = m_scheme.port();
    }
}


/*
 * Parses a URL of the form scheme://[user[:password]@]host[:port][/path].
 *
 * The text up to the first ':' is the scheme; any run of '/' after it is
 * skipped; everything up to the next '/' is the host part (which may carry
 * credentials and a port) and the remainder, including that '/', is the
 * path spec. A URL without any path gets a path spec of "/".
 *
 * Throws ParseError if the scheme is unknown, if nothing follows the
 * scheme separator, or if the port is not in the range 1 to 65535.
 */
inline Url::Url( const FSLib::wstring & a_url )
: m_scheme( L"http" ), m_host( L"localhost" ) {
    typedef std::wstring::size_type size_type;

    FSLib::wstring t_host;
    FSLib::wstring l_port;
    FSLib::wstring t_pathSpec;
    FSLib::wstring t_scheme;
    FSLib::wstring t_user;
    FSLib::wstring t_password;

    // The scan only reads the NUL terminated source; the pieces are copied out
    // with substr so no temporary buffer has to be owned (or leaked on a throw).
    const wchar_t * const src = a_url.c_str();

    size_type colon = 0;
    while ( src[ colon ] != 0 && src[ colon ] != L':' ) {
        ++colon;
    }

    if ( src[ colon ] == 0 ) {
        // No scheme separator at all. Treat everything as the host and leave
        // the scheme blank so that the scheme check below rejects the URL.
        t_host = src;
        t_pathSpec = L"/";
    } else {
        t_scheme = a_url.substr( 0, colon );

        // Skip until after the double // that should be there.
        size_type hostStart = colon + 1;
        while ( src[ hostStart ] == L'/' ) {
            ++hostStart;
        }
        if ( src[ hostStart ] == 0 ) {
            // Nothing after the scheme (e.g. "http://"). Scanning on from here
            // would run past the end of the string.
            throw ParseError( L"The URL does not contain a host name (" + a_url + L")" );
        }

        size_type slash = hostStart;
        while ( src[ slash ] != 0 && src[ slash ] != L'/' ) {
            ++slash;
        }
        if ( src[ slash ] == 0 ) {
            // No pathspec at all... Like http://www.thawte.com or some such.
            // Which is illegal, but people do it.
            t_host = src + hostStart;
            t_pathSpec = L"/";
        } else {
            t_host = a_url.substr( hostStart, slash - hostStart );
            t_pathSpec = src + slash;
        }
    }

    // Now check the host portion for usernames.
    const size_type at = t_host.find( L'@' );
    if ( at != std::wstring::npos ) {
        t_user = t_host.substr( 0, at );
        t_host = t_host.substr( at + 1 );
        const size_type secret = t_user.find( L':' );
        if ( secret != std::wstring::npos ) {
            t_password = t_user.substr( secret + 1 );
            t_user = t_user.substr( 0, secret );
        }
    }

    // And ports.
    const size_type portMark = t_host.find( L':' );
    if ( portMark != std::wstring::npos ) {
        l_port = t_host.substr( portMark + 1 );
        t_host = t_host.substr( 0, portMark );
    }

    // Scheme:
    try {
        m_scheme = Scheme( t_scheme );
    } catch ( InvalidScheme & ) {
        throw ParseError( L"Unknown scheme " + t_scheme );
    }

    m_host = Host( t_host );
    if ( t_user != L"" ) {
        m_username = t_user;
        m_password = t_password;
    }
    m_pathSpec = t_pathSpec;

    if ( l_port == L"" ) {
        m_port = m_scheme.port();
    } else {
        // _wtoi gives 0 for text that is not a number, so the lower bound
        // also rejects ports like "abc" as well as zero and negative values.
        int p = _wtoi( l_port.c_str() );
        if ( p < 1 || p > 65535 )
            throw ParseError( L"Port number not a valid port: " + l_port );
        m_port = t_port( p );
    }

    SplitPathSpec();
}


/*
 * Renders the URL as scheme://[user:[password]@]host[:port]pathspec.
 * The port is only written when it differs from the scheme's own port.
 */
inline FSLib::wstring Url::asString() const {
    wstringstream s;

    // First part e.g. http://
    s << m_scheme.monicker() << L"://";

    // Place username/password next
    if ( !m_username.isnull() ) {
        s << m_username.value() << L":";
        if ( !m_password.isnull() ) {
            s << m_password.value();
        }
        s << L"@";
    } else if ( !m_password.isnull() ) {
        s << L":" << m_password.value() << L"@";
    }

    // Add the host
    s << m_host.hostName();

    // Optional port number
    if ( m_port != m_scheme.port() ) {
        s << L":" << m_port;
    }

    // File specifier
    s << m_pathSpec;

    return s.str();
}


// Returns the host part of the URL.
inline Host Url::host() const {
    return m_host;
}


// Returns the scheme of the URL.
inline Scheme Url::scheme() const {
    return m_scheme;
}


// Returns the port number of the URL.
inline t_port Url::port() const {
    return m_port;
}


// Returns the user name, which may be null.
inline FSLib::Nullable< FSLib::wstring > Url::user() const {
    return m_username;
}


// Returns the password, which may be null.
inline FSLib::Nullable< FSLib::wstring > Url::password() const {
    return m_password;
}


// Returns the current path spec.
inline const FSLib::wstring &Url::pathSpec() const {
    return m_pathSpec;
}


/*
 * Changes the path spec. A name starting with '/' replaces the current path
 * spec; any other name is taken relative to the directory of the current
 * one. A blank name means "/". The result is normalised by SplitPathSpec().
 */
inline void Url::pathSpec( const FSLib::wstring &a_pathName ) {
    typedef std::wstring::size_type size_type;

    FSLib::wstring pathName( a_pathName );

    // First, formalise the new path.
    if ( pathName.length() == 0 ) {
        pathName = L"/"; // Assume that if it's blank, the caller means '/'
    }

    // Obvious directory fixes: a name that is '.' or '..', or that ends with
    // '/.' or '/..', names a directory and so gets a terminating slash. The
    // length checks keep the suffix offsets from wrapping for short names.
    const size_type length = pathName.length();
    if ( pathName == L"."
            || pathName == L".."
            || ( length >= 2 && pathName.substr( length - 2 ) == L"/." )
            || ( length >= 3 && pathName.substr( length - 3 ) == L"/.." ) ) {
        pathName += L'/';
    }

    // Now do we add or replace?
    if ( pathName[ 0 ] != L'/' ) {
        if ( m_pathSpec.length() == 0 ) {
            // Nothing to be relative to (some constructors leave the path spec blank), so start from the root.
            m_pathSpec = L"/";
        } else if ( m_pathSpec[ m_pathSpec.length() - 1 ] != L'/' ) {
            // Whatever happens, we don't need the last part of the pathSpec anymore - the file name.
            m_pathSpec += L"/../";
        }
        m_pathSpec += pathName;
    } else {
        // It begins with a /, so we replace.
        m_pathSpec = pathName;
    }

    // Deal with any '.' or '..' elements in the path. We do this every time,
    // in case some external piece of software has handed us a badly formed path.
    SplitPathSpec();
}


/*
 * Normalises m_pathSpec: runs of '/' are collapsed to one, '.' elements are
 * dropped and each '..' element removes itself together with the element
 * before it (a leading '..' is just dropped).
 *
 * Note that given a pathSpec ending in '/', this will leave it like that.
 * What actually happens is that a blank entry gets put into the list, and
 * this recreates the slash when the path is put back together.
 *
 * NOTE(review): this is a const member that assigns to m_pathSpec and
 * m_pathList - presumably both are declared mutable; confirm in the header.
 */
inline void Url::SplitPathSpec( void ) const {
    FSLib::wstring t_path( m_pathSpec );

    // Map all occurrences of '//' to '/'.
    while ( t_path.find( L"//" ) != std::wstring::npos ) {
        t_path.replace( t_path.find( L"//" ), 2, L"/" );
    }
    if ( t_path.length() > 0 && t_path[ 0 ] == L'/' ) {
        t_path = t_path.substr( 1 ); // Trim off any leading / - otherwise we get a blank entry at the beginning.
    }

    // Split what is left into its elements.
    m_pathList.erase( m_pathList.begin(), m_pathList.end() );
    for ( ;; ) {
        const std::wstring::size_type slash = t_path.find_first_of( L'/' );
        m_pathList.push_back( t_path.substr( 0, slash ) );
        if ( slash == std::wstring::npos ) {
            break;
        }
        t_path = t_path.substr( slash + 1 );
    }

    // Resolve the '.' and '..' elements. Every removal restarts the scan from
    // the first element, and the iterator is only advanced when nothing was
    // removed, so no element is skipped and end() is never stepped past.
    pathlist::iterator P = m_pathList.begin();
    while ( P != m_pathList.end() ) {
        if ( (*P) == L"." ) {
            m_pathList.erase( P );
            P = m_pathList.begin();
        } else if ( (*P) == L".." ) {
            if ( P == m_pathList.begin() ) {
                // Nothing above the root to go up to.
                m_pathList.erase( P );
            } else {
                pathlist::iterator parent = P;
                --parent;
                ++P;
                m_pathList.erase( parent, P ); // The '..' and the element it cancels.
            }
            P = m_pathList.begin();
        } else {
            ++P;
        }
    }

    // Put the path spec back together.
    m_pathSpec = L"";
    for ( pathlist::const_iterator E = m_pathList.begin(); E != m_pathList.end(); ++E ) {
        m_pathSpec += L'/';
        m_pathSpec += (*E);
    }
    if ( m_pathSpec.length() == 0 ) {
        m_pathSpec = L"/"; // Every element cancelled out (e.g. '/a/..'), which leaves the root.
    }

    // Same clean up as at the beginning, this time on the rebuilt path spec.
    while ( m_pathSpec.find( L"//" ) != std::wstring::npos ) {
        m_pathSpec.replace( m_pathSpec.find( L"//" ), 2, L"/" );
    }

    m_pathList.erase( m_pathList.begin(), m_pathList.end() ); // Really a memory saving operation.
}


/*
    $History: url.cpp $
 *
 * ***************** Version 15 *****************
 * User: Kirit Date: 16/02/06 Time: 18:30
 * Updated in $/FOST.3/F3Util
 * Improved URL encoding.
 *
 * ***************** Version 14 *****************
 * User: Kirit Date: 30/10/05 Time: 14:39
 * Updated in $/FOST.3/F3Util
 * Removed mangling for ampersands as it doesn't belong in the URLs (only
 * in the HTML).
 *
 * ***************** Version 13 *****************
 * User: Kirit Date: 18/08/05 Time: 13:08
 * Updated in $/FOST.3/F3Util
 * Added default constructor for Url.
 *
 * ***************** Version 12 *****************
 * User: Kirit Date: 27/05/05 Time: 15:37
 * Updated in $/FOST.3/F3Util
 * Added QueryString to help build valid URLs to embed into pages.
 *
 * ***************** Version 11 *****************
 * User: Kirit Date: 3/11/04 Time: 21:51
 * Updated in $/FOST.3/F3Util
 * First working version of FSLib::wstring using UTF-16.
 *
 * ***************** Version 10 *****************
 * User: Kirit Date: 9/10/03 Time: 15:44
 * Updated in $/FOST.3/F3Util
 * Headers re-arranged to give a proper SDK feel and to make determination
 * of required headers simpler.
 *
 * ***************** Version 8 *****************
 * User: Kirit Date: 4/10/03 Time: 15:40
 * Updated in $/FOST.3/F3Util
 * Internet library now added to F3Util.DLL (including use of newer
 * version of SSLeay).
 *
 * ***************** Version 7 *****************
 * User: Kirit Date: 3/10/03 Time: 16:09
 * Updated in $/FOST/Cpp/FSInternet
 * Added stdafx.h and stdafx.cpp pre-compiled header support.
 *
 * ***************** Version 6 *****************
 * User: Kirit Date: 7/01/03 Time: 14:46
 * Updated in $/FOST/Cpp/FSInternet
 * Changes to Nullable<> to make use comparisons less error prone.
 *
 * ***************** Version 5 *****************
 * User: Kirit Date: 7/05/02 Time: 12:01
 * Updated in $/FOST/Cpp/FSInternet
 * Wrapped std::basic_string<> in order to change the copy semantics to
 * stop the problems with using C++ in the COM layer for IIS.
 *
 * ***************** Version 4 *****************
 * User: Kirit Date: 19/03/02 Time: 20:54
 * Updated in $/FOST/Cpp/FSInternet
 * Corrected history from last check-in.
 * Updated TestAX.IDL to remove registry entries from removed classes.
 *
 * ***************** Version 3 *****************
 * User: Kirit Date: 19/03/02 Time: 19:48
 * Updated in $/FOST/Cpp/FSInternet
 * Debug compiles now all end in '_d'.
 * The C++ implementation names (Type.cppName) have been changed to also
 * include the DLL that the implementation is in as per the documentation.
 * In all source files the following have been done:
 * * Ensured that every header file has a revision object.
 * * Ensured that every translation unit has a revision object.
 * * Ensured that every source file (which understands comments)
 * has a history section.
 * * Changed all copyright notices to be for Obsideon Ltd.
 * * Changed copyright notices to use the longest possible timeframe.
 *
 * ***************** Version 2 *****************
 * User: Kirit Date: 13/10/01 Time: 20:08
 * Updated in $/FOST/FSInternet
 * Addition of Url class.
 *
 * ***************** Version 1 *****************
 * User: Kirit Date: 13/10/01 Time: 19:31
 * Created in $/FOST/FSInternet
 */