75 # include <winsock2.h>
84 const std::string S_ToolName =
"urldbsample_remote";
85 const std::string S_ToolVersion =
"1.3";
87 std::string G_Locale =
"en_US";
95 "<dca-redist-folder> <ticket> <product> <encryption-data> <encryption-key> "
96 "<url-list-file> [<locale>] [<log-level>]\n"
97 " dca-redist-folder - the folder where the DCA is installed to\n"
98 " ticket - a valid ticket\n"
99 " product - the product associated with your ticket\n"
100 " hex-encryption-data - the encryption data (as hex string) included in "
102 " encryption-key - the encryption key included in your license\n"
103 " url-list-file - file that includes the URLs to classify\n"
104 " locale - optional locale for the categories names, default = en_US\n"
105 " log-level - optional log-level, default = 3 (LOG_Notice)\n\n"
113 # define DCA_BINDIR "bin/Win32"
115 # define DCA_BINDIR "bin/linux"
122 #define DCA_INITDIR "init"
127 #define DCA_LOGDIR "./logs"
158 static bool StartupLibraries()
163 WORD wVersionRequested = MAKEWORD( 2, 2 );
165 int err = WSAStartup( wVersionRequested, &wsaData );
167 std::cout <<
"Error on WSAStartup (" << err <<
") occured, aborting" <<
185 static void ShutdownLibraries()
206 static void SetupLicense(
const std::string& ticket,
const std::string& product,
209 licenseData.
ticket = ticket;
222 static bool SetupConnectionData(
const std::string& encData,
225 const int iEncKey = atoi( encKey.c_str() );
251 if( numOfCats == 0 ) {
252 std::cout <<
" No categories found." << std::endl;
260 const Category myCategory = myCategories.
byId( result );
261 const std::string catname = myCategory.
name( G_Locale );
264 std::cout <<
" " << (i+1) <<
".\t Category '" << catname <<
265 "' (id=" << myCategory.
id() <<
266 ", groupid=" << myCategory.
groupId() <<
278 std::cout <<
"IBM DCA Sample: " << S_ToolName <<
" (" << S_ToolVersion
288 std::cout <<
" usage:" << std::endl;
298 static void PrintDbConnectionInfo(
const DbConnection& aDbConnection )
303 std::cout <<
"URL Database Version: " << databaseInformation.
versionString
315 struct tm *expirationTime = localtime( &expirationDate );
317 std::cout <<
"License Info:" << std::endl;
318 std::cout <<
" DCA is " << ( aLicense.
isLicensed() ?
"licensed." :
319 "not licensed." ) << std::endl;
320 std::cout <<
" MaxUsers:" << aLicense.
getMaxUsers() <<
324 std::cout <<
" Ticket:" << aLicense.
getTicket() <<
326 std::cout <<
" Session:" << aLicense.
getSession() <<
330 std::cout <<
" Expiration Date:" << asctime( expirationTime ) <<
341 static void LoadUrlFile(
const std::string& fileName,
342 std::vector<std::string>& urlList )
344 std::ifstream fstream( fileName.c_str(), std::ios::in );
345 if (!fstream.is_open())
return;
349 while ( std::getline(fstream, line) )
351 if( !line.empty() && line[line.length()-1] ==
'\r')
352 line.erase( line.length() - 1 );
355 urlList.push_back( line );
378 void TestUrlClassification(
const std::string& aUrlListFile,
382 size_t urlsRequested = 0;
383 size_t unknownUrls = 0;
384 size_t uncategerizedUrls = 0;
385 size_t categoriesFound = 0;
387 std::cout <<
"Entering URL db classification routine..." << std::endl;
391 std::vector<std::string> myUrlList;
392 LoadUrlFile( aUrlListFile, myUrlList );
394 for( std::vector<std::string>::const_iterator U = myUrlList.begin(),
395 UEnd = myUrlList.end(); U != UEnd; ++U ) {
397 const std::string& myUrlString = *U;
399 std::cout <<
" Starting URL db classification for URL '" <<
400 myUrlString <<
"'" << std::endl;
409 myUrlDbClassifier.
classify( myUrl, myUrlClassificationResults );
412 std::cerr <<
" Error from URL db classification. Details: " <<
414 "). Continuing with next URL." << std::endl;
420 std::cout <<
" Results: URL '" << myUrlString <<
421 "' is not known in the database. " << std::endl;
422 std::cout <<
" Continuing with next URL." << std::endl;
430 std::cout <<
" Results: URL '" << myUrlString <<
431 "' is not categorized (but known in database). " << std::endl;
432 std::cout <<
" Continuing with next URL" << std::endl;
438 std::cout <<
" Classification Results for URL '" << myUrlString <<
440 PrintResults( myCategoriesInfo, myUrlClassificationResults );
441 categoriesFound += myUrlClassificationResults.
size();
445 std::cout <<
" Total Results: " << std::endl;
446 std::cout <<
" URLs requested:\t\t" << urlsRequested << std::endl;
447 std::cout <<
" URLs unknown:\t\t" << unknownUrls << std::endl;
448 std::cout <<
" URLs not categorized:\t" << uncategerizedUrls <<
450 std::cout <<
" URLs categorized:\t\t" <<
451 ( urlsRequested - unknownUrls - uncategerizedUrls ) << std::endl;
452 std::cout <<
" Categories found for URLs:\t" << categoriesFound <<
455 std::cout <<
"Leaving URL db classification routine." << std::endl;
466 std::string HexToString(
const std::string& arg )
468 if( (arg.length()%2) != 0 || arg.find(
"0x" ) != 0 )
471 const std::string cmd( arg.substr( 2 ) );
474 for( std::string::size_type i = 0; i < cmd.length(); i += 2 ) {
475 unsigned char hex[3] = { 0x00, 0x00, 0x00 };
479 sscanf( (
const char *)hex,
"%02X", &x );
480 result += (
unsigned char)x;
493 int main(
int argc,
char *argv[] )
505 std::string myRedistFolder = argv[ 1 ];
506 const std::string myTicket = argv[ 2 ];
507 const std::string myProduct = argv[ 3 ];
511 const std::string myEncData = HexToString( argv[ 4 ] );
513 const std::string myEncKey = argv[ 5 ];
514 const std::string myUrlList = argv[ 6 ];
522 G_LogLevel =
static_cast<LogLevel>( atoi( argv[8] ) );
526 if( myRedistFolder.empty() || myTicket.empty() ||
527 myProduct.empty() || myUrlList.empty() ||
528 myEncData.empty() || myEncKey.empty() ) {
534 const char c = myRedistFolder[ myRedistFolder.length() - 1 ];
535 if( c !=
'/' && c !=
'\\' )
536 myRedistFolder +=
"/";
539 if( !StartupLibraries() )
562 if ( !SetupConnectionData( myEncData, myEncKey, myDbConnectionData ) ) {
563 std::cout <<
"Could not setup remote connection because of wrong encryption data!" << std::endl;
570 PrintDbConnectionInfo( myDbConnection );
594 TestUrlClassification( myUrlList, myDca, myUrlDbClassifier,
602 catch(
const ExDca& ex ) {
603 std::cerr <<
"DCA Exception occured. Details: " << ex.
getDescription()
607 catch(
const std::exception& s ) {
608 std::cerr <<
"std::exception occured. Details: " << s.what() <<
"." <<
613 std::cerr <<
"Unknown exception caught." << std::endl;
static DCA_MODULE_ID_TYPE ID
The unique ID of the URL classification module.
Is used to create a License object. A license first must be created with DcaInstance::createLicense t...
Exception class used in the DCA.
bool isCategorized() const
Returns whether or not the URL matched one or more categories.
LogLevel setLogLevel(LogLevel newLevel)
Sets the logging level for the given class instance. The old value will be returned.
static Url create(const DcaInstance &aDcaInstance, const std::string &urlString)
Standard Url creation function.
const DCA_CATEGORIES_INFO_TYPE DCA_CAT_INFO_TYPE_URL
Refers to the internal categories info for URL classification.
void InitCUrl()
Initializes libcurl. Do not use any DCA function before initializing libcurl.
std::string initDir
the directory in which the DCA init files are stored
int getMaxSessions() const
Returns the maximum allowed sessions associated with your ticket/license.
time_t getExpirationDate() const
Returns the expiration date of the license in UTC.
std::string getDescription() const
Returns a description of the error.
Definition of a container class for Category objects.
CategoriesInfo getCategoriesInfo(DCA_CATEGORIES_INFO_TYPE categoryType) const
Returns the DCA internal categories, groups and locales.
DatabaseInformation getDatabaseInformation() const
Returns information about the underlying database.
DCA_RESULT_TYPE getReturnCode() const
Returns the last error code (if any).
FunctionResult classify(const Url &aUrl, UrlClassificationResults &urlResults) const
Performs the URL classification and returns the results.
static void SetupInitData(const std::string &redist_folder, InitData &initData)
Sets up the given initData by substituting the given redist_folder with DCA subdirectories.
LogLevel setLogLevel(LogLevel newLevel)
Sets the logging level for the given class instance. The old value will be returned.
std::string binDir
the directory in which the DCA binary (*.dca) files are stored
This header includes initialization/deinitialization support functions for the 3rd party libraries us...
unsigned int encryptionKey
The encryption key to be used (provided with your license)
A container class that allows access to the contained Categories, Groups and Locales.
Sets up options for embedded URL detection and the provided Feedback mechanism.
@ LOG_Notice
Write notices / important information to the log file.
void SetOpenSslCallbacks()
Initializes the required callbacks for OpenSSL when using HTTPS or SSL connections in a multi-threade...
Stores the connection data for a database.
void UnsetOpenSslCallbacks()
Unsets the OpenSSL callbacks. Do not call any DCA function after you have called this function.
DCA_CATEGORY_ID_TYPE id() const
The category id.
int getMaxUsers() const
Returns the maximum allowed users associated with your ticket/license.
void DeinitCUrl()
Deinitializes libcurl. Do not call any DCA function after you have called this function.
bool isUnknownUrl() const
Returns whether a URL is known or unknown. A URL is unknown if it is not contained in the database.
Database connection class for a local or remote database.
This header includes all header files of the URL Classification Package.
#define DCA_LOGDIR
Relative directory for logfile(s).
Main class for the URL classification.
#define DCA_INITDIR
DCA subdirectory of the DCA initialization data.
bool enable_Feedback
This switches the Feedback feature on or off. This is switched off by default.
DCA_RESULT_TYPE getReturnCode() const
Gets the code of the error.
DCA_CATEGORY_ID_TYPE UrlClassificationResult
The item of a URL classification result is typedef'd as DCA_CATEGORY_ID_TYPE.
Results of a URL classification.
bool detect_EmbeddedUrlsInUrlPath
If set to true, you can specify that embedded URL detection is additionally performed in the path par...
URL database classifier class.
static void PrintToolHeader()
Prints out the name and the version of this sample.
LogLevel
This enum is used in all setLogLevel() functions to change the verbosity level of the classes.
Use a License to initialize a classification package or a toolbox package.
Category byId(DCA_CATEGORY_ID_TYPE id) const
Returns the category with the given category id.
If you are using one or more proxy servers set up this structure and use it for e....
bool isLicensed(DCA_MODULE_ID_TYPE id=0, bool force=false) const
Checks whether the given License is valid for the given module id.
static void PrintLicenseInfo(const License &aLicense)
Prints out the information about the provided License.
std::string ticket
The ticket as provided in the license.
This header includes all header files of the DCA Base Package.
bool useLocalDatabase
Set to true to connect to a local or custom database, set to false to use a remote database.
Encapsulates the init and deinit of the DCA API.
Category NullCategory
Defines a constant unassigned Category you can use for checks. if( myCat == NullCategory ) --> myCat ...
std::string product
The product code used with the license.
const DbType DBT_Url
Used for DbConnection classes for URL classification.
static UrlClassification create(const DcaInstance &aDcaInstance, const License &aLicense)
Creates the URL classification module by using the given DcaInstance and License.
std::string logDir
the directory in which the DCA log file should be created
static void PrintUsage(const char *name)
Prints out the syntax of the sample.
std::string getLastMessage() const
Returns the last message received from our license server or if none available the last available mes...
size_t DCA_INDEX_TYPE
Type for index access (used for arrays and collections).
DbType dbType
The type of the database.
Categories getCategories() const
Returns the contained Categories.
std::string getDescription() const
Returns the description for the error or warning.
size_t DCA_SIZE_TYPE
Type for size (used for size of array and collections).
std::string name(const std::string &localeString=std::string()) const
Returns the localized (display) name of the category.
DbConnectionRemoteServerData remoteServerData
If you are creating a remote database connection, this structure must be filled out with the encrypti...
std::string getTicket() const
Returns the ticket of the license as string.
DCA_GROUP_ID_TYPE groupId() const
If the category is associated with a group, this is the group id.
bool enable_EmbeddedUrlDetection
If set to true embedded URL detection will be used in URL classification. This is switched on by defa...
#define DCA_BINDIR
DCA subdirectory of the DCA binaries.
Standard function result.
std::string getSession() const
Returns the session of the license as string.
Encapsulates a URL object.
UrlDbClassifier createDbClassifier(const DbConnection &aDbConnection, const UrlDbClassifierOptions &options=UrlDbClassifierOptions()) const
Creates a URL database classifier. The classifier is created by using the provided database connection...
const std::string S_UsageString
Usage string, displayed if a parameter is missing.
DbConnection createDbConnection(const License &aLicense, const DbConnectionData &dbcData, const ProxySettings &proxySettings=ProxySettings(), LogLevel aLogLevel=LOG_Initial) const
Creates a DbConnection object using the given DbConnectionData.
This structure is used to initialize the DcaInstance.
Encapsulates a category as defined in the categories XML schema (see Categories XML: Categories).
static DcaInstance create(const InitData &initData)
Creates a DcaInstance, starts up the DCA API and initializes the required main module.
static void SetupLicense(const std::string &ticket, const std::string &product, LicenseData &licenseData)
Sets up the given licenseData by copying the given ticket and product strings.
std::string encryptionData
The encryption data to be used (provided with your license)
License createLicense(const LicenseData &licData, const ProxySettings &proxySettings=ProxySettings(), LogLevel aLogLevel=LOG_Initial) const
Creates a License object using the given LicenseData.
DCA_SIZE_TYPE size() const
Returns the number of results in the container.
int main(int argc, char *argv[])
The main routine.