dca_interface  6.3.4
1 /* IBM Source Code */
2 /* (C) Copyright IBM Corp. 2009, 2012 */
3 /* Licensed Materials - Property of IBM */
4 /* US Government Users Restricted Rights - Use duplication or disclosure restricted by GSA Schedule Contract with IBM Corp. */
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#ifdef WIN32
# include <winsock2.h>
#endif

#include "dca/dca_base.h"
#include "dca/dca_callbacks.h"
82 using namespace dca;
// Name and version of this sample tool (printed in the tool header).
const std::string S_ToolName( "urldbsample_remote" );
const std::string S_ToolVersion( "1.3" );
87 std::string G_Locale = "en_US"; // Default locale
88 LogLevel G_LogLevel = LOG_Notice; // Default log level
// Usage text printed by PrintUsage(). The argument names in the description
// list match the names used in the synopsis line. The encryption data may be
// given either as plain string or as a hex string prefixed with "0x".
const std::string S_UsageString =
    "<dca-redist-folder> <ticket> <product> <encryption-data> <encryption-key> "
    "<url-list-file> [<locale>] [<log-level>]\n"
    " dca-redist-folder - the folder where the DCA is installed to\n"
    " ticket - a valid ticket\n"
    " product - the product associated with your ticket\n"
    " encryption-data - the encryption data included in your license, either "
    "as plain string or as hex string prefixed with 0x\n"
    " encryption-key - the encryption key included in your license\n"
    " url-list-file - file that includes the URLs to classify\n"
    " locale - optional locale for the categories names, default = en_US\n"
    " log-level - optional log-level, default = 3 (LOG_Notice)\n\n"
    ;
// Path below the redist folder that is used as the DCA binary directory;
// it differs per platform.
#if defined( WIN32 )
#  define DCA_BINDIR "bin/Win32"
#else
#  define DCA_BINDIR "bin/linux"
#endif

// Path below the redist folder that is used as the DCA init directory.
#define DCA_INITDIR "init"

// Directory used as the DCA log directory (not prefixed with the redist
// folder).
#define DCA_LOGDIR "./logs"
139 static void SetupInitData( const std::string& redist_folder,
140  InitData& initData )
141 {
142  initData.binDir = redist_folder + DCA_BINDIR;
143  initData.initDir = redist_folder + DCA_INITDIR;
144  initData.logDir = DCA_LOGDIR;
145 }
158 static bool StartupLibraries()
159 {
160 #ifdef WIN32
161  // Windows needs an extra socket-startup for this process to work
162  // correctly with e.g. IP(v6) input IP addresses
163  WORD wVersionRequested = MAKEWORD( 2, 2 );
164  WSADATA wsaData;
165  int err = WSAStartup( wVersionRequested, &wsaData );
166  if ( err != 0 ) {
167  std::cout << "Error on WSAStartup (" << err << ") occured, aborting" <<
168  std::endl;
169  return false;
170  }
171 #endif
173  // init the 3rd party libraries
174  InitCUrl();
176  return true;
177 }
185 static void ShutdownLibraries()
186 {
187  // deinit the 3rd party libraries
189  DeinitCUrl();
191 #ifdef WIN32
192  // Cleanup Windows sockets for this process
193  WSACleanup();
194 #endif
195 }
206 static void SetupLicense( const std::string& ticket, const std::string& product,
207  LicenseData& licenseData )
208 {
209  licenseData.ticket = ticket;
210  licenseData.product = product;
211 }
222 static bool SetupConnectionData( const std::string& encData,
223  const std::string& encKey, DbConnectionData& cData )
224 {
225  const int iEncKey = atoi( encKey.c_str() );
227  if( iEncKey <= 0 )
228  return false;
230  cData.useLocalDatabase = false;
231  cData.dbType = DBT_Url;
232  cData.remoteServerData.encryptionData = encData;
233  cData.remoteServerData.encryptionKey = iEncKey;
235  return true;
236 }
246 static void PrintResults( const CategoriesInfo& catinfos,
247  const UrlClassificationResults& cats )
248 {
249  const DCA_SIZE_TYPE numOfCats = cats.size();
251  if( numOfCats == 0 ) {
252  std::cout << " No categories found." << std::endl;
253  return;
254  }
256  const Categories myCategories = catinfos.getCategories();
258  for( DCA_INDEX_TYPE i = 0; i < numOfCats; ++i ) {
259  const UrlClassificationResult result = cats[ i ];
260  const Category myCategory = myCategories.byId( result );
261  const std::string catname = myCategory.name( G_Locale );
263  if( myCategory != NullCategory ) {
264  std::cout << " " << (i+1) << ".\t Category '" << catname <<
265  "' (id=" << myCategory.id() <<
266  ", groupid=" << myCategory.groupId() <<
267  ")" << std::endl;
268  }
269  }
270 }
276 static void PrintToolHeader()
277 {
278  std::cout << "IBM DCA Sample: " << S_ToolName << " (" << S_ToolVersion
279  << ")" << std::endl;
280 }
286 static void PrintUsage()
287 {
288  std::cout << " usage:" << std::endl;
289  std::cout << S_UsageString << std::endl;
290 }
298 static void PrintDbConnectionInfo( const DbConnection& aDbConnection )
299 {
300  DatabaseInformation databaseInformation =
301  aDbConnection.getDatabaseInformation();
303  std::cout << "URL Database Version: " << databaseInformation.versionString
304  << " as of " << databaseInformation.creationDateUTC << std::endl;
305 }
312 static void PrintLicenseInfo( const License& aLicense )
313 {
314  const time_t expirationDate = aLicense.getExpirationDate();
315  struct tm *expirationTime = localtime( &expirationDate );
317  std::cout << "License Info:" << std::endl;
318  std::cout << " DCA is " << ( aLicense.isLicensed() ? "licensed." :
319  "not licensed." ) << std::endl;
320  std::cout << " MaxUsers:" << aLicense.getMaxUsers() <<
321  std::endl;
322  std::cout << " MaxSessions:" << aLicense.getMaxSessions() <<
323  std::endl;
324  std::cout << " Ticket:" << aLicense.getTicket() <<
325  std::endl;
326  std::cout << " Session:" << aLicense.getSession() <<
327  std::endl;
328  std::cout << " Last Message:" << aLicense.getLastMessage() <<
329  std::endl;
330  std::cout << " Expiration Date:" << asctime( expirationTime ) <<
331  std::endl;
332 }
// Reads the URLs to classify from a text file, one URL per line.
//
// fileName - path of the URL list file
// urlList  - the URLs are appended to this list; existing entries are kept
//
// A trailing carriage return (Windows line ending) is removed from each line
// and empty lines are skipped. If the file cannot be opened, a message is
// written to std::cerr and urlList is left unchanged.
static void LoadUrlFile( const std::string& fileName,
                         std::vector<std::string>& urlList )
{
    std::ifstream urlFile( fileName.c_str(), std::ios::in );
    if( !urlFile.is_open() ) {
        // do not fail silently: otherwise the caller just reports 0 URLs
        std::cerr << "Could not open URL list file '" << fileName << "'."
                  << std::endl;
        return;
    }

    std::string line;
    while( std::getline( urlFile, line ) ) {
        // strip the carriage return left over from "\r\n" line endings
        if( !line.empty() && line[ line.length() - 1 ] == '\r' )
            line.erase( line.length() - 1 );

        if( !line.empty() )
            urlList.push_back( line );
    }
}
378 void TestUrlClassification( const std::string& aUrlListFile,
379  const DcaInstance& myDca, const UrlDbClassifier& myUrlDbClassifier,
380  const CategoriesInfo& myCategoriesInfo )
381 {
382  size_t urlsRequested = 0;
383  size_t unknownUrls = 0;
384  size_t uncategerizedUrls = 0;
385  size_t categoriesFound = 0;
387  std::cout << "Entering URL db classification routine..." << std::endl;
389  UrlClassificationResults myUrlClassificationResults;
391  std::vector<std::string> myUrlList;
392  LoadUrlFile( aUrlListFile, myUrlList );
394  for( std::vector<std::string>::const_iterator U = myUrlList.begin(),
395  UEnd = myUrlList.end(); U != UEnd; ++U ) {
397  const std::string& myUrlString = *U;
399  std::cout << " Starting URL db classification for URL '" <<
400  myUrlString << "'" << std::endl;
402  // setup a URL for given string
403  const Url myUrl = Url::create( myDca, myUrlString );
405  ++urlsRequested;
407  // start the classification
408  FunctionResult myFR =
409  myUrlDbClassifier.classify( myUrl, myUrlClassificationResults );
410  if( !myFR ) {
411  // error occured.
412  std::cerr << " Error from URL db classification. Details: " <<
413  myFR.getDescription() << " (" << myFR.getReturnCode() <<
414  "). Continuing with next URL." << std::endl;
415  continue;
416  }
418  if( myUrlClassificationResults.isUnknownUrl() ) {
419  // the URL is not known in the database
420  std::cout << " Results: URL '" << myUrlString <<
421  "' is not known in the database. " << std::endl;
422  std::cout << " Continuing with next URL." << std::endl;
423  ++unknownUrls;
424  continue;
425  }
427  if( !myUrlClassificationResults.isCategorized() ) {
428  // the URL is known but does not contains any category. This is
429  // either a white-host or includes only categorized sub-folders.
430  std::cout << " Results: URL '" << myUrlString <<
431  "' is not categorized (but known in database). " << std::endl;
432  std::cout << " Continuing with next URL" << std::endl;
433  ++uncategerizedUrls;
434  continue;
435  }
437  // given URL is known and returned matched categories
438  std::cout << " Classification Results for URL '" << myUrlString <<
439  "'" << std::endl;
440  PrintResults( myCategoriesInfo, myUrlClassificationResults );
441  categoriesFound += myUrlClassificationResults.size();
443  }
445  std::cout << " Total Results: " << std::endl;
446  std::cout << " URLs requested:\t\t" << urlsRequested << std::endl;
447  std::cout << " URLs unknown:\t\t" << unknownUrls << std::endl;
448  std::cout << " URLs not categorized:\t" << uncategerizedUrls <<
449  std::endl;
450  std::cout << " URLs categorized:\t\t" <<
451  ( urlsRequested - unknownUrls - uncategerizedUrls ) << std::endl;
452  std::cout << " Categories found for URLs:\t" << categoriesFound <<
453  std::endl;
455  std::cout << "Leaving URL db classification routine." << std::endl;
456 }
// Returns the value (0..15) of the hex digit c, or -1 if c is not a hex digit.
static int HexDigitValue( char c )
{
    if( c >= '0' && c <= '9' )
        return c - '0';
    if( c >= 'a' && c <= 'f' )
        return c - 'a' + 10;
    if( c >= 'A' && c <= 'F' )
        return c - 'A' + 10;
    return -1;
}

// Converts a hex string of the form 0xAB02FF... into the bytes it encodes.
//
// arg - either a hex string (prefix "0x" followed by pairs of hex digits, in
//       upper or lower case) or any other string
//
// Returns the decoded bytes if arg is a hex string. Any other argument (no
// "0x" prefix, odd length, or a character that is not a hex digit) is
// returned unchanged. "0x" without digits yields an empty string.
std::string HexToString( const std::string& arg )
{
    if( ( arg.length() % 2 ) != 0 || arg.compare( 0, 2, "0x" ) != 0 ) // not a hex string
        return arg;

    std::string result;
    result.reserve( ( arg.length() - 2 ) / 2 );

    // start behind the "0x" prefix and decode two digits per byte
    for( std::string::size_type i = 2; i < arg.length(); i += 2 ) {
        const int high = HexDigitValue( arg[ i ] );
        const int low = HexDigitValue( arg[ i + 1 ] );

        if( high < 0 || low < 0 ) // not a hex string after all
            return arg;

        result += static_cast<char>(
            static_cast<unsigned char>( high * 16 + low ) );
    }
    return result;
}
// Entry point of the sample: classifies the URLs of a list file against the
// remote URL database.
//
// Expected arguments (see S_UsageString): redist folder, ticket, product,
// encryption data, encryption key and URL list file; optionally followed by
// a locale and a log level.
//
// Return codes as set below:
//    0 - the classification routine was run
//    5 - wrong usage, library start-up failed, the license does not cover
//        the URL classification, or the encryption key was rejected
//   10 - an exception was caught
//
// Note: the early "return 5" paths leave the function without calling
// ShutdownLibraries(); only the paths that reach the end of the function
// call it.
493 int main( int argc, char *argv[] )
494 {
495  PrintToolHeader();
// stays 5 unless the classification routine was reached (0) or an exception
// was caught (10)
497  int rc = 5;
499  try {
// the six mandatory arguments are argv[1] .. argv[6]
500  if( argc < 7 ) {
501  PrintUsage();
502  return 5;
503  }
505  std::string myRedistFolder = argv[ 1 ];
506  const std::string myTicket = argv[ 2 ];
507  const std::string myProduct = argv[ 3 ];
509  // encryption data may be entered in hex format 0xAB02FF....
510  // or as a common string if only ASCII characters are used
511  const std::string myEncData = HexToString( argv[ 4 ] );
513  const std::string myEncKey = argv[ 5 ];
514  const std::string myUrlList = argv[ 6 ];
// optional arguments: locale (argv[7]) and log level (argv[8])
516  if (argc > 7)
517  {
518  G_Locale = argv[7];
520  if (argc > 8)
521  {
// NOTE(review): the atoi() result is cast to LogLevel without any range check
522  G_LogLevel = static_cast<LogLevel>( atoi( argv[8] ) );
523  }
524  }
526  if( myRedistFolder.empty() || myTicket.empty() ||
527  myProduct.empty() || myUrlList.empty() ||
528  myEncData.empty() || myEncKey.empty() ) {
529  PrintUsage();
530  return 5;
531  }
533  // check for trailing slash - and add if necessary
// (myRedistFolder is known to be non-empty here because of the check above)
534  const char c = myRedistFolder[ myRedistFolder.length() - 1 ];
535  if( c != '/' && c != '\\' )
536  myRedistFolder += "/";
538  // Initialize socket on Windows and 3rd party libraries
539  if( !StartupLibraries() )
540  return 5;
// the DCA objects live in their own scope, so they are destroyed before
// ShutdownLibraries() is called at the end of the function
542  {
543  // setup DCA directories
544  InitData myInitData;
545  SetupInitData( myRedistFolder, myInitData );
547  // instantiate DCA API
548  DcaInstance myDca;
549  myDca = DcaInstance::create( myInitData );
551  // setup license data
552  LicenseData myLicenseData;
553  SetupLicense( myTicket, myProduct, myLicenseData );
554  const License myLicense = myDca.createLicense( myLicenseData,
555  ProxySettings(), G_LogLevel );
557  PrintLicenseInfo( myLicense );
// without a license for the URL classification nothing else is done and
// rc keeps its initial value
559  if( myLicense.isLicensed( UrlClassification::ID ) ) {
560  // setup a signature database connection
561  DbConnectionData myDbConnectionData;
562  if ( !SetupConnectionData( myEncData, myEncKey, myDbConnectionData ) ) {
563  std::cout << "Could not setup remote connection because of wrong encryption data!" << std::endl;
564  rc = 5;
565  }
566  else {
567  const DbConnection myDbConnection =
568  myDca.createDbConnection( myLicense, myDbConnectionData,
569  ProxySettings(), G_LogLevel );
570  PrintDbConnectionInfo( myDbConnection );
572  // initialize the URL classification module and create a URL
573  // db classifier
574  // (statistics and unknown url upload disabled)
575  UrlClassification myUrlClassification =
576  UrlClassification::create( myDca, myLicense );
577  myUrlClassification.setLogLevel( G_LogLevel );
579  UrlDbClassifierOptions myUrlDbClassifierOptions;
580  myUrlDbClassifierOptions.enable_EmbeddedUrlDetection = true;
581  myUrlDbClassifierOptions.detect_EmbeddedUrlsInUrlPath = true;
582  myUrlDbClassifierOptions.enable_Feedback = false;
584  UrlDbClassifier myUrlDbClassifier =
585  myUrlClassification.createDbClassifier( myDbConnection, myUrlDbClassifierOptions );
586  myUrlDbClassifier.setLogLevel( G_LogLevel );
588  // create a categories info for printing out the category names
589  // together with the results per URL
// NOTE(review): the initializer expression of myCategoriesInfo is missing in
// this listing (original source lines 591-592 are absent) - restore it from
// the original sample before building.
590  const CategoriesInfo myCategoriesInfo =
593  // call URL Classification routine
594  TestUrlClassification( myUrlList, myDca, myUrlDbClassifier,
595  myCategoriesInfo );
597  rc = 0;
598  }
599  }
600  }
601  }
// every exception ends the run with return code 10
602  catch( const ExDca& ex ) {
603  std::cerr << "DCA Exception occured. Details: " << ex.getDescription()
604  << " (" << ex.getReturnCode() << ")." << std::endl;
605  rc = 10;
606  }
607  catch( const std::exception& s ) {
608  std::cerr << "std::exception occured. Details: " << s.what() << "." <<
609  std::endl;
610  rc = 10;
611  }
612  catch(...) {
613  std::cerr << "Unknown exception caught." << std::endl;
614  rc = 10;
615  }
617  // deinit the 3rd party libraries
618  ShutdownLibraries();
620  return rc;
621 }
