dca_interface  6.3.4
zla_samples/zlasample/main.cpp
1 /* IBM Source Code */
2 /* (C) Copyright IBM Corp. 2009, 2012 */
3 /* Licensed Materials - Property of IBM */
4 /* US Government Users Restricted Rights - Use duplication or disclosure restricted by GSA Schedule Contract with IBM Corp. */
5 
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
68 
69 #include "dca/dca_base.h"
71 #include "dca/dca_callbacks.h"
72 
73 using namespace dca;
74 
/** Name of this sample; printed by PrintToolHeader(). */
const std::string S_ToolName( "zlasample" );
/** Version of this sample; printed by PrintToolHeader(). */
const std::string S_ToolVersion( "1.2" );
77 
78 LogLevel G_LogLevel = LOG_Notice; // Default log level
79 
/**
 * Usage string, displayed if a parameter is missing.
 *
 * The first line is the argument synopsis; the following lines describe each
 * argument using the same names as the synopsis. The encryption data may be
 * passed either as a hex string ("0x...") or as plain ASCII text, which is
 * what main() accepts via HexToString().
 */
const std::string S_UsageString =
    "<redist-folder> <ticket> <product> <encryption-data> <encryption-key> <email-list-file> [<log-level>]\n"
    " redist-folder - the folder where the DCA is installed to\n"
    " ticket - a valid ticket\n"
    " product - the product associated with your ticket\n"
    " encryption-data - the encryption data included in your license (hex string 0x... or plain ASCII)\n"
    " encryption-key - the encryption key included in your license\n"
    " email-list-file - file that includes the emails to classify\n"
    " log-level - optional log-level, default = 3 (LOG_Notice)\n\n";
93 
/** DCA subdirectory of the DCA binaries; selected by platform at compile time. */
#if defined( WIN32 )
#  define DCA_BINDIR "bin/Win32"
#else
#  define DCA_BINDIR "bin/linux"
#endif

/** DCA subdirectory of the DCA initialization data. */
#define DCA_INITDIR "init"

/** Relative directory for logfile(s). */
#define DCA_LOGDIR "./logs"
114 
123 static void SetupInitData( const std::string& redist_folder, InitData& initData )
124 {
125  initData.binDir = redist_folder + DCA_BINDIR;
126  initData.initDir = redist_folder + DCA_INITDIR;
127  initData.logDir = DCA_LOGDIR;
128 }
129 
137 static void SetupLicense( const std::string& ticket, const std::string& product,
138  LicenseData& licenseData )
139 {
140  licenseData.ticket = ticket;
141  licenseData.product = product;
142 }
143 
152 static bool SetupConnectionData( const std::string& encData, const std::string& encKey,
153  DbConnectionData& cData )
154 {
155  const int iEncKey = atoi( encKey.c_str() );
156 
157  if( iEncKey <= 0 )
158  return false;
159 
160  cData.useLocalDatabase = false;
161  cData.dbType = DBT_Url;
162  cData.remoteServerData.encryptionData = encData;
163  cData.remoteServerData.encryptionKey = iEncKey;
164 
165  return true;
166 }
167 
173 static void PrintResult( const ZlaClassificationResult& result )
174 {
175  if( result.result() == ZLA_SPAM ) {
176  std::cout << " ZLA_SPAM";
177  }
178  else if( result.result() == ZLA_HAM ) {
179  std::cout << " ZLA_HAM";
180  }
181  else if( result.result() == ZLA_NDR ) {
182  std::cout << " ZLA_NDR";
183  }
184  else if( result.result() == ZLA_UNKNOWN ) {
185  std::cout << " ZLA_UNKNOWN";
186  }
187  else {
188  // should never happen
189  std::cout << " [UNKNOWN return from ZLA call]";
190  }
191  std::cout << "(" << (int)result.result() << ")";
192 
193  if( result.module() == DCA_NONE ) {
194  std::cout << ", No ZLA module";
195  }
196  else if( result.module() == DCA_ZLA_FLOW ) {
197  std::cout << ", DCA_ZLA_FLOW";
198  }
199  else if( result.module() == DCA_ZLA_SHINGLE_HEADERPRINT ) {
200  std::cout << ", DCA_ZLA_SHINGLE_HEADERPRINT";
201  }
202  else if( result.module() == DCA_ZLA_SHINGLE_BAYES ) {
203  std::cout << ", DCA_ZLA_SHINGLE_BAYES";
204  }
205  else if( result.module() == DCA_ZLA_SHINGLE_PRINT ) {
206  std::cout << ", DCA_ZLA_SHINGLE_PRINT";
207  }
208  else if( result.module() == DCA_ZLA_URL ) {
209  std::cout << ", DCA_ZLA_URL";
210  }
211  else if( result.module() == DCA_ZLA_BLOCKALLOW ) {
212  std::cout << ", DCA_ZLA_BLOCKALLOW";
213  }
214  else if( result.module() == DCA_ZLA_DBBLOCKALLOW ) {
215  std::cout << ", DCA_ZLA_DBBLOCKALLOW";
216  }
217  else if( result.module() == DCA_ZLA_NDR ) {
218  std::cout << ", DCA_ZLA_NDR";
219  }
220  else {
221  // should never happen
222  std::cout << ", [UNKNOWN module from ZLA call]";
223  }
224  std::cout << "(" << (int)result.module() << ")" << std::endl;
225 }
226 
231 static void PrintToolHeader()
232 {
233  std::cout << "IBM DCA Sample: " << S_ToolName << " (" << S_ToolVersion << ")" << std::endl;
234 }
235 
240 static void PrintUsage()
241 {
242  std::cout << " usage:" << std::endl;
243  std::cout << S_UsageString << std::endl;
244 }
245 
251 static void PrintDbConnectionInfo( const DbConnection& aDbConnection )
252 {
253  std::cout << "Remote Database version is: " << aDbConnection.getDatabaseVersion() << std::endl;
254 }
255 
261 static void PrintLicenseInfo( const License& aLicense )
262 {
263  const time_t expirationDate = aLicense.getExpirationDate();
264  struct tm *expirationTime = localtime( &expirationDate );
265 
266  std::cout << "License Info:" << std::endl;
267  std::cout << " DCA is " << ( aLicense.isLicensed() ? "licensed." :
268  "not licensed." ) << std::endl;
269  std::cout << " MaxUsers:" << aLicense.getMaxUsers() <<
270  std::endl;
271  std::cout << " MaxSessions:" << aLicense.getMaxSessions() <<
272  std::endl;
273  std::cout << " Ticket:" << aLicense.getTicket() <<
274  std::endl;
275  std::cout << " Session:" << aLicense.getSession() <<
276  std::endl;
277  std::cout << " Last Message:" << aLicense.getLastMessage() <<
278  std::endl;
279  std::cout << " Expiration Date:" << asctime( expirationTime ) <<
280  std::endl;
281 }
282 
/**
 * Reads the given list file line by line and appends every non-empty line
 * to emailList.
 *
 * A single trailing carriage return is removed from each line, so files
 * with CRLF line endings yield the same entries as files with LF endings.
 * If the file cannot be opened, emailList is left unchanged.
 *
 * @param fileName   path of the list file
 * @param emailList  receives the entries; existing content is kept
 */
static void LoadEmailFile( const std::string& fileName, std::vector<std::string>& emailList )
{
    std::ifstream listFile( fileName.c_str(), std::ios::in );

    if( listFile.is_open() ) {
        for( std::string entry; std::getline( listFile, entry ); ) {
            if( !entry.empty() && *entry.rbegin() == '\r' )
                entry.resize( entry.size() - 1 );

            if( entry.empty() )
                continue;

            emailList.push_back( entry );
        }
    }
}
305 
/**
 * Loads the complete content of the given file into emailContents.
 *
 * The file is opened in binary mode and read in fixed-size chunks until end
 * of file, so no file size has to be determined up front and the size is not
 * limited by the range of int. emailContents is always cleared first. If a
 * read error occurs, emailContents is cleared again and false is returned.
 * The file is closed on every path, including when appending throws.
 *
 * @param fileName       path of the file to read
 * @param emailContents  receives the raw file content
 * @return true if at least one byte was read without error, false if the
 *         file could not be opened, is empty, or could not be read
 */
static bool LoadEmail( const std::string& fileName, std::string& emailContents )
{
    emailContents.clear();

    FILE* file = fopen( fileName.c_str(), "rb" );
    if( !file )
        return false;

    bool readOk = false;
    try {
        char chunk[ 4096 ];
        size_t bytesRead = 0;
        while( ( bytesRead = fread( chunk, 1, sizeof( chunk ), file ) ) > 0 )
            emailContents.append( chunk, bytesRead );

        // fread() returns 0 both at end of file and on error.
        readOk = ( ferror( file ) == 0 );
    }
    catch( ... ) {
        fclose( file );   // do not leak the handle if append() runs out of memory
        throw;
    }
    fclose( file );

    if( !readOk )
        emailContents.clear();

    return !emailContents.empty();
}
342 
358 void TestZlaClassification( const std::string& aEmailListFile, const DcaInstance& myDca,
359  const ZlaClassifier& myZlaClassifier )
360 {
361  size_t emailsRequested = 0;
362 
363  std::cout << "Entering ZLA classification routine..." << std::endl;
364 
365  ZlaClassificationResult myZlaClassificationResult;
366 
367  std::vector<std::string> myEmailList;
368  LoadEmailFile( aEmailListFile, myEmailList );
369 
370  for( std::vector<std::string>::const_iterator E = myEmailList.begin(),
371  EEnd = myEmailList.end(); E != EEnd; ++E ) {
372 
373  const std::string& myEmailString = *E;
374 
375  std::cout << " Starting ZLA classification for email '" <<
376  myEmailString << "'" << std::endl;
377 
378  // Load email contents into given string
379  std::string emailContent;
380  if( !LoadEmail( myEmailString, emailContent ) ) {
381  std::cout << "Error loading given email file: " << myEmailString << ", continuing with next file." << std::endl;
382  continue;
383  }
384 
385  // setup a Email object for given string
386  const Email myEmail = Email::create( myDca, emailContent );
387 
388  ++emailsRequested;
389 
390  // start the classification
391  FunctionResult myFR = myZlaClassifier.classify( myEmail, myZlaClassificationResult );
392  if( !myFR ) {
393  // error occured.
394  std::cerr << " Error from ZLA classification. Details: " << myFR.getDescription() <<
395  " (" << myFR.getReturnCode() << "). Continuing with next email." << std::endl;
396  continue;
397  }
398 
399  std::cout << " Classification Result for email '" << myEmailString << "'" << std::endl;
400  PrintResult( myZlaClassificationResult );
401  }
402 
403  std::cout << " Total Results: " << std::endl;
404  std::cout << " Emails requested:\t\t" << emailsRequested << std::endl;
405 
406  std::cout << "Leaving ZLA classification routine." << std::endl;
407 }
408 
/**
 * Returns the value (0..15) of a single hexadecimal digit, or -1 if the
 * character is not a hexadecimal digit.
 */
static int HexDigitValue( char c )
{
    if( c >= '0' && c <= '9' )
        return c - '0';
    if( c >= 'a' && c <= 'f' )
        return c - 'a' + 10;
    if( c >= 'A' && c <= 'F' )
        return c - 'A' + 10;
    return -1;
}

/**
 * Converts a hex string of the form "0xAB02FF..." into the bytes it encodes.
 *
 * The argument is treated as a hex string only if it starts with "0x", has an
 * even length, and every character after the prefix is a hexadecimal digit.
 * In every other case the argument is returned unchanged, so plain ASCII
 * input passes through as is and malformed hex input is never decoded into
 * arbitrary bytes.
 *
 * @param arg  the string to convert
 * @return the decoded bytes (may contain '\0'), or arg itself if it is not a
 *         valid hex string
 */
std::string HexToString( const std::string& arg )
{
    if( ( arg.length() % 2 ) != 0 || arg.compare( 0, 2, "0x" ) != 0 ) // not a hex string
        return arg;

    std::string result;
    result.reserve( ( arg.length() - 2 ) / 2 );

    // Two hex digits form one byte; start right after the "0x" prefix.
    for( std::string::size_type i = 2; i < arg.length(); i += 2 ) {
        const int high = HexDigitValue( arg[ i ] );
        const int low = HexDigitValue( arg[ i + 1 ] );
        if( high < 0 || low < 0 ) // not a hex string after all
            return arg;

        result += static_cast<char>( static_cast<unsigned char>( ( high << 4 ) | low ) );
    }
    return result;
}
434 
442 int main( int argc, char *argv[] )
443 {
444  PrintToolHeader();
445 
446  int rc = 5;
447 
448  try {
449  if( argc < 7 ) {
450  PrintUsage();
451  return 5;
452  }
453 
454  std::string myRedistFolder = argv[ 1 ];
455  const std::string myTicket = argv[ 2 ];
456  const std::string myProduct = argv[ 3 ];
457 
458  // encryption data may be entered in hex format 0xAB02FF....
459  // or as a common string if only ASCII characters are used
460  const std::string myEncData = HexToString( argv[ 4 ] );
461 
462  const std::string myEncKey = argv[ 5 ];
463  const std::string myEmailList = argv[ 6 ];
464 
465  if (argc > 7)
466  {
467  G_LogLevel = static_cast<LogLevel>( atoi( argv[7] ) );
468  }
469 
470  if( myRedistFolder.empty() || myTicket.empty() ||
471  myProduct.empty() || myEmailList.empty() ||
472  myEncData.empty() || myEncKey.empty() ) {
473  PrintUsage();
474  return 5;
475  }
476 
477  // check for trailing fileslash - and add if necessary
478  char c = myRedistFolder[ myRedistFolder.length() - 1 ];
479  if( c != '/' && c != '\\' )
480  myRedistFolder += "/";
481 
482  // init the 3rd party libraries
483  InitCUrl();
485 
486  {
487  // setup DCA directories
488  InitData myInitData;
489  SetupInitData( myRedistFolder, myInitData );
490 
491  // instantiate DCA API
492  DcaInstance myDca;
493  myDca = DcaInstance::create( myInitData );
494 
495  // setup license data
496  LicenseData myLicenseData;
497  SetupLicense( myTicket, myProduct, myLicenseData );
498  const License myLicense = myDca.createLicense( myLicenseData, ProxySettings(), G_LogLevel );
499 
500  PrintLicenseInfo( myLicense );
501 
502  if( myLicense.isLicensed( ZlaClassification::ID ) ) {
503  // setup a signature database connection
504  DbConnectionData myDbConnectionData;
505  SetupConnectionData( myEncData, myEncKey, myDbConnectionData );
506  const DbConnection myDbConnection = myDca.createDbConnection( myLicense, myDbConnectionData, ProxySettings(), G_LogLevel );
507  PrintDbConnectionInfo( myDbConnection );
508 
509  // initialize the ZLA classification module and create a ZLA classifier
510  ZlaClassification myZlaClassification = ZlaClassification::create( myDca, myLicense );
511  const ZlaClassifier myZlaClassifier = myZlaClassification.createClassifier( myDbConnection );
512 
513  // call ZLA Classification routine
514  TestZlaClassification( myEmailList, myDca, myZlaClassifier );
515  rc = 0;
516  }
517  }
518  }
519  catch( const ExDca& ex ) {
520  std::cerr << "DCA Exception occured. Details: " << ex.getDescription() <<
521  " (" << ex.getReturnCode() << ")." << std::endl;
522  rc = 10;
523  }
524  catch( const std::exception& s ) {
525  std::cerr << "std::exception occured. Details: " << s.what() << "." << std::endl;
526  rc = 10;
527  }
528  catch(...) {
529  std::cerr << "Unknown exception caught." << std::endl;
530  rc = 10;
531  }
532 
533  // deinit the 3rd party libraries
535  DeinitCUrl();
536 
537  return rc;
538 }
Is used to create a License object. A license first must be created with DcaInstance::createLicense t...
Definition: base_classes.h:547
Exception class used in the DCA.
Definition: base_classes.h:237
const DCA_ZLA_TYPE DCA_ZLA_URL
The URL module.
DCA_ZLA_RESULT result() const
Returns the result for the ZLA classification.
void InitCUrl()
Initializes libcurl. Do not use any DCA function before initializing libcurl.
std::string initDir
the directory in which the DCA init files are stored
Definition: base_classes.h:266
ZLA classifier object for ZLA classification.
int getMaxSessions() const
Returns the maximum allowed sessions associated with your ticket/license.
time_t getExpirationDate() const
Returns the expiration date of the license in UTC.
std::string getDescription() const
Returns a description of the error.
static ZlaClassification create(const DcaInstance &aDcaInstance, const License &aLicense)
Initializes the ZlaClassification module.
DCA_RESULT_TYPE getReturnCode() const
Returns the last error code (if any).
static void SetupInitData(const std::string &redist_folder, InitData &initData)
Sets up the given initData by substituting the given redist_folder with DCA subdirectories.
static DCA_MODULE_ID_TYPE ID
The unique ID of the ZLA classification module.
const DCA_ZLA_TYPE DCA_NONE
Initial/no module.
std::string binDir
the directory in which the DCA binary (*.dca) files are stored
Definition: base_classes.h:265
This header includes initialization/deinitialization support functions for the 3rd party libraries us...
unsigned int encryptionKey
The encryption key to be used (provided with your license)
Definition: base_classes.h:779
const DCA_ZLA_TYPE DCA_ZLA_NDR
The Non-Delivery-Report module.
@ LOG_Notice
Write notices / important information to the log file.
Definition: base_classes.h:217
void SetOpenSslCallbacks()
Initializes the required callbacks for OpenSSL when using HTTPS or SSL connections in a multi-threade...
Stores the connection data for a database.
Definition: base_classes.h:815
void UnsetOpenSslCallbacks()
Unsets the openssl callbacks. Do not call any DCA function after you have called this function.
const DCA_ZLA_RESULT ZLA_HAM
The email data is classified as HAM.
int getMaxUsers() const
Returns the maximum allowed users associated with your ticket/license.
void DeinitCUrl()
Deinitializes libcurl. Do not call any DCA function after you have called this function.
The ZLA Classification module class.
Database connection class for a local or remote database.
Definition: base_classes.h:859
const DCA_ZLA_TYPE DCA_ZLA_FLOW
The Flow Control module.
Encapsulates an email object.
Definition: base_email.h:24
#define DCA_LOGDIR
Relative directory for logfile(s).
#define DCA_INITDIR
DCA subdirectory of the DCA initialization data.
const DCA_ZLA_TYPE DCA_ZLA_DBBLOCKALLOW
The DB Block Allow module.
DCA_RESULT_TYPE getReturnCode() const
Gets the code of the error.
const DCA_ZLA_RESULT ZLA_UNKNOWN
The email data is unknown i.e. there was not enough data for classification or no ZLA match.
static void PrintToolHeader()
Prints out the name and the version of this sample.
This header includes all header files of the ZLA Classification Package.
LogLevel
This enum is used in all setLogLevel() functions to change the verbosity level of the classes.
Definition: base_classes.h:212
Use a License to initialize a classification package or a toolbox package.
Definition: base_classes.h:560
If you are using one or more proxy servers set up this structure and use it for e....
Definition: base_classes.h:275
const DCA_ZLA_RESULT ZLA_SPAM
The email data is classified as SPAM.
bool isLicensed(DCA_MODULE_ID_TYPE id=0, bool force=false) const
Checks whether the given License is valid for the given module id.
static void PrintLicenseInfo(const License &aLicense)
Prints out the information about the provided License.
std::string ticket
The ticket as provided in the license.
Definition: base_classes.h:548
This header includes all header files of the DCA Base Package.
Overall result of a ZLA classification.
bool useLocalDatabase
Set to true to connect to a local or custom database, set to false to use a remote database.
Definition: base_classes.h:821
static Email create(const DcaInstance &aDcaInstance, const std::string &emailContent)
Creates an email object, used as an input parameter for ZLA classification.
Encapsulates the init and deinit of the DCA API.
Definition: base_classes.h:315
std::string product
The product code used with the license.
Definition: base_classes.h:549
DCA_ZLA_TYPE module() const
Returns the module type for the ZLA classification.
const DCA_ZLA_RESULT ZLA_NDR
The email data is classified as NDR (non-delivery-report)
const DCA_ZLA_TYPE DCA_ZLA_BLOCKALLOW
The Block Allow module.
FunctionResult classify(const Email &anEmail, ZlaClassificationResult &aZlaResult) const
The email classification method. The method takes an initialized Email object and returns the results...
const DbType DBT_Url
Used for DbConnection classes for URL classification.
std::string logDir
the directory in which the DCA log file should be created
Definition: base_classes.h:267
static void PrintUsage()
Prints out the syntax of the sample.
std::string getLastMessage() const
Returns the last message received from our license server or if none available the last available mes...
const DCA_ZLA_TYPE DCA_ZLA_SHINGLE_HEADERPRINT
The Shingle Header Print module.
DbType dbType
The type of the database.
Definition: base_classes.h:820
std::string getDescription() const
Returns the description for the error or warning.
const DCA_ZLA_TYPE DCA_ZLA_SHINGLE_PRINT
The Shingle Print module.
DbConnectionRemoteServerData remoteServerData
If you are creating a remote database connection, this structure must be filled out with the encrypti...
Definition: base_classes.h:824
std::string getTicket() const
Returns the ticket of the license as string.
#define DCA_BINDIR
DCA subdirectory of the DCA binaries.
Standard function result.
Definition: base_classes.h:148
std::string getSession() const
Returns the session of the license as string.
const DCA_ZLA_TYPE DCA_ZLA_SHINGLE_BAYES
The Shingle Bayes module.
const std::string S_UsageString
Usage string, displayed if a parameter is missing.
ZlaClassifier createClassifier(const DbConnection &aDbConnection) const
Creates a ZlaClassifier used to classify Email objects.
DbConnection createDbConnection(const License &aLicense, const DbConnectionData &dbcData, const ProxySettings &proxySettings=ProxySettings(), LogLevel aLogLevel=LOG_Initial) const
Creates a DbConnection object using the given DbConnectionData.
This structure is used to initialize the DcaInstance.
Definition: base_classes.h:264
std::string getDatabaseVersion() const
Returns the currently used database version.
static DcaInstance create(const InitData &initData)
Creates a DcaInstance, starts up the DCA API and initializes the required main module.
static void SetupLicense(const std::string &ticket, const std::string &product, LicenseData &licenseData)
Sets up the given licenseData by copying the given ticket and product strings.
std::string encryptionData
The encryption data to be used (provided with your license)
Definition: base_classes.h:778
License createLicense(const LicenseData &licData, const ProxySettings &proxySettings=ProxySettings(), LogLevel aLogLevel=LOG_Initial) const
Creates a License object using the given LicenseData.
int main(int argc, char *argv[])
The main routine.