dca_interface  6.3.4
malware_samples/malwaresample/main.cpp
1 /* IBM Source Code */
2 /* (C) Copyright IBM Corp. 2009, 2012 */
3 /* Licensed Materials - Property of IBM */
4 /* US Government Users Restricted Rights - Use duplication or disclosure restricted by GSA Schedule Contract with IBM Corp. */
5 
35 #include <string>
36 #include <vector>
37 #include <iostream>
38 #include <fstream>
39 #include <ctime>
40 
41 #include "dca/dca_base.h"
43 #include "dca/dca_callbacks.h"
44 
45 #ifdef WIN32
46 # include <winsock2.h>
47 #endif
48 
49 using namespace dca;
50 using namespace dca_malware;
51 
/// Name of this sample; printed in the tool header.
const std::string S_ToolName( "malwaresample" );
/// Version of this sample; printed in the tool header next to the name.
const std::string S_ToolVersion( "1.0" );
54 
/// Usage string, displayed if a command-line parameter is missing or empty.
const std::string S_UsageString(
    "<dca-redist-folder> <ticket> <product> <input-file>\n"
    " dca-redist-folder - the folder where the DCA is installed\n"
    " ticket - a valid ticket\n"
    " product - the product associated with your ticket\n"
    " input-file - a file with sample hashes\n\n" );
66 
// DCA subdirectory of the DCA binaries; the value depends on whether WIN32
// is defined for this build.
#ifndef WIN32
#  define DCA_BINDIR "bin/linux"
#else
#  define DCA_BINDIR "bin/Win32"
#endif

// DCA subdirectory of the DCA initialization data.
#define DCA_INITDIR "init"

// Relative directory for logfile(s).
#define DCA_LOGDIR "./logs"
87 
98 static void SetupInitData( const std::string& strRedistFolder,
99  InitData& aInitData )
100 {
101  aInitData.binDir = strRedistFolder + DCA_BINDIR;
102  aInitData.initDir = strRedistFolder + DCA_INITDIR;
103  aInitData.logDir = DCA_LOGDIR;
104 }
105 
117 static bool StartupLibraries()
118 {
119 #ifdef WIN32
120  // Windows needs an extra socket-startup for this process to work
121  // correctly with e.g. IP(v6) input IP addresses
122  WORD wVersionRequested = MAKEWORD( 2, 2 );
123  WSADATA wsaData;
124  int err = WSAStartup( wVersionRequested, &wsaData );
125  if ( err != 0 ) {
126  std::cout << "Error on WSAStartup (" << err << ") occured, aborting" <<
127  std::endl;
128  return false;
129  }
130 #endif
131 
132  // init the 3rd party libraries
133  InitCUrl();
135  return true;
136 }
137 
144 static void ShutdownLibraries()
145 {
146  // deinit the 3rd party libraries
148  DeinitCUrl();
149 
150 #ifdef WIN32
151  // Cleanup Windows sockets for this process
152  WSACleanup();
153 #endif
154 }
155 
164 static void SetupLicense( const std::string& strTicket,
165  const std::string& strProduct,
166  LicenseData& aLicenseData )
167 {
168  aLicenseData.ticket = strTicket;
169  aLicenseData.product = strProduct;
170 }
171 
180 static void SetupConnectionData( DbConnectionData& aDbConnectionData )
181 {
182  aDbConnectionData.useLocalDatabase = true;
183  aDbConnectionData.dbType = DBT_Malware;
184 }
185 
193 static void PrintDbConnectionInfo( const DbConnection& aDbConnection )
194 {
195  DatabaseInformation databaseInformation =
196  aDbConnection.getDatabaseInformation();
197 
198  std::cout << "Malware Database Version: " << databaseInformation.versionString
199  << " as of " << databaseInformation.creationDateUTC << std::endl;
200 }
201 
208 static void PrintLicenseInfo( const License& aLicense )
209 {
210  const time_t expirationDate = aLicense.getExpirationDate();
211  struct tm *expirationTime = localtime( &expirationDate );
212 
213  std::cout << "License Info:" << std::endl;
214  std::cout << " DCA is " << ( aLicense.isLicensed() ? "licensed." :
215  "not licensed." ) << std::endl;
216  std::cout << " MaxUsers:" << aLicense.getMaxUsers() <<
217  std::endl;
218  std::cout << " MaxSessions:" << aLicense.getMaxSessions() <<
219  std::endl;
220  std::cout << " Ticket:" << aLicense.getTicket() <<
221  std::endl;
222  std::cout << " Session:" << aLicense.getSession() <<
223  std::endl;
224  std::cout << " Last Message:" << aLicense.getLastMessage() <<
225  std::endl;
226  std::cout << " Expiration Date:" << asctime( expirationTime ) <<
227  std::endl;
228 }
229 
234 static void PrintToolHeader()
235 {
236  std::cout << "IBM DCA Sample: " << S_ToolName << " (" << S_ToolVersion <<
237  ")" << std::endl;
238 }
239 
246 static void PrintUsage( const char *pSampleName )
247 {
248  std::cout << pSampleName << " usage:" << std::endl;
249  std::cout << S_UsageString << std::endl;
250 }
251 
262 void PrintResult( const MalwareClassificationResult& aMalwareClassificationResult,
263  const MalwareCategories& aMalwareCategories,
264  const MalwareEnums& aMalwareEnums,
265  const std::string& strIndent = "",
266  const std::string& strValueDesc = "" )
267 {
268  // each result item contains of a category id and a value related to the
269  // category
270  DCA_CATEGORY_ID_TYPE myCatId = aMalwareClassificationResult.categoryId;
271  int myValue = aMalwareClassificationResult.value;
272 
273  // retrieve the category object to print out its name
274  MalwareCategory myMalwareCategory = aMalwareCategories.byId( myCatId );
275 
276  std::cout << strIndent << "MalwareCategory '" << myMalwareCategory.name() <<
277  "' (id=" << myCatId << ")" << std::endl;
278 
279  // if the matched category contains one or more related MalwareEnum objects,
280  // print out the matched one
281  const DCA_SIZE_TYPE numOfEnums = myMalwareCategory.enumsSize();
282  if( numOfEnums > 0 ) {
283  // the retrieved value points to a unique MalwareEnum object id
284  int enumId = myMalwareCategory.enumByValue( myValue );
285  // retrieve the object with the enumId
286  const MalwareEnum myMalwareEnum = aMalwareEnums.byId( enumId );
287  std::cout << strIndent << "\t MalwareEnum '" << myMalwareEnum.name() << "' (id=" <<
288  enumId << ")" << std::endl;
289  }
290  else {
291  // no Enum object retrieved print out the value
292  std::cout << strIndent << "\tValue " << myValue << strValueDesc << std::endl;
293  }
294 }
295 
303 void PrintResults( const MalwareClassificationResults& aMalwareClassificationResults,
304  const MalwareCategoriesInfo& aMalwareCategoriesInfo )
305 {
306  if( aMalwareClassificationResults.isUnknownMalware() ) {
307  std::cout << "Results: Unknown" << std::endl;
308  return;
309  }
310  if( !aMalwareClassificationResults.isCategorized() ) {
311  std::cout << "Results: None" << std::endl;
312  return;
313  }
314 
315  const DCA_SIZE_TYPE numOfResults = aMalwareClassificationResults.size();
316 
317  std::cout << "Results (" << numOfResults << ") categories" << std::endl;
318 
319  const MalwareCategories myMalwareCategories = aMalwareCategoriesInfo.getCategories();
320  const MalwareEnums myMalwareEnums = aMalwareCategoriesInfo.getEnums();
321 
322  for( DCA_INDEX_TYPE i = 0; i < numOfResults; ++i ) {
323  const MalwareClassificationResult myMalwareClassificationResult = aMalwareClassificationResults[ i ];
324  PrintResult( myMalwareClassificationResult, myMalwareCategories, myMalwareEnums );
325  }
326 }
327 
336 void DoCallMalwareClassification( const std::vector<std::string>& aHashStrings,
337  const MalwareDbClassifier& aMalwareClassifier,
338  const MalwareCategoriesInfo& aMalwareCategoriesInfo )
339 {
340  std::vector< std::string >::const_iterator I = aHashStrings.begin();
341  std::vector< std::string >::const_iterator IEnd = aHashStrings.end();
342 
343  MalwareClassificationResults myMalwareClassificationResults;
344  FunctionResult myFR;
345 
346  for( ; I != IEnd; ++I ) {
347  const std::string& strHash = *I;
348 
349  std::cout << std::endl << "MALWARE Classification ('" << strHash << "')..." << std::endl;
350  unsigned char hashBuffer[33] = {
351  0, 0, 0, 0, 0, 0, 0, 0,
352  0, 0, 0, 0, 0, 0, 0, 0,
353  0, 0, 0, 0, 0, 0, 0, 0,
354  0, 0, 0, 0, 0, 0, 0, 0,
355  0
356  };
357  const char* hashPtr = strHash.c_str();
358  bool data_ok = true;
359  if (strHash.length() >= 64 && hashPtr) {
360  // hash string
361  for (int i = 0; i < 32; i++) {
362  unsigned char value = 0;
363  char char0 = *(hashPtr + (2*i));
364  char char1 = *(hashPtr + (2*i) + 1);
365  if (char0 >= 'A' && char0 <= 'F') {
366  char0 = char0 + 32;
367  }
368  if (char0 >= '0' && char0 <= '9') {
369  value = (unsigned char)((char0 - '0') << 4);
370  }
371  else if (char0 >= 'a' && char0 <= 'f') {
372  value = (unsigned char)((char0 - 'A' + 10) << 4);
373  }
374  else {
375  data_ok = false;
376  }
377  if (char1 >= 'A' && char1 <= 'F') {
378  char1 = char1 + 32;
379  }
380  if (char1 >= '0' && char1 <= '9') {
381  value |= ( (char1 - '0') & 0x0fU );
382  }
383  else if (char1 >= 'a' && char1 <= 'f') {
384  value |= ( (char1 - 'A' + 10) & 0x0fU );
385  }
386  else {
387  data_ok = false;
388  }
389  if (data_ok) {
390  hashBuffer[i] = value;
391  }
392  }
393  }
394 
395  if (data_ok) {
396  const Hash myHash(hashBuffer, 32);
397 
398  myFR = aMalwareClassifier.classify( myHash, myMalwareClassificationResults );
399 
400  if( !myFR ) {
401  std::cout << "Error: rc=" << myFR.getReturnCode() <<
402  ", description=" << myFR.getDescription() << std::endl;
403  }
404  else PrintResults( myMalwareClassificationResults, aMalwareCategoriesInfo );
405  }
406  }
407 }
408 
417 void TestMalwareClassification( const MalwareDbClassifier& aMalwareClassifier,
418  const MalwareCategoriesInfo& aMalwareCategoriesInfo,
419  const std::string& strFileName )
420 {
421  std::ifstream fstream( strFileName.c_str(), std::ios::in );
422  if ( !fstream.is_open() ) {
423  std::cout << "Error: File '" << strFileName << "' could not be opened!" << std::endl;
424  return;
425  }
426 
427  std::string strLine;
428  std::vector< std::string > myHashStrings;
429 
430  while( std::getline( fstream, strLine ) ) {
431  if( strLine.empty() )
432  continue;
433 
434  // delete all \r\n at the end of the line
435  while( strLine[strLine.length()-1] == '\r' ||
436  strLine[strLine.length()-1] == '\n' )
437  strLine.erase( strLine.length() - 1 );
438 
439  if( strLine.empty() )
440  continue;
441 
442  if( strLine[0] == '#' ) // allow comment lines started with a # at first char
443  continue;
444 
445  // add line to myHashStrings
446  myHashStrings.push_back( strLine );
447  }
448 
449  fstream.close();
450 
451  if( !myHashStrings.empty() )
452  DoCallMalwareClassification( myHashStrings, aMalwareClassifier, aMalwareCategoriesInfo );
453  else
454  std::cout << "Could not read any hash from file!" << std::endl;
455 }
456 
465 int main( int argc, char *argv[] )
466 {
467  PrintToolHeader();
468 
469  int rc = 5;
470 
471  try {
472  if( argc < 5 ) {
473  PrintUsage( argv[0] );
474  return 5;
475  }
476 
477  std::string strRedistFolder = argv[ 1 ];
478  const std::string strTicket = argv[ 2 ];
479  const std::string strProduct = argv[ 3 ];
480  const std::string strFileName = argv[ 4 ];
481 
482  if( strRedistFolder.empty() ||
483  strTicket.empty() ||
484  strProduct.empty() ||
485  strFileName.empty() )
486  {
487  PrintUsage( argv[0] );
488  return 5;
489  }
490 
491  // check for trailing fileslash - and add if necessary
492  const char c = strRedistFolder[ strRedistFolder.length() - 1 ];
493  if( c != '/' && c != '\\' )
494  strRedistFolder += "/";
495 
496  // Initialize socket on Windows and 3rd party libraries
497  if( !StartupLibraries() )
498  return 5;
499 
500  {
501  // setup DCA directories
502  InitData myInitData;
503  SetupInitData( strRedistFolder, myInitData );
504 
505  // instantiate DCA API
506  DcaInstance myDca;
507  myDca = DcaInstance::create( myInitData );
508 
509  // setup license data
510  LicenseData myLicenseData;
511  SetupLicense( strTicket, strProduct, myLicenseData );
512  const License myLicense = myDca.createLicense( myLicenseData );
513 
514  PrintLicenseInfo( myLicense );
515 
516  if( myLicense.isLicensed( MalwareClassification::ID ) ) {
517  // initialize the Malware Classification module
518  const MalwareClassification myMalwareClassification =
519  MalwareClassification::create( myDca, myLicense );
520 
521  DbConnectionData myDbConnectionData;
522  SetupConnectionData( myDbConnectionData );
523 
524  // A DbConnection data to a local database must exist to use
525  // MalwareClassification
526  const DbConnection myDbConnection =
527  myDca.createDbConnection( myLicense, myDbConnectionData );
528  PrintDbConnectionInfo( myDbConnection );
529 
530  // create a MalwareDbClassifier to use in the TestMalwareClassification()
531  // function
532  MalwareDbClassifierOptions myMalwareDbClassifierOptions;
533  myMalwareDbClassifierOptions.enable_Feedback = false;
534  const MalwareDbClassifier myClassifier =
535  myMalwareClassification.createDbClassifier( myDbConnection, myMalwareDbClassifierOptions );
536 
537  // create a categories info for printing out the result
538  // categories and values per input data
539  const MalwareCategoriesInfo myCategoriesInfo =
540  myMalwareClassification.getCategoriesInfo();
541 
542  // call classification routine
543  TestMalwareClassification( myClassifier, myCategoriesInfo,
544  strFileName );
545 
546  rc = 0;
547  }
548  }
549  }
550  catch( const ExDca& ex ) {
551  std::cerr << "DCA Exception occured. Details: " << ex.getDescription()
552  << " (" << ex.getReturnCode() << ")." << std::endl;
553  rc = 10;
554  }
555  catch( const std::exception& s ) {
556  std::cerr << "std::exception occured. Details: " << s.what() << "." <<
557  std::endl;
558  rc = 10;
559  }
560  catch(...) {
561  std::cerr << "Unknown exception caught." << std::endl;
562  rc = 10;
563  }
564 
565  // deinit the 3rd party libraries
566  ShutdownLibraries();
567 
568  return rc;
569 }
The result item of a Malware classification.
MalwareCategory byId(DCA_CATEGORY_ID_TYPE catid) const
Returns the category with the given category id.
Is used to create a License object. A license first must be created with DcaInstance::createLicense t...
Definition: base_classes.h:547
Main class for the Malware classification.
Exception class used in the DCA.
Definition: base_classes.h:237
Definition of the Hash class.
DCA_CATEGORY_ID_TYPE categoryId
A Malware category id.
void InitCUrl()
Initializes libcurl. Do not use any DCA function before initializing libcurl.
std::string initDir
the directory in which the DCA init files are stored
Definition: base_classes.h:266
This header includes all header files of the Malware Classification Package.
int getMaxSessions() const
Returns the maximum allowed sessions associated with your ticket/license.
time_t getExpirationDate() const
Returns the expiration date of the license in UTC.
std::string getDescription() const
Returns a description of the error.
The MalwareCategory class contains information for a single category.
DatabaseInformation getDatabaseInformation() const
Returns information about the underlying database.
Contains information about underlying database.
Definition: base_classes.h:834
DCA_RESULT_TYPE getReturnCode() const
Returns the last error code (if any).
static void SetupInitData(const std::string &redist_folder, InitData &initData)
Sets up the given initData by substituting the given redist_folder with DCA subdirectories.
std::string creationDateUTC
Definition: base_classes.h:842
Container class for all Malware enumeration objects.
std::string binDir
the directory in which the DCA binary (*.dca) files are stored
Definition: base_classes.h:265
This header includes initialization/deinitialization support functions for the 3rd party libraries us...
std::string name(const std::string &localeString=std::string()) const
Returns the localized (display) name of the category.
void SetOpenSslCallbacks()
Initializes the required callbacks for OpenSSL when using HTTPS or SSL connections in a multi-threade...
const DbType DBT_Malware
Used to specify a Malware Classification database when creating a dca::DbConnection instance.
Stores the connection data for a database.
Definition: base_classes.h:815
void UnsetOpenSslCallbacks()
Unsets the openssl callbacks. Do not call any DCA function after you have called this function.
DCA_ENUM_ID_TYPE enumByValue(int value) const
Returns the id of an enum item associated with the current MalwareCategory.
int getMaxUsers() const
Returns the maximum allowed users associated with your ticket/license.
void DeinitCUrl()
Deinitializes libcurl. Do not call any DCA function after you have called this function.
Database connection class for a local or remote database.
Definition: base_classes.h:859
#define DCA_LOGDIR
Relative directory for logfile(s).
Definition of a container class for MalwareEnum objects.
Definition: malware_enums.h:30
#define DCA_INITDIR
DCA subdirectory of the DCA initialization data.
Malware database classifier class.
DCA_RESULT_TYPE getReturnCode() const
Gets the code of the error.
std::string name(const std::string &localeString=std::string()) const
Returns the localized (display) name of the enumerable item.
DCA_SIZE_TYPE size() const
Returns the number of results in the container.
static void PrintToolHeader()
Prints out the name and the version of this sample.
Use a License to initialize a classification package or a toolbox package.
Definition: base_classes.h:560
Results of a Malware classification.
MalwareCategories getCategories() const
Returns the contained MalwareCategories.
bool isLicensed(DCA_MODULE_ID_TYPE id=0, bool force=false) const
Checks whether the given License is valid for the given module id.
static void PrintLicenseInfo(const License &aLicense)
Prints out the information about the provided License.
std::string ticket
The ticket as provided in the license.
Definition: base_classes.h:548
This header includes all header files of the DCA Base Package.
std::string versionString
Definition: base_classes.h:840
bool useLocalDatabase
Set to true to connect to a local or custom database, set to false to use a remote database.
Definition: base_classes.h:821
bool isCategorized() const
Returns whether or not the Malware matched one or more categories.
Encapsulates the init and deinit of the DCA API.
Definition: base_classes.h:315
std::string product
The product code used with the license.
Definition: base_classes.h:549
int DCA_CATEGORY_ID_TYPE
Type for category ids.
Definition: base_types.h:31
std::string logDir
the directory in which the DCA log file should be created
Definition: base_classes.h:267
static void PrintUsage(const char *name)
Prints out the syntax of the sample.
std::string getLastMessage() const
Returns the last message received from our license server or if none available the last available mes...
size_t DCA_INDEX_TYPE
Type for index access (used for arrays and collections).
Definition: base_types.h:66
DbType dbType
The type of the database.
Definition: base_classes.h:820
std::string getDescription() const
Returns the description for the error or warning.
size_t DCA_SIZE_TYPE
Type for size (used for size of array and collections).
Definition: base_types.h:72
bool isUnknownMalware() const
Returns whether a Malware is known or unknown. A Malware is unknown if it is not contained in the dat...
std::string getTicket() const
Returns the ticket of the license as string.
MalwareCategoriesInfo getCategoriesInfo() const
Retrieve the MalwareCategoriesInfo class.
#define DCA_BINDIR
DCA subdirectory of the DCA binaries.
Definition of a container class for MalwareCategory objects.
Standard function result.
Definition: base_classes.h:148
dca::FunctionResult classify(const Hash &aHash, MalwareClassificationResults &malwareResults) const
Performs the Malware classification and returns the results.
std::string getSession() const
Returns the session of the license as string.
MalwareDbClassifier createDbClassifier(const dca::DbConnection &aDbConnection, const MalwareDbClassifierOptions &options=MalwareDbClassifierOptions()) const
Create a Malware database classifier. The classifier is created by using the provided database connec...
MalwareEnum byId(DCA_ENUM_ID_TYPE id) const
Returns the MalwareEnum with the given id.
DCA_SIZE_TYPE enumsSize() const
Returns the count of MalwareEnum items associated with current MalwareCategory.
const std::string S_UsageString
Usage string, displayed if a parameter is missing.
DbConnection createDbConnection(const License &aLicense, const DbConnectionData &dbcData, const ProxySettings &proxySettings=ProxySettings(), LogLevel aLogLevel=LOG_Initial) const
Creates a DbConnection object using the given DbConnectionData.
This structure is used to initialize the DcaInstance.
Definition: base_classes.h:264
MalwareEnums getEnums() const
Returns the contained MalwareEnums.
static DcaInstance create(const InitData &initData)
Creates a DcaInstance, starts up the DCA API and initializes the required main module.
static void SetupLicense(const std::string &ticket, const std::string &product, LicenseData &licenseData)
Sets up the given licenseData by copying the given ticket and product strings.
int value
The value related to the given category.
The MalwareEnum class describes a single enumerable item (currently not used for malware)
Definition: malware_enum.h:26
License createLicense(const LicenseData &licData, const ProxySettings &proxySettings=ProxySettings(), LogLevel aLogLevel=LOG_Initial) const
Creates a License object using the given LicenseData.
int main(int argc, char *argv[])
The main routine.