Figure 1 SWS DLLs

File
Description
SWSDLL.DLL
The DLL that runs in the address space of the application to collect all the function execution information for a run. All data manipulation is done in this DLL, including file dumping and final tuning. The _penter function resides here.
SWSFILE.DLL
The DLL responsible for encapsulating the file format information. I implemented this DLL in my October column.
BUGSLAYERUTIL.DLL
The long-running Bugslayer helper DLL.
SWS.EXE
The front end to file dumping and generating the tuned order files.

Figure 4 Command-line Options

SWS.EXE Command-line option
Description
-d <module | .SWS file | .TWS file>
Dumps the data in an .SWS file or .TWS file to the screen so you can see execution counts. If you pass just the module name, the base .SWS file is used.
-g <module>
Generates the core .SWS and .SDW files for the specified module.
-t <module>
Tunes the specified module by summarizing all individual runs, producing a .TWS file that shows the execution counts and a .PRF file, which is the linker order file for the module.
-?
Help screen.
-v
Turns on verbose output to help with debugging.
-nologo
Hides the logo.

Figure 5 my_penter

  extern "C" void SWSDLL_DLLINTERFACE __declspec(naked) _penter ( void )
{
    // _penter hook: records one entry for the function that called this
    // hook.  The function is naked, so the prolog and epilog below are
    // written by hand and must leave every general purpose register
    // exactly as it was on entry.
    //
    // Stack layout once the frame is set up (32-bit x86):
    //   [EBP + 4]                  return address into the hooked function
    //   [EBP]                      hooked function's EBP
    //   [EBP - 1 .. -__LOCAL_SIZE] locals (dwCallerFunc lives here)
    //   below that                 the PUSHAD register save area
    //
    // Nothing may be pushed between "MOV EBP , ESP" and
    // "SUB ESP , __LOCAL_SIZE": the compiler addresses locals relative
    // to EBP, so an extra push there would share its stack slot with
    // dwCallerFunc and be overwritten when dwCallerFunc is stored.
    DWORD_PTR dwCallerFunc ;

    // The function prolog.
    __asm
    {
        PUSH EBP                    // Set up the standard stack frame.
        MOV  EBP , ESP

        SUB  ESP , __LOCAL_SIZE     // Reserve the space needed by the
                                    // local variables.

        PUSHAD                      // Save off all general register
                                    // values, including the EAX the
                                    // hooked function was entered with.

        // Now I can calculate the return address.  EAX is free to use
        // as scratch because PUSHAD already saved it.
        MOV  EAX , [EBP + 04h]      // The return address sits just
                                    // above the saved EBP.
        SUB  EAX , 5                // Take off the 5 byte CALL used to
                                    // call _penter to get to the start
                                    // of the function.
        MOV  [dwCallerFunc] , EAX   // Save the caller function.

    }

    // If the start/stop event is signaled, don't do any processing.
    if ( WAIT_TIMEOUT == WaitForSingleObject ( g_hStartStopEvent , 0 ))
    {
        // Do the work.
        g_cModArray.IncrementFunctionEntry ( dwCallerFunc ) ;
    }
    // The function epilog.
    __asm
    {
        POPAD                       // Restore all general purpose
                                    // values (EAX included).

        MOV ESP , EBP               // Remove the locals and restore
                                    // the standard stack frame.
        POP EBP
        RET                         // Return to caller.
    }
}

Figure 6 Symbol Enumeration Callback

  BOOL CALLBACK SymEnumSyms ( PTSTR szSymbolName    ,
                            ULONG ulSymbolAddress ,
                            ULONG ulSymbolSize    ,
                            PVOID pUserContext     )
{
    // Symbol enumeration callback.  For each symbol that has source
    // and line information and is not on the ignore list, the address
    // is added to the SWS file (with an initial value of 0) and the
    // address, size, and name are added to the SDW file.
    //
    // szSymbolName    - The name of the symbol being enumerated.
    // ulSymbolAddress - The address of the symbol.
    // ulSymbolSize    - The size of the symbol.
    // pUserContext    - Really an LPCBCONTEXT holding the pSWS and
    //                   pSDW objects to add to.
    //
    // Returns TRUE when the symbol was skipped or added to both
    // files.  Returns FALSE when adding to either file failed;
    // presumably that stops the enumeration (confirm against the
    // enumeration API's callback contract).
    LPCBCONTEXT pCtx = (LPCBCONTEXT)pUserContext ;

    CImageHlp_Line cLine ;
    DWORD dwDisp ;
    if ( FALSE == g_cSym.SymGetLineFromAddr ( ulSymbolAddress ,
                                              &dwDisp         ,
                                              &cLine            ) )
    {
        // If no source and line was found for this symbol, ignore
        // it.
        return ( TRUE ) ;
    }

    // Is this source file in the ignore file list?
    // TODO TODO
    // Add this!

    // Is this address in the module's code section?  Checking against
    // this will avoid putting IAT symbols into the output files.
    // TODO TODO
    // Add this!

    // Is this symbol in the ignore containing list?
    for ( int i = 0 ; i < IGNORE_CONTAINING_COUNT ; i++ )
    {
        if ( NULL != _tcsstr ( szSymbolName              ,
                               g_szIgnoreContaining[ i ]  ) )
        {
            // Drop out now.
            return ( TRUE ) ;
        }
    }

    // If the symbol name starts with "_", I need to move over one so
    // that the proper symbol name is used according to the linker.
    PTSTR szFinalName = szSymbolName ;
    if ( _T ( '_' ) == *szSymbolName )
    {
        szFinalName++ ;
    }

    if ( FALSE == pCtx->pSWS->AddData ( ulSymbolAddress , 0 ) )
    {
        ASSERT ( !"Adding to SWS file failed!" ) ;
        return ( FALSE ) ;
    }
    if ( FALSE == pCtx->pSDW->AddData ( ulSymbolAddress ,
                                        ulSymbolSize    ,
                                        szFinalName      ) )
    {
        ASSERT ( !"Adding to SDW file failed!" ) ;
        // Report the failure just like the SWS case above.  The
        // ASSERT alone is not enough: returning TRUE here would keep
        // going with an address in the SWS file that has no matching
        // entry in the SDW file.
        return ( FALSE ) ;
    }
    return ( TRUE ) ;
}

Figure 7 TuneModule Algorithm

  // The TuneModule algorithm.
BOOL TuneModule ( LPCTSTR szModule )
{
    Build the output .TWS filename.

    Copy the base .SWS file to a temporary file
 
    Open the temporary .SWS file

    for each szModule.#.SWS in the directory
    {
        Verify this #.SWS link time matches the 
          temporary .SWS file

        for each address in this #.SWS
        {
            Add this address's execution count to 
              the same address in the temporary file
        }
    }

    Create the empty .TWS file.
    for each item in the temporary .SWS file.
    {
        add the temporary data to the .TWS file
    }

    Copy the .TWS to a temporary .TWS file

    Get the page size for this computer
    
    while not done
    {
        Look for the first entry in the temporary 
          .TWS file that has an address

        If I looped through each address
        {
            done = TRUE 
            break 
        }
        
        If this entry has an execution count of zero
        {
            done = TRUE 
            break 
        }
        
        if this entry's size is less than the remaining
          page size
        {
            Output the entry's function name to the PRF
            Zero out the address so I don't use this one
              again
            Subtract this entry's size from the remaining
              page size
        }
        else if this entry's size is greater than a 
          page size
        {
            Punt and just write out the entry's function name to 
              the PRF as there's nothing I can do
            Zero out the address so I don't use this one again
            Reset the page amount remaining to the page size
        }
        else
        {
            // This entry is too big to fit into the space remaining
            // on the page, so find the best fitting function to pack
            // the page.
            for each item in the temporary .TWS file
            {
                if the address is not zero'd out
                {
                    if a best fit has not been found
                    {
                        Set this entry as the best fit overall
                        Set this entry as the best exec count fit
                    }
                    else
                    {
                        if this entry size is > the best fit size
                        {
                            Set this entry as the best fit overall
                        }
                        if this item has an execution count
                        {
                            if this entry size is > the best exec count 
                              size
                            {
                                Set this entry as the best exec count fit
                            }
                        }
                    }
                    
                }
            }
            if no best exec count fit was found
            {
                Set the best exec count fit to the best overall fit
            }
            Output the best exec count function name to the PRF
            Zero out that entry's address so I don't use it again
            Reset the page amount remaining to the page size
        }
        
    }
    Close any temporary files

}