tcppwebbrowser

How to Get the HTML Content From TCppWebBrowser in C++Builder

//-----------------------------------.h file --------------------------------

#ifndef Unit1H
#define Unit1H
//---------------------------------------------------------------------------
#include "Classes.hpp"
#include "Controls.hpp"
#include "StdCtrls.hpp"
#include "Forms.hpp"
#include "SHDocVw_OCX.h"
#include "OleCtrls.hpp"
//---------------------------------------------------------------------------
// Main form of the demo. It hosts an embedded web browser control, a memo and
// three buttons; each button has a click handler declared below.
class TForm1 : public TForm
{
__published: // IDE-managed Components
    TButton *Button1;                 // clicked -> Button1Click
    TButton *Button2;                 // clicked -> Button2Click
    TCppWebBrowser *CppWebBrowser1;   // embedded browser control the handlers operate on
    TMemo *Memo1;                     // receives the HTML text extracted by the handlers
    TButton *Button3;                 // clicked -> Button3Click
    // Click handlers; their implementations live in the .cpp file.
    void __fastcall Button1Click(TObject *Sender);
    void __fastcall Button2Click(TObject *Sender);
    void __fastcall Button3Click(TObject *Sender);
private: // User declarations
public:  // User declarations
    // Constructs the form; Owner is forwarded to the TForm base class.
    __fastcall TForm1(TComponent* Owner);
};
//---------------------------------------------------------------------------
extern PACKAGE TForm1 *Form1;
//---------------------------------------------------------------------------
#endif

//----------------------------------.cpp file -----------------------------

//---------------------------------------------------------------------------

#include <vcl.h>
#pragma hdrstop

#include "Unit1.h"
#include <mshtml.h>
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma link "SHDocVw_OCX"
#pragma resource "*.dfm"
TForm1 *Form1;
//---------------------------------------------------------------------------
// Form constructor: passes Owner on to the TForm base class and performs no
// further initialization of its own.
__fastcall TForm1::TForm1(TComponent* Owner)
    : TForm(Owner)
{
}
//---------------------------------------------------------------------------

// Button1 click handler: navigates the embedded browser to the demo site.
//
// Navigate() takes a BSTR. A plain wide string literal has no BSTR length
// prefix, so the URL is first copied into a WideString, which owns a real
// BSTR and frees it automatically when it goes out of scope.
void __fastcall TForm1::Button1Click(TObject *Sender)
{
    WideString url(L"http://codeback.net");
    CppWebBrowser1->Navigate(url.c_bstr());
}

//---------------------------------------------------------------------------
// Button2 click handler: copies the inner HTML of the current document's
// <body> element into Memo1.
//
// Memo1 is left untouched when no document is loaded or when any of the COM
// calls fails. Every interface pointer and BSTR obtained here is released
// before the handler returns.
void __fastcall TForm1::Button2Click(TObject *Sender)
{
    // Read the Document property once; the returned IDispatch reference is
    // owned by the caller and must be released.
    IDispatch* pDocDisp = CppWebBrowser1->Document;
    if (pDocDisp == NULL)
        return;

    IHTMLDocument2* pDoc = NULL;
    HRESULT hr = pDocDisp->QueryInterface(IID_IHTMLDocument2, (void**)&pDoc);
    pDocDisp->Release();
    if (FAILED(hr) || pDoc == NULL)
        return;

    // get_body can succeed and still hand back NULL (e.g. page not parsed yet).
    IHTMLElement* pBody = NULL;
    if (SUCCEEDED(pDoc->get_body(&pBody)) && pBody != NULL)
    {
        BSTR strContent = NULL;
        if (SUCCEEDED(pBody->get_innerHTML(&strContent)))
            Memo1->Text = AnsiString(strContent);
        SysFreeString(strContent);   // safe on NULL
        pBody->Release();
    }

    // The document is only read here, so IHTMLDocument2::close() is not called.
    pDoc->Release();
}

//---------------------------------------------------------------------------
void __fastcall TForm1::Button3Click(TObject *Sender)
{
    IHTMLDocument2* pDoc;
    IDispatch* spDispatch;
    IHTMLElement* pElement;
    IHTMLElementCollection* pElementCol;

    if (SUCCEEDED(CppWebBrowser1->Document->QueryInterface(IID_IHTMLDocument2, (void**)&pDoc)))
    {
        if (SUCCEEDED(pDoc->get_all(&pElementCol)))
        {
            long lLength = 0;
            if (SUCCEEDED(pElementCol->get_length(&lLength)))
            {
                for (int i = 0; i < lLength; ++i)
                {
                    VARIANT index;
                    index.vt = VT_I4;
                    index.lVal = i;
                    if (SUCCEEDED(pElementCol->item(index, index, &spDispatch)))
                    {
                        if(SUCCEEDED(spDispatch->QueryInterface(IID_IHTMLElement, (void**)&pElement)))
                        {
                            BSTR TagName = NULL;
                            pElement->get_tagName(&TagName);

                            if (AnsiString(TagName).AnsiPos("HTML") > 0)
                            {
                                BSTR HtmlContent;
                                pElement->get_outerHTML(&HtmlContent);
                                Memo1->Text = AnsiString(HtmlContent);
                                SysFreeString(HtmlContent);
                            }
                            SysFreeString(TagName);

                            pElement->Release();
                        }
                        spDispatch->Release();
                    }

                }//end of for

            }
            pElementCol->Release();
        }

        pDoc->Release();
    }
}
//---------------------------------------------------------------------------

//----------------------------------Form1 text--------------------------

object Form1: TForm1
  Left = 271
  Top = 173
  Width = 743
  Height = 495
  Caption =
    'How to get the html content from TCppWebBrowser in C Builder--Fr' +
    'om codeback.net'
  Color = clBtnFace
  Font.Charset = DEFAULT_CHARSET
  Font.Color = clWindowText
  Font.Height = -11
  Font.Name = 'MS Sans Serif'
  Font.Style = []
  OldCreateOrder = False
  PixelsPerInch = 96
  TextHeight = 13
  object Button1: TButton
    Left = 208
    Top = 416
    Width = 75
    Height = 25
    Caption = 'GoUrl'
    TabOrder = 0
    OnClick = Button1Click
  end
  object Button2: TButton
    Left = 320
    Top = 416
    Width = 81
    Height = 25
    Caption = 'GetBodyHtml'
    TabOrder = 1
    OnClick = Button2Click
  end
  object CppWebBrowser1: TCppWebBrowser
    Left = 16
    Top = 8
    Width = 417
    Height = 393
    TabOrder = 2
    ControlData = {
      4C000000192B00009E2800000000000000000000000000000000000000000000
      000000004C000000000000000000000001000000E0D057007335CF11AE690800
      2B2E126208000000000000004C0000000114020000000000C000000000000046
      8000000000000000000000000000000000000000000000000000000000000000
      00000000000000000100000000000000000000000000000000000000}
  end
  object Memo1: TMemo
    Left = 440
    Top = 8
    Width = 273
    Height = 393
    Lines.Strings = (
      'Memo1')
    TabOrder = 3
  end
  object Button3: TButton
    Left = 432
    Top = 416
    Width = 81
    Height = 25
    Caption = 'GetFullHtml'
    TabOrder = 4
    OnClick = Button3Click
  end
end

The code above has been tested in a C++Builder 6 + Windows 7 environment.

 

How to get the HTML content from TCppWebBrowser in C++Builder? From codeback.net

Here I supply two methods. If you just want to get the HTML content of the body tag, you can use the following code. To run this program, create a new project, put a CppWebBrowser, a Memo and 3 buttons onto the form, then assign the code to the buttons' click handlers. You can get the full project source code here:

Get Html Source in TCppWebBrowser

//---------------------------------------------------------------------------
// Button2 click handler: copies the inner HTML of the current document's
// <body> element into Memo1.
//
// Memo1 is left untouched when no document is loaded or when any of the COM
// calls fails. Every interface pointer and BSTR obtained here is released
// before the handler returns.
void __fastcall TForm1::Button2Click(TObject *Sender)
{
    // Read the Document property once; the returned IDispatch reference is
    // owned by the caller and must be released.
    IDispatch* pDocDisp = CppWebBrowser1->Document;
    if (pDocDisp == NULL)
        return;

    IHTMLDocument2* pDoc = NULL;
    HRESULT hr = pDocDisp->QueryInterface(IID_IHTMLDocument2, (void**)&pDoc);
    pDocDisp->Release();
    if (FAILED(hr) || pDoc == NULL)
        return;

    // get_body can succeed and still hand back NULL (e.g. page not parsed yet).
    IHTMLElement* pBody = NULL;
    if (SUCCEEDED(pDoc->get_body(&pBody)) && pBody != NULL)
    {
        BSTR strContent = NULL;
        if (SUCCEEDED(pBody->get_innerHTML(&strContent)))
            Memo1->Text = AnsiString(strContent);
        SysFreeString(strContent);   // safe on NULL
        pBody->Release();
    }

    // The document is only read here, so IHTMLDocument2::close() is not called.
    pDoc->Release();
}

If instead you want to get the whole web document's HTML source, you have to use a method like this:

//---------------------------------------------------------------------------
void __fastcall TForm1::Button3Click(TObject *Sender)
{
    IHTMLDocument2* pDoc;
    IDispatch* spDispatch;
    IHTMLElement* pElement;
    IHTMLElementCollection* pElementCol;

    if (SUCCEEDED(CppWebBrowser1->Document->QueryInterface(IID_IHTMLDocument2, (void**)&pDoc)))
    {
        if (SUCCEEDED(pDoc->get_all(&pElementCol)))
        {
            long lLength = 0;
            if (SUCCEEDED(pElementCol->get_length(&lLength)))
            {
                for (int i = 0; i < lLength; ++i)
                {
                    VARIANT index;
                    index.vt = VT_I4;
                    index.lVal = i;
                    if (SUCCEEDED(pElementCol->item(index, index, &spDispatch)))
                    {
                        if(SUCCEEDED(spDispatch->QueryInterface(IID_IHTMLElement, (void**)&pElement)))
                        {
                            BSTR TagName = NULL;
                            pElement->get_tagName(&TagName);

                            if (AnsiString(TagName).AnsiPos("HTML") > 0)
                            {
                                BSTR HtmlContent;
                                pElement->get_outerHTML(&HtmlContent);
                                Memo1->Text = AnsiString(HtmlContent);
                                SysFreeString(HtmlContent);
                            }
                            SysFreeString(TagName);

                            pElement->Release();
                        }
                        spDispatch->Release();
                    }

                }//end of for

            }
            pElementCol->Release();
        }

        pDoc->Release();
    }
}
//---------------------------------------------------------------------------

If you are using Delphi, see the same method written in Pascal here: http://codeback.net/how-to-get-a-webpages-html-source-by-using-twebbrowser-component-in-delphi

 

In C++Builder there is a useful component, TCppWebBrowser, which lets us access internet web pages easily. Manipulating table elements, however, is not so easy. The following code shows how to do it.

// Fragment of a function body (the enclosing function is not shown): locates a
// table by name in the HTML document, then a row and a cell by index, and
// writes new inner HTML into that cell.
// The fragment relies on names that are not declared here: lpDispatch,
// strTableName, nRow, nColumn and strValue.
TCppWebBrowser* pWebBrowser;   // NOTE(review): declared here but never assigned before it is used below
IHTMLDocument2* pHtmlDoc2;
ASSERT(lpDispatch);

//Query IHTMLDocument2 interface
HRESULT hr = pWebBrowser->QueryInterface(IID_IHTMLDocument2, (void**)&pHtmlDoc2);
if(hr != S_OK)
{
    return FALSE;
}

//Getting table dispatch
// Look the table up by name in the document's "all" collection.
// NOTE(review): the results of get_all() and item() are not checked, and
// distable is not initialized before item() is called.
IHTMLElementCollection *all;
pHtmlDoc2->get_all(&all);
IDispatch* distable;
all->item(COleVariant(strTableName.c_str()), COleVariant(short(0)), &distable);
IHTMLTable *pITable = NULL;
if(distable != NULL)
{
    hr = distable->QueryInterface(IID_IHTMLTable, (void**)&pITable);
    ASSERT(hr == S_OK);
}

//Getting row dispatch
// NOTE(review): pITable is dereferenced here even when it is still NULL
// (distable was NULL or the QueryInterface above failed).
IHTMLElementCollection *pIRows;
hr = pITable->get_rows(&pIRows);
ASSERT(hr == S_OK);
IDispatch *disrow;
hr = pIRows->item(COleVariant(long(nRow)),COleVariant(short(0)),&disrow);
if (hr != S_OK || disrow == NULL)
    return FALSE;

IHTMLTableRow *pIRow = NULL;
if(disrow != NULL)
{
    hr=disrow->QueryInterface(IID_IHTMLTableRow, (void**)&pIRow);
    ASSERT(hr==S_OK);
}

//Getting cells
IHTMLElementCollection* rowcells = NULL;
if(pIRow != NULL)
{
    pIRow->get_cells(&rowcells);
}

IDispatch* discell = NULL;
if(rowcells != NULL)
{
    hr=rowcells->item(COleVariant(long(nColumn)),COleVariant(short(0)),&discell);// select the cell at column index nColumn
    if(hr != S_OK || discell == NULL)
        return FALSE;
}

IHTMLElement* cell = NULL;
if(discell != NULL)
{
    hr = discell->QueryInterface(IID_IHTMLElement,(void **)&cell);
    ASSERT(hr == S_OK);
}

//Setting the cell's inner HTML from strValue
// NOTE(review): cell is dereferenced below without a NULL check, although it
// stays NULL whenever pIRow, rowcells or discell was NULL.
CString strHtmlValue;
strHtmlValue = strValue.c_str();
BSTR bsStr = strHtmlValue.AllocSysString();
cell->put_innerHTML(bsStr);
SysFreeString(bsStr);

//Release interfaces
// NOTE(review): only cell, pIRow and pITable are released. pHtmlDoc2, all,
// distable, pIRows, disrow, rowcells and discell are never released in this
// fragment, and the early "return FALSE" paths release nothing.
if (cell)
    cell->Release();

if (pIRow)
    pIRow->Release();

if (pITable)
    pITable->Release();

 

This happens because the OleInitialize function has not been called. OLE operations are not thread-safe, so operating on the memory without this initialization can cause failures or errors. OleInitialize ensures that the modules involved run in STA mode (that is, a single-threaded apartment), which avoids resource-sharing errors. Therefore, the program's entry function WinMain should be changed as follows.
// Program entry point: initializes OLE for the main thread before the normal
// start-up code runs and uninitializes it again on the way out.
// OleUninitialize is only called when OleInitialize succeeded, so the two
// calls always stay balanced.
WINAPI WinMain (HINSTANCE, HINSTANCE, LPSTR, int)
{
    const HRESULT hrOle = OleInitialize (NULL);
    try
    {
        // ... ... Normal statements
    }
    catch (...)
    {
        // ... ... Normal statements
    }
    if (SUCCEEDED (hrOle))
        OleUninitialize ();
    return 0;
}

In a C++Builder program, you can also call the OleInitialize function in the main form's OnCreate event, as shown below. Finally, call the OleUninitialize function when the form is destroyed.

// Form OnCreate handler: initializes OLE by calling OleInitialize.
// NOTE(review): the HRESULT returned by OleInitialize is ignored, while
// FormDestroy calls OleUninitialize unconditionally.
void __fastcall TForm1::FormCreate(TObject *Sender)
{
    OleInitialize(NULL);   
}

// Form OnDestroy handler: calls OleUninitialize, the counterpart of the
// OleInitialize call made in FormCreate.
void __fastcall TForm1::FormDestroy(TObject *Sender)
{
    OleUninitialize();   
}

In a Delphi program, the same approach applies.

 

When we write a program using TCppWebBrowser, we usually want a new web page to open in our own custom window. However, if you do not handle the OnNewWindow2 event, the default web browser is launched to open the new URL. This example shows how to handle the OnNewWindow2 event to get the result we want.

Create a new project and put a TCppWebBrowser onto the form. Assign code to the form's OnShow handler and to CppWebBrowser1's OnNewWindow2 handler, as follows.

// Form OnShow handler: navigates the embedded browser to the demo site.
//
// Navigate() takes a BSTR. A plain wide string literal has no BSTR length
// prefix, so the URL is first copied into a WideString, which owns a real
// BSTR and frees it automatically when it goes out of scope.
void __fastcall TForm1::FormShow(TObject *Sender)
{
    WideString url(L"http://codeback.net");
    CppWebBrowser1->Navigate(url.c_bstr());
}
//---------------------------------------------------------------------------

// OnNewWindow2 handler: creates another TForm1 instance, shows it, and hands
// that form's browser control back through ppDisp.
void __fastcall TForm1::CppWebBrowser1NewWindow2(TObject *Sender,
      LPDISPATCH *ppDisp, VARIANT_BOOL *Cancel)
{
    //ppDisp: Out-parameter; set below to the interface of the new form's TCppWebBrowser.
    //Cancel: Not modified by this handler.
    //        NOTE(review): the NewWindow2 event documentation describes Cancel as
    //        cancelling the new-window operation when set to true - confirm the
    //        exact effect before relying on it.
    // The new form is owned by this form (passed as Owner to the constructor).
    TForm1 *fmNew = new TForm1(this);
    fmNew->Show();
    // NOTE(review): no explicit AddRef is made on the interface stored in
    // *ppDisp - confirm the reference-count expectations for ControlInterface.
    *ppDisp = fmNew->CppWebBrowser1->ControlInterface;
}
//---------------------------------------------------------------------------

To see how to solve the same problem in Delphi, check the article "How to open a new web page in your custom window with TWebBrowser in Delphi".

 

The web browser control supports many interfaces for controlling the browser. This sample shows how to insert an image at the current position in the web browser component.

Make a new project, then put a CppWebBrowser and a button on the form. Double-click the button and add the following code.

// Button1 click handler: runs the MSHTML "InsertImage" command on the current
// document with the user-interface flag set, so the browser shows its own
// insert-image dialog.
//
// Does nothing when no document is loaded or the document does not expose
// IHTMLDocument2. All COM resources obtained here are released.
void __fastcall TForm1::Button1Click(TObject *Sender)
{
    // Read the Document property once; the returned IDispatch reference is
    // owned by the caller and must be released.
    IDispatch* pDocDisp = CppWebBrowser1->Document;
    if (pDocDisp == NULL)
        return;

    IHTMLDocument2* doc = NULL;
    HRESULT hr = pDocDisp->QueryInterface(IID_IHTMLDocument2, (void**)&doc);
    pDocDisp->Release();
    if (FAILED(hr) || doc == NULL)
        return;

    // execCommand expects a real BSTR, not a plain wide string literal.
    BSTR cmd = SysAllocString(L"InsertImage");
    if (cmd != NULL)
    {
        VARIANT var;
        VariantInit(&var);                      // VT_EMPTY: no explicit value supplied
        VARIANT_BOOL receive = VARIANT_FALSE;
        doc->execCommand(cmd, VARIANT_TRUE, var, &receive);
        VariantClear(&var);
        SysFreeString(cmd);
    }

    doc->Release();
}

Run your project and enjoy the program! Of course, don't forget to add "#include <mshtml.h>" in your header.

To make the CppWebBrowser editable, see the article here: http://codeback.net/how-to-make-the-tcppwebbrowser-editable-in-c-builder