Download dynamic content from AJAX driven sites.

The task was to download the page content from a website that was driven by AJAX. So it was necessary to have a browser with JavaScript enabled and to wait a moment after download to give JavaScript/AJAX a chance to modify page content.

Eclipse's SWT toolkit provides such a browser widget. I used it from NetBeans and it worked fine.

So here we go:

First, we have to import the SWT classes:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import org.eclipse.swt.SWT;
import org.eclipse.swt.browser.Browser;
import org.eclipse.swt.browser.ProgressEvent;
import org.eclipse.swt.browser.ProgressListener;
import org.eclipse.swt.widgets.Display;
import org.eclipse.swt.widgets.Shell;


Next we implement a progress listener. The time offset mentioned above is here in the completed method. Adapt it!

private class MyProgressListener implements ProgressListener {

private String content;
private Boolean done;

public MyProgressListener() {
this.done = false;
}

public String getContent() {
return content;
}

public void setDone(Boolean done) {
this.done = done;
}

public Boolean getDone() {
return done;
}

public void changed(ProgressEvent event) {
this.done = false;
}


private void completed(ProgressEvent event) {
Browser browser = (Browser) event.widget;
try {
//Here is where wait to let Javascript/Ajax stuff take place and modify the content
Thread.sleep(2000);
} catch (Exception ignored) {
}
content = browser.getText();
done = true;
}
}

Then we declare our MyBrowser class. Adapt the SWT library path in the initBrowser method to your system!

/**
 * Thin wrapper around an SWT {@link Browser} that loads a URL and returns the
 * page text captured by a {@link MyProgressListener}.
 *
 * Call {@code initBrowser()} first, then {@code getURLContentByBrowser(...)}
 * as often as needed, and finally {@code disposeBrowser()}.
 */
private class MyBrowser {

    private Display display;
    private Shell shell;
    private Browser browser;
    private MyProgressListener myProgressListener;

    /** Creates the display, the (never opened) shell and the browser widget. */
    private void initBrowser() {
        // Location of the native SWT libraries; adapt to the local installation.
        System.setProperty("swt.library.path", "/usr/lib/jni");

        this.display = new Display();
        this.shell = new Shell(display);
        this.browser = new Browser(shell, SWT.NONE);
    }

    /**
     * Loads the given URL in the browser and runs the SWT event loop until the
     * progress listener reports completion.
     *
     * @param loadURL the address to load
     * @return the page text captured by the listener, or {@code null} if the
     *         browser refused to start loading the URL
     */
    private String getURLContentByBrowser(String loadURL) {
        // Register the listener before starting the load so no event can be missed.
        this.myProgressListener = new MyProgressListener();
        this.browser.addProgressListener(myProgressListener);
        try {
            if (!this.browser.setUrl(loadURL)) {
                // Nothing will ever complete; waiting would spin forever.
                return null;
            }

            while (!myProgressListener.getDone()) {
                if (!this.display.readAndDispatch()) {
                    this.display.sleep();
                }
            }
            return this.myProgressListener.getContent();
        } finally {
            // Each call installs a fresh listener; remove it so listeners from
            // earlier calls do not accumulate and keep firing.
            if (!this.browser.isDisposed()) {
                this.browser.removeProgressListener(myProgressListener);
            }
        }
    }

    /** Releases the SWT resources, children before their parents. */
    private void disposeBrowser() {
        this.browser.dispose();
        this.shell.dispose();
        this.display.dispose();
    }
}


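And finally, we'll download the content. For comparison, here is the plain stream-based download, which fetches the page source without running any JavaScript: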

/**
 * Downloads the raw content behind a URL with a plain stream, without running
 * any scripts.
 *
 * Lines are concatenated without their line terminators. The response is
 * decoded as UTF-8; the charset announced by the server is not consulted.
 * This is a best-effort helper: on any failure the stack trace is printed and
 * whatever was read up to that point is returned.
 *
 * @param loadURL the address to download
 * @return the concatenated lines of the response, or an empty string if
 *         nothing could be read
 */
private String getURLContentByStream(String loadURL) {
    StringBuilder content = new StringBuilder();
    // try-with-resources closes the stream even when reading fails part-way.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new URL(loadURL).openStream(), StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            content.append(line);
        }
    } catch (Exception e) {
        // Deliberately broad: callers expect a string back, never an exception.
        e.printStackTrace();
    }
    return content.toString();
}


Have phun

Comments

Popular Posts