Ad
CasperJS - NodeList.length Return 0
I tried to extract data from some webpages using CasperJS, I have tried adding this.wait(5000) inside getDetails(), but I don't know why direktoriNodeList.length always return 0
PhantomJS : 2.0.0
CasperJS : 1.1.0-beta3
//casperjs --proxy=127.0.0.1:9050 --proxy-type=socks5 axa-mandiri.casper.js
var casper = require("casper").create({
verbose: true,
logLevel: "info",
pageSettings: {
loadImages: false, //The script is much faster when this field is set to false
loadPlugins: false,
userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36"
}
});
var utils = require('utils');
var currentPage = 1;
var hospitals = [];
var url = 'https://www.axa-mandiri.co.id/direktori/rumah-sakit/';//Type your url
casper.start(url);//Start CasperJS
casper.waitForSelector('#main-direktori', processPage, stopScript);//Wait until content loads and then process the page
casper.run(function() {
utils.dump(hospitals);
this.exit();
});
function getDetails(){
/*
In this function you can put anything you want in order to extract your data from the website.
NOTE: This function is executed in page context, and will should be called as parameter to Casper's evaluate function.
*/
.
console.log("getDetails " + currentPage);
var details = [];
var direktoriNodeList = document.querySelectorAll("ul#main-direktori li.direktori-list");
console.log("direktoriNodeList.length " + direktoriNodeList.length);
utils.dump(direktoriNodeList);
for (var i = 0; i < direktoriNodeList.length; i++) {
console.log("querySelectorAll " + i);
var detail = {
name : direktoriNodeList[i].querySelector("div.details strong").textContent.replace(/\n/g, ''),
phone : direktoriNodeList[i].querySelector("div.details span:nth-child(1)").textContent.replace(/\n/g, ''),
map : direktoriNodeList[i].querySelector("div.map-details a.get-direction").getAttribute("href")
};
details.push(detail);
}
/*
[].forEach.call(document.querySelectorAll("ul#main-direktori li.direktori-list"), function(elem) {
console.log("querySelectorAll");
var detail = {
name : elem.querySelector("div.details strong").textContent.replace(/\n/g, ''),
phone : elem.querySelector("div.details span:nth-child(1)").textContent.replace(/\n/g, ''),
map : elem.querySelector("div.map-details a.get-direction").getAttribute("href")
};
details.push(detail);
});
*/
return JSON.stringify(details);
}
function stopScript() {
utils.dump(hospitals);
console.log("Exiting...");
this.exit();
};
function processPage() {
//your function which will do data scraping from the page. If you need to extract data from tables, from divs write your logic in this function
hospitals = hospitals.concat(this.evaluate(getDetails()));
//If there is no nextButton on the page, then exit a script because we hit the last page
if (this.exists("a.nextpostslink") == false) {
stopScript();
}
//Click on the next button
this.thenClick("a.nextpostslink").then(function() {
currentPage++;
this.waitForSelector("#main-direktori", processPage, stopScript);
});
};
Ad
Answer
casper.evaluate(fn, ...)
expects a function, not an array. Change
hospitals = hospitals.concat(this.evaluate(getDetails()));
to
hospitals = hospitals.concat(this.evaluate(getDetails));
The problem here is that you're executing the function in the outer context instead of passing it into the page context. Don't forget to register to the "remote.message" event to see console.log()
calls from the page context:
casper.on("remote.message", function(msg){
this.echo("remote> " + msg);
});
Ad
source: stackoverflow.com
Related Questions
- → How to update data attribute on Ajax complete
- → October CMS - Radio Button Ajax Click Twice in a Row Causes Content to disappear
- → Octobercms Component Unique id (Twig & Javascript)
- → Passing a JS var from AJAX response to Twig
- → Laravel {!! Form::open() !!} doesn't work within AngularJS
- → DropzoneJS & Laravel - Output form validation errors
- → Import statement and Babel
- → Uncaught TypeError: Cannot read property '__SECRET_DOM_DO_NOT_USE_OR_YOU_WILL_BE_FIRED' of undefined
- → React-router: Passing props to children
- → ListView.DataSource looping data for React Native
- → Can't test submit handler in React component
- → React + Flux - How to avoid global variable
- → Webpack, React & Babel, not rendering DOM
Ad