For Loop Only Crawls First Link #44

Open
opened 2025-10-14 18:03:17 -06:00 by navan · 0 comments
Owner

Originally created by @arlingtonkirk on 10/24/2019

The site I'm trying to crawl is rendered by JS and the links use onClick with # as the value for href. I've successfully constructed the links via a filter, but for some reason can't crawl them in a for loop. See code below.

Your environment

  • version of node (`node --version`): 11.3
  • version of npm (`npm --version`): 6.4.1

Expected behaviour

Console should spit out 10 elements.

Actual behaviour

Console only outputs the first item.

// Reproduction script for the report above.
// Step 1: scrape a search-results page and build one job-detail URL per
//         matched row (via the `makeLink` filter).
// Step 2: request every job-detail URL and log its title and description.
var phantom = require('x-ray-phantom');
var Xray = require('..');
var x = Xray({
filters: {
// makeLink: prefixes a truthy `value` with the job-detail base URL.
// Returns the resulting string; returns null when `value` is not a string
// (a falsy string such as '' is returned unchanged, without the prefix).
makeLink: function(value) {
if(value) {
var str1 = 'https://hsbc.taleo.net/careersection/external/jobdetail.ftl?lang=en_GB&job='
// `var value` redeclares the parameter; inside a function this is the same
// binding as the parameter, so this line simply reassigns it.
var value = str1.concat(value);
}
return typeof value === 'string'
? value
: null
}
}
})
// Pages are fetched through the x-ray-phantom driver, configured with
// `webSecurity: false`.
.driver(phantom({webSecurity:false}));

// Step 1: '.ftlrow' is passed as the scope selector and the array-wrapped
// object as the per-row schema; each result object carries a `link` built by
// `makeLink`. The URL asks for `dropListSize=10` — presumably ten rows per
// page; confirm against the site.
x('https://hsbc.taleo.net/careersection/external/moresearch.ftl?lang=en_GB&dropListSize=10', '.ftlrow', [
{
link: '.editablesection div:nth-child(3) span:nth-child(5) | makeLink',
}
]).then(function(myObj) {

// Declared but never assigned or read in this script.
let myRecord;	

// Step 2: `for...in` walks the keys of the step-1 result. Every iteration
// starts a new request immediately; the promise returned by `.then` is neither
// returned nor awaited, so the loop does not wait for one request to finish
// before starting the next.
// NOTE(review): all requests therefore go through the same `x` instance and
// driver at once — confirm whether x-ray-phantom supports concurrent requests,
// as this may be related to the reported symptom.
for (var key in myObj) {
	// `var` is function-scoped: `key` and `myLink` are shared by all iterations.
	var myLink = myObj[key]['link'];
	x(myLink, {
	  title: '.titlepage',
	  description: '.text'
	// The callback parameter `myObj` shadows the outer `myObj` (the link list).
	// There is no rejection handler here, so a failed request is not logged.
	}).then(function(myObj) {
		console.log(myObj);
		//why is output only one item and not all?
		//console.log(key);
	})
}

//console.log(myRecord);
//it's only showing one time because x is overwritten? 

// The outer chain has no rejection handler either.
})

*Originally created by @arlingtonkirk on 10/24/2019*

The site I'm trying to crawl is rendered by JS and the links use onClick with # as the value for href. I've successfully constructed the links via a filter, but for some reason can't crawl them in a for loop. See code below.

### Your environment

* version of node: *run node --version* 11.3
* version of npm: *run npm --version* 6.4.1

### Expected behaviour

Console should spit out 10 elements.

### Actual behaviour

Console only outputs first item.

```js
var phantom = require('x-ray-phantom');
var Xray = require('..');
var x = Xray({
filters: {
makeLink: function(value) {
if(value) {
var str1 = 'https://hsbc.taleo.net/careersection/external/jobdetail.ftl?lang=en_GB&job='
var value = str1.concat(value);
}
return typeof value === 'string'
? value
: null
}
}
})
.driver(phantom({webSecurity:false}));

x('https://hsbc.taleo.net/careersection/external/moresearch.ftl?lang=en_GB&dropListSize=10', '.ftlrow', [
{
link: '.editablesection div:nth-child(3) span:nth-child(5) | makeLink',
}
]).then(function(myObj) {

let myRecord;

for (var key in myObj) {
	var myLink = myObj[key]['link'];
	x(myLink, {
	  title: '.titlepage',
	  description: '.text'
	}).then(function(myObj) {
		console.log(myObj);
		//why is output only one item and not all?
		//console.log(key);
	})
}

//console.log(myRecord);
//it's only showing one time because x is overwritten?

})
```
Sign in to join this conversation.
No milestone
No project
No assignees
1 participant
Notifications
Due date
The due date is invalid or out of range. Please use the format "yyyy-mm-dd".

No due date set.

Dependencies

No dependencies set.

Reference: github/x-ray#44
No description provided.