// dealer-common.js
const fetcher = require('./fetch-with-cache.js');
const cheerio = require('cheerio');
const fs = require('fs');
const childProcess = require('child_process');
// URL template for a car's window sticker. parseResults substitutes the
// literal placeholders MAKE and VIN with the car's make and VIN.
const windowStickerUrl = 'https://window-sticker-services.pse.dealer.com/windowsticker/MAKE?vin=VIN'
/**
 * Chooses the inventory-search path that gets appended to a dealer's base URL.
 *
 * @param {string} dealerUrl - Dealer base URL; only inspected when `make` is not 'subaru'.
 * @param {string} make - Vehicle make; 'subaru' selects the Outback Onyx XT search.
 * @returns {string} Path plus query string, relative to the dealer base URL.
 */
function getQueryByDealer(dealerUrl, make) {
  const subaruQuery = 'new-inventory/index.htm?search=&model=Outback&trim=Onyx+Edition+XT';
  // This still does not give us what the browser gets via node-fetch,
  // but it seems to work via puppeteer.
  // From what I can tell, the request is always to 'new-vehicles/' and all the
  // filtering by model and pagination seems to happen in-browser (AJAX?)
  // so node-fetch / curl will never see it.
  const fremontQuery = 'new-vehicles/#action=im_ajax_call&perform=get_results&model=Wrangler&page=1';
  const defaultQuery = 'new-inventory/index.htm?search=&model=Wrangler';

  // The make check comes first so that a Subaru lookup never touches dealerUrl.
  if (make === 'subaru') {
    return subaruQuery;
  }
  return dealerUrl.includes('fremontcdjr') ? fremontQuery : defaultQuery;
}
/**
 * Fetches a dealer's search results and returns every car parsed from them,
 * requesting further pages while the page reports more cars than collected.
 *
 * Pagination works by swapping the empty `search=` parameter in the URL for
 * `start=<number of cars collected so far>`. If the URL has no `search=`
 * there is nothing to swap, so only the first page is returned (with a
 * warning) instead of re-fetching the same page and appending duplicates.
 *
 * @param {string} dealerUrl - Dealer base URL; the query is appended to it verbatim.
 * @param {string} make - Vehicle make, forwarded to the parser and the default query lookup.
 * @param {string} [query] - Search path/query; defaults to getQueryByDealer(dealerUrl, make).
 * @returns {Promise<object[]>} The cars parsed from all fetched pages.
 */
async function fetchFromDealer(dealerUrl, make, query) {
  const PAGINATION_PLACEHOLDER = 'search=';
  const effectiveQuery = query || getQueryByDealer(dealerUrl, make);
  const url = `${dealerUrl}${effectiveQuery}`;
  const canPaginate = url.includes(PAGINATION_PLACEHOLDER);

  const firstBody = await fetcher.getHtml(url);
  let result = await parseResults(firstBody, dealerUrl, make, url);
  const cars = result.cars;

  while (result.reportedCars > cars.length) {
    if (!canPaginate) {
      console.warn(
        `Cannot paginate ${url}: expected ${result.reportedCars} car(s) but only the first page can be fetched`
      );
      break;
    }
    const paginatedUrl = url.replace(PAGINATION_PLACEHOLDER, `start=${cars.length}`);
    const body = await fetcher.getHtml(paginatedUrl);
    result = await parseResults(body, dealerUrl, make, paginatedUrl);
    cars.push(...result.cars);
  }

  console.log(`${cars.length} car(s) parsed from ${url}`);
  return cars;
}
/**
 * Downloads a single car's detail page and extracts its VIN.
 *
 * Two locations are tried in order: the text following "VIN:" inside
 * `.additional-details`, then the `.value` element inside `.vin`.
 * A warning is logged when neither yields anything.
 *
 * @param {string} url - URL of the car's detail page.
 * @returns {Promise<string>} The VIN, or an empty string when none was found.
 */
async function getVinFromCarPage(url) {
  const marker = 'VIN:';
  const $ = cheerio.load(await fetcher.getHtml(url));

  const details = $('.additional-details').text();
  const markerAt = details.indexOf(marker);
  // Everything after the marker, to the end of the details text, is taken as the VIN.
  const fromDetails = markerAt === -1 ? '' : details.slice(markerAt + marker.length).trim();

  const vin = fromDetails || $('.value', $('.vin')).text().trim();
  if (!vin) {
    console.warn(`Could not get VIN from ${url}`);
  }
  return vin;
}
/**
 * Parses one page of dealer search results into car records.
 *
 * Cars are processed one at a time and each one is awaited, so a car whose
 * VIN has to be fetched from its own detail page is still part of the
 * returned array (an un-awaited async callback in `.each()` would push it
 * only after this function had already returned).
 *
 * @param {string} body - HTML of the search-results page.
 * @param {string} dealer - Dealer base URL; prefixed to each car's link.
 * @param {string} make - Make substituted into the window-sticker URL.
 * @param {string} pageUrl - URL the page was fetched from; stored on each car and used in logs.
 * @returns {Promise<{cars: object[], reportedCars: number}>} The parsed cars and the
 *   count shown in `.vehicle-count` (0 when it is not numeric or no cars were found).
 */
async function parseResults(body, dealer, make, pageUrl) {
  const content = cheerio.load(body);
  // Dump of the most recently parsed page, overwritten on every call.
  fs.writeFileSync('page.html', body);
  const numCars = content('.vehicle-count').last().text();
  const carList = content('.hproduct', '.bd');
  if (carList.length === 0) {
    savePageForInspection(content, body, dealer, pageUrl);
  }

  const locality = content('.locality').text().trim();
  const region = content('.region').text().trim();
  const page = {
    dealer,
    make,
    pageUrl,
    dealerName: content('.org').text().trim() || 'Unkown dealer name',
    dealerAddress: `${content('.street-address').text().trim()}, ${locality}, ${region}, ${content('.postal-code').text().trim()}`,
    dealerCityState: `${locality}, ${region}`,
  };

  const cars = [];
  // Sequential on purpose: keeps page order and avoids firing a burst of
  // detail-page requests at the dealer when many VINs are missing.
  for (const car of carList.toArray()) {
    cars.push(await parseCar(content, car, page));
  }

  const count = Number.parseInt(numCars, 10);
  const reportedCars = !Number.isNaN(count) && carList.length > 0 ? count : 0;
  return { cars, reportedCars };
}

/** Saves a page that yielded no cars (plus a curl re-download) under crap/ for debugging. */
function savePageForInspection(content, body, dealer, pageUrl) {
  console.error(`No cars found at ${pageUrl}`);
  // Fall back to a fixed name instead of throwing when the URL does not match.
  const match = dealer.match(/https?\:\/\/(www\.)?(?<dealer>\w*)\./);
  const dealerForFile = match ? match.groups.dealer : 'unknown';
  fs.mkdirSync('crap', { recursive: true });
  fs.writeFileSync(`crap/page_${dealerForFile}.html`, body);
  // execFile passes the URL as an argument, so it is never interpreted by a shell.
  childProcess.execFile(
    'curl',
    ['-L', pageUrl, '-o', `crap/curl_${dealerForFile}.html`],
    (err) => {
      if (err) {
        console.error(`curl failed for ${pageUrl}: ${err.message}`);
      }
    }
  );
  console.error('Page saved for inspection');
  if (content('*').text().toUpperCase().includes('CAPTCHA')) {
    console.error(`Captcha request at ${pageUrl}`);
  }
}

/** Builds the record for one search-result element, fetching the car page if the VIN is missing. */
async function parseCar(content, car, page) {
  const stripCommas = (text) => text.replace(/,/g, '');
  const describe = (label) =>
    stripCommas(content(`.description dt:contains("${label}")`, car).next().text());

  const link = content('.url', car);
  const name = link.text().trim();
  const url = `${page.dealer}${link.attr('href')}`;
  const imgUrl = content('img', content('.media', car)).attr('src');
  const pricing = content('.pricing', car);
  const priceItems = content('li', pricing);

  // Each selector is only tried when the previous ones produced no text.
  const msrp =
    priceItems.find('.msrp').find('.value').text() ||
    content('.an-msrp .price', pricing).text() ||
    content('span', pricing).first().next().text() ||
    content('.value', content('.salePrice', pricing)).text() ||
    '0'; // put bad ones at the top.
  const finalPrice =
    priceItems.find('.final-price').find('.value').text() ||
    content('.an-final-price .price', pricing).text() ||
    '0';
  const internetPrice = priceItems.find('.internetPrice').find('.value').text();

  let vin = content('.vin dd', car).text();
  if (!vin) {
    vin = await getVinFromCarPage(url);
    if (!vin) {
      console.error(`Could not get vin from search resutls at ${page.pageUrl} or car page at ${url}`);
    }
  }

  const prices = priceItems
    .toArray()
    .map((price) => ({
      label: content('.label', price).text(),
      number: content('.value', price).text(),
    }))
    .filter(({ label }) => label);

  return {
    pageUrl: page.pageUrl,
    dealerName: page.dealerName,
    dealerAddress: page.dealerAddress,
    dealerCityState: page.dealerCityState,
    name,
    url,
    imgUrl,
    engine: describe('Engine:'),
    color: describe('Exterior Color:'),
    vin,
    stockNo: describe('Stock #:'),
    // A missing VIN leaves the parameter empty rather than the text "undefined".
    windowSticker: windowStickerUrl.replace('MAKE', page.make).replace('VIN', vin || ''),
    msrp,
    internetPrice,
    finalPrice,
    prices,
  };
}
// Public API of this module: only these two functions are exported.
module.exports = { fetchFromDealer, getQueryByDealer }