Answer the question
In order to leave comments, you need to log in
How to loop through all the tags in an HTML file and populate the JSON according to a set of rules?
Hello!
I have a local HTML file that is essentially a list of articles; each article has a title, a date, a body, and a link. (There is an example at the end of the question.)
I'm using Node.js, fs, and cheerio to populate a JSON object with data from this HTML file.
How can I go through the file sequentially and fill in the following JSON as I go?
[
{ title: 'abc', date: '10.10.10', body: ' P tags here', href: '' },
{ title: 'abc', date: '10.10.10', body: ' P tags here', href: '' },
{ title: 'abc', date: '10.10.10', body: ' P tags here', href: '' }
]
<!--
  Example input: two articles back to back, with no wrapper element around
  either one. Each article consists of an h1.header (title), an h2.date, a run
  of class-less <p> elements (body), and an a.source element with the link.
  NOTE(review): an <a> nested inside another <a> is not valid HTML; a
  spec-compliant parser presumably leaves a.source empty and makes the inner
  link its next sibling. Confirm how cheerio parses this before relying on it.
-->
<h1 class="header"><a id="_Toc446404887"></a><a id="_Toc446498856"></a><a id="_Toc473706079"></a>Тест 1</h1>
<h2 class="date"><a id="_Toc446411230"></a><a id="_Toc446404888"></a><a id="_Toc446318335"></a><a id="_Toc446498857"></a><a id="_Toc473706080"></a>10.10.10</h2>
<p>P</p>
<p>tags</p>
<p>here</p>
<a class="source"><a href="test.html">test.html</a></a>
<h1 class="header"><a id="_Toc446404887"></a><a id="_Toc446498856"></a><a id="_Toc473706079"></a>Тест 1</h1>
<h2 class="date"><a id="_Toc446411230"></a><a id="_Toc446404888"></a><a id="_Toc446318335"></a><a id="_Toc446498857"></a><a id="_Toc473706080"></a>10.10.10</h2>
<p>P</p>
<p>tags</p>
<p>here</p>
<a class="source"><a href="test.html">test.html</a></a>
Answer the question
In order to leave comments, you need to log in
I am not familiar with Cheerio, but as I understand it, it is similar to jQuery.
Maybe you can put together something like this with it:
<!DOCTYPE html>
<html>
<head>
  <!-- Encoding is declared first so the Cyrillic sample titles decode correctly. -->
  <meta charset="utf-8">
  <title>Example</title>
  <!-- jQuery 3.5.1 from the official CDN; the integrity hash pins the exact file. -->
  <script src="https://code.jquery.com/jquery-3.5.1.min.js" integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0=" crossorigin="anonymous"></script>
</head>
<body>
<!--
  Sample data: two articles laid out as flat siblings (no wrapper element).
  Per article: h1.header = title, h2.date = date, class-less <p> elements =
  body, a.source = marker for the source link.
  NOTE: an <a> nested inside another <a> is invalid HTML. The browser closes
  a.source before opening the inner link, so the href-bearing <a> ends up as
  the node immediately after an empty a.source. Do not add whitespace or
  other nodes between the two anchors; the script in this page depends on
  that adjacency.
-->
<h1 class="header"><a id="_Toc446404887"></a><a id="_Toc446498856"></a><a id="_Toc473706079"></a>Тест 1</h1>
<h2 class="date"><a id="_Toc446411230"></a><a id="_Toc446404888"></a><a id="_Toc446318335"></a><a id="_Toc446498857"></a><a id="_Toc473706080"></a>10.10.10</h2>
<p>P1</p>
<p>tags1</p>
<p>here1</p>
<a class="source"><a href="test1.html">test1.html</a></a>
<!-- Second article: same structure, different values. -->
<h1 class="header"><a id="_Toc446404887"></a><a id="_Toc446498856"></a><a id="_Toc473706079"></a>Тест 2</h1>
<h2 class="date"><a id="_Toc446411230"></a><a id="_Toc446404888"></a><a id="_Toc446318335"></a><a id="_Toc446498857"></a><a id="_Toc473706080"></a>11.11.11</h2>
<p>P2</p>
<p>tags2</p>
<p>here2</p>
<a class="source"><a href="test2.html">test2.html</a></a>
<script>
'use strict';

/*
 * Collect every article on the page into `arr` as plain objects
 *   { title, date, body, href }
 * and log the result.
 *
 * The markup is flat: an article starts at an element with class "header"
 * and continues through its following siblings:
 *   - an optional element with class "date" directly after the header,
 *   - a run of class-less elements whose text forms the body,
 *   - an optional element with class "source" that marks the link.
 * Any other element that carries a class attribute (for example the next
 * header) ends the article.
 *
 * Output notes:
 *   - `date` is only set when a date element is present.
 *   - `body` is the paragraph texts joined by single spaces, with no
 *     trailing space.
 *   - `href` is the href attribute exactly as written in the markup
 *     (e.g. "test1.html"), or '' when no link is found. Reading the
 *     attribute instead of the DOM `href` property keeps the value
 *     unresolved and also works with attribute-only libraries such as
 *     cheerio.
 */
var article_selector = $('.header');
var arr = [];

for (const item of article_selector) {
  const header = $(item);
  const row = { title: header.text() };

  // Walk the element siblings that follow the header, one at a time.
  let cursor = header.next();

  // The date is optional. Only step past it when it is really there, so an
  // article without a date does not lose its first paragraph.
  if (cursor.hasClass('date')) {
    row.date = cursor.text();
    cursor = cursor.next();
  }

  // Body = consecutive class-less elements. The length check stops the walk
  // after the last sibling; without it, an article that is not followed by
  // a classed element would spin forever on an empty selection.
  const paragraphs = [];
  while (cursor.length > 0 && cursor.attr('class') === undefined) {
    paragraphs.push(cursor.text());
    cursor = cursor.next();
  }
  row.body = paragraphs.join(' ');

  // The link may sit on the .source element itself, inside it, or (because
  // the parser un-nests <a> inside <a>) on the element right after it.
  row.href = '';
  if (cursor.hasClass('source')) {
    row.href = cursor.attr('href')
      || cursor.find('a[href]').attr('href')
      || cursor.next('a[href]').attr('href')
      || '';
  }

  arr.push(row);
}

console.log(arr);
</script>
</body>
</html>
Didn't find what you were looking for?
Ask your question · Ask a Question
731 491 924 answers to any question