假如有一个字符串如下:
// Example usage: sample markup containing <div>, <p>, <font>, <span> and
// <style> elements; it is the input the tag-stripping snippets operate on.
const html = `
<div>
<p>Hello</p>
<font style="color:#0000FF;">This should be removed</font>
<font style="color:#FF0000;">This should be kept</font>
<span>Other tags</span>
<font style="color:#FF0000;">This should be kept</font>
<font style="color:#00FF00;">This should also be removed</font>
<style>
.name{
font-size:14px;
}
</style>
</div>
`;
其中含有div、p、font、span、style等标签,下面介绍如何去除所有标签,只保留标签中的文本内容:
1.去除所有标签,通过正则表达式:
/**
 * Deletes every HTML tag from the input and returns what is left.
 *
 * Only the tag markup itself (`<...>`) is removed. Text that sits between
 * tags, including CSS inside a <style> element, stays in the result.
 *
 * @param {string} html - Markup to strip.
 * @returns {string} The input with every `<...>` sequence removed.
 */
function removeTagsExceptSpecificFont(html) {
  // A tag is "<", one or more characters that are not ">", then ">".
  const anyTag = /<[^>]+>/g;
  return html.replace(anyTag, '');
}
// Example usage: sample markup with <p>, <font>, <span> and <style> elements.
const html = `
<div>
<p>Hello</p>
<font style="color:#0000FF;">This should be removed</font>
<font style="color:#FF0000;">This should be kept</font>
<span>Other tags</span>
<font style="color:#FF0000;">This should be kept</font>
<font style="color:#00FF00;">This should also be removed</font>
<style>
.name{
font-size:14px;
}
</style>
</div>
`;
// Run the sample through the function and print the returned string.
const result = removeTagsExceptSpecificFont(html);
console.log(result);
2.去除style标签及其里面的内容
/**
 * Removes <style> elements together with their CSS content, then strips
 * every remaining HTML tag and returns the leftover text.
 *
 * @param {string} html - Markup to strip.
 * @returns {string} Text of the input without tags and without any
 *   <style>...</style> content.
 */
function removeTagsExceptSpecificFont(html) {
  // Flags: `s` lets "." span the newlines inside a style sheet, and `i` is
  // needed because HTML tag names are case-insensitive (<STYLE> is valid).
  // `\b` stops longer names such as <styles> from matching, and `\s*`
  // accepts whitespace before the closing ">" ("</style >").
  const withoutStyle = html.replace(/<style\b[^>]*>.*?<\/style\s*>/gis, '');
  // Remove all remaining tags, keeping only the text between them.
  return withoutStyle.replace(/<[^>]+>/g, '');
}
// Example usage: sample markup that includes a <style> element.
const html = `
<div>
<p>Hello</p>
<font style="color:#0000FF;">This should be removed</font>
<font style="color:#FF0000;">This should be kept</font>
<span>Other tags</span>
<font style="color:#FF0000;">This should be kept</font>
<font style="color:#00FF00;">This should also be removed</font>
<style>
.name{
font-size:14px;
}
</style>
</div>
`;
// Run the sample through the function and print the returned string.
const result = removeTagsExceptSpecificFont(html);
console.log(result);
3.去除script标签及其里面的内容
/**
 * Removes <script> elements together with their code, then strips every
 * remaining HTML tag and returns the leftover text.
 *
 * @param {string} html - Markup to strip.
 * @returns {string} Text of the input without tags and without any
 *   <script>...</script> content.
 */
function removeTagsExceptSpecificFont(html) {
  // Flags: `s` lets "." span the newlines inside the script body, and `i`
  // is needed because HTML tag names are case-insensitive (<SCRIPT> is
  // valid). `\b` stops longer names that merely start with "script" from
  // matching, and `\s*` accepts whitespace before the closing ">".
  const withoutScript = html.replace(/<script\b[^>]*>.*?<\/script\s*>/gis, '');
  // Remove all remaining tags, keeping only the text between them.
  return withoutScript.replace(/<[^>]+>/g, '');
}
// Example usage: sample markup that includes a <script> element.
const html = `
<div>
<p>Hello</p>
<font style="color:#0000FF;">This should be removed</font>
<font style="color:#FF0000;">This should be kept</font>
<span>Other tags</span>
<font style="color:#FF0000;">This should be kept</font>
<font style="color:#00FF00;">This should also be removed</font>
<script>
window.onload = function(){}
</script>
</div>
`;
// Run the sample through the function and print the returned string.
const result = removeTagsExceptSpecificFont(html);
console.log(result);
4.保留特定标签,如保留font标签和p标签,其他标签去除
// Strip every opening/closing tag except <font> and <p>.
// The negative lookahead lists the tag names to keep; the trailing \b makes
// it compare whole names, so tags that merely start with "p" or "font"
// (e.g. <pre>, <param>, <picture>) are removed rather than kept by accident.
let cleanedText = html.replace(/<\/?(?!(?:font|p)\b)[a-z]+?[^>]*?>/gi, '');
5.保留特定标签及其样式,如保留标签名为font且样式为color:#FF0000的标签,其他标签去除
/**
 * Strips all HTML tags except `<font style="color:#FF0000;">...</font>`
 * elements, which are kept verbatim (opening tag, content, closing tag).
 *
 * An element is kept only if its opening tag is spelled exactly as
 * `<font style="color:#FF0000;">` (any whitespace between `font` and
 * `style`) and its closing `</font>` is on the same line, because "."
 * does not match newlines here.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The input with every tag removed except the kept red
 *   <font> elements, which stay at their original positions.
 */
function removeTagsExceptSpecificFont(html) {
  // A single pass decides, at each "<", whether it starts a kept <font>
  // element (captured in group 1) or just a tag to delete. Each element is
  // handled where it stands, so identical text elsewhere cannot be wrapped
  // by mistake, unlike stripping everything and searching by text to
  // re-insert the kept tags.
  const keptFontOrAnyTag = /(<font\s+style="color:#FF0000;">.*?<\/font>)|<[^>]+>/g;

  // A function replacer returns its result literally, so "$&" or "$$"
  // inside the kept text is not treated as a replacement pattern.
  return html.replace(keptFontOrAnyTag, (match, keptFont) =>
    keptFont === undefined ? '' : keptFont
  );
}
// Example usage: sample markup with several <font> elements of different
// colors, some of which share a common text prefix.
const html = `
<font style="color:#FF0000;">test</font>
<div>
<p>Hello</p>
<font style="color:#0000FF;">This should be removed</font>
<font style="color:#FF0000;">This should be kep</font>
<font style="color:#FF0000;">This should be kept22</font>
<span>Other tags</span>
<font style="color:#FF0000;">This should be kept11</font>
<font style="color:#00FF00;">This should also be removed</font>
</div>
`;
// Run the sample through the function and print the returned string.
const result = removeTagsExceptSpecificFont(html);
console.log(result);