正则表达式去除字符串中的标签

假如有一个字符串如下:

// Example usage: sample markup containing <div>, <p>, <font>, <span> and <style> elements.
const html = `
    <div>
        <p>Hello</p>
        <font style="color:#0000FF;">This should be removed</font>
        <font style="color:#FF0000;">This should be kept</font>
        <span>Other tags</span>
        <font style="color:#FF0000;">This should be kept</font>
        <font style="color:#00FF00;">This should also be removed</font>
        <style>
            .name{
                font-size:14px;
            }
        </style>
    </div>
`;

其中含有 div、p、font、span、style 等标签,下面介绍如何去除所有标签、只保留标签中的文本内容:

1.去除所有标签,通过正则表达式:

/**
 * Strips every HTML tag from the given markup.
 *
 * Only the tags themselves are deleted. Text located between tags, including
 * the contents of elements such as <style>, is left in the result unchanged.
 *
 * @param {string} html - Markup to clean.
 * @returns {string} The input with every `<...>` sequence removed.
 */
function removeTagsExceptSpecificFont(html) {
    // "<", one or more characters that are not ">", then ">".
    const ANY_TAG = /<[^>]+>/g;
    return html.replace(ANY_TAG, '');
}

// Example usage: sample markup to run through removeTagsExceptSpecificFont.
const html = `
    <div>
        <p>Hello</p>
        <font style="color:#0000FF;">This should be removed</font>
        <font style="color:#FF0000;">This should be kept</font>
        <span>Other tags</span>
        <font style="color:#FF0000;">This should be kept</font>
        <font style="color:#00FF00;">This should also be removed</font>
        <style>
            .name{
                font-size:14px;
            }
        </style>
    </div>
`;

// Strip the tags and print what remains. Only tags are removed here, so the
// CSS text inside <style> is still part of the printed output.
const result = removeTagsExceptSpecificFont(html);
console.log(result); 

2.去除style标签及其里面的内容

/**
 * Removes <style> elements together with their contents, then strips every
 * remaining HTML tag, leaving only the text.
 *
 * Tag names are matched case-insensitively (HTML tag names are not
 * case-sensitive), and a closing tag may contain whitespace before ">".
 *
 * NOTE: this is regex-based text extraction, not an HTML sanitizer.
 *
 * @param {string} html - Markup to clean.
 * @returns {string} Text with <style> blocks and all tags removed.
 */
function removeTagsExceptSpecificFont(html) {
    // `s` lets "." span the newlines of a multi-line style sheet; `i` also
    // catches <STYLE>; `\b` keeps names that merely start with "style" from
    // being treated as a style element.
    const withoutStyle = html.replace(/<style\b[^>]*>.*?<\/style\s*>/gis, '');
    // Drop every remaining tag, keeping the text between tags.
    return withoutStyle.replace(/<[^>]+>/g, '');
}

// Example usage: sample markup that includes a <style> element.
const html = `
    <div>
        <p>Hello</p>
        <font style="color:#0000FF;">This should be removed</font>
        <font style="color:#FF0000;">This should be kept</font>
        <span>Other tags</span>
        <font style="color:#FF0000;">This should be kept</font>
        <font style="color:#00FF00;">This should also be removed</font>
        <style>
            .name{
                font-size:14px;
            }
        </style>
    </div>
`;

// The <style> element is removed together with its CSS, so the printed
// output contains only the text of the other elements.
const result = removeTagsExceptSpecificFont(html);
console.log(result); 

3.去除script标签及其里面的内容

/**
 * Removes <script> elements together with their contents, then strips every
 * remaining HTML tag, leaving only the text.
 *
 * Tag names are matched case-insensitively (HTML tag names are not
 * case-sensitive), and a closing tag may contain whitespace before ">".
 *
 * NOTE: this is regex-based text extraction, not an HTML sanitizer; do not
 * rely on it to make untrusted markup safe to render.
 *
 * @param {string} html - Markup to clean.
 * @returns {string} Text with <script> blocks and all tags removed.
 */
function removeTagsExceptSpecificFont(html) {
    // `s` lets "." span the newlines of a multi-line script; `i` also catches
    // <SCRIPT>; `\b` keeps names that merely start with "script" from being
    // treated as a script element.
    const withoutScript = html.replace(/<script\b[^>]*>.*?<\/script\s*>/gis, '');
    // Drop every remaining tag, keeping the text between tags.
    return withoutScript.replace(/<[^>]+>/g, '');
}

// Example usage: sample markup that includes a <script> element.
const html = `
    <div>
        <p>Hello</p>
        <font style="color:#0000FF;">This should be removed</font>
        <font style="color:#FF0000;">This should be kept</font>
        <span>Other tags</span>
        <font style="color:#FF0000;">This should be kept</font>
        <font style="color:#00FF00;">This should also be removed</font>
        <script>
            window.onload = function(){}
        </script>
    </div>
`;

// The <script> element is removed together with its code, so the printed
// output contains only the text of the other elements.
const result = removeTagsExceptSpecificFont(html);
console.log(result); 

4.保留特定标签,如保留font和p标签,其他标签去除

// Remove every tag except <font> and <p> (both opening and closing forms).
// The lookahead demands whitespace, "/" or ">" right after the tag name so that
// only those exact names are spared; a bare (?!font)(?!p) would also spare any
// tag whose name merely starts with those letters, such as <pre> or <picture>.
let cleanedText = html.replace(/<\/?(?!(?:font|p)[\s/>])[a-z][^>]*>/gi, '');

5.保留特定标签及其样式,如保留标签名为font且样式为color:#FF0000的标签,其他标签去除

/**
 * Strips every HTML tag except `<font style="color:#FF0000;">…</font>`
 * elements, which are kept verbatim (opening tag, content and closing tag).
 *
 * The work is done in a single left-to-right pass, so each kept element stays
 * exactly where it was in the input. Stripping first and then searching for
 * the kept text to re-wrap it is unreliable: the same text can occur earlier
 * inside an element that is not kept, and a string replacement would expand
 * sequences such as `$&` found in the kept markup.
 *
 * A kept element must open and close on the same line, because "." in the
 * pattern does not match line breaks.
 *
 * @param {string} html - Markup to clean.
 * @returns {string} The text of `html` with only the red <font> elements
 *   still wrapped in their tags.
 */
function removeTagsExceptSpecificFont(html) {
    // Alternative 1 (captured): a whole red <font> element to keep as-is.
    // Alternative 2: any other tag, which is dropped.
    const KEPT_ELEMENT_OR_TAG = /(<font\s+style="color:#FF0000;">.*?<\/font>)|<[^>]+>/g;
    // A replacer function is used so the returned markup is inserted literally.
    return html.replace(KEPT_ELEMENT_OR_TAG, (match, keptElement) =>
        keptElement === undefined ? '' : keptElement
    );
}

// Example usage: sample markup with several <font> elements of different colours.
const html = `
    <font style="color:#FF0000;">test</font>
    <div>
        <p>Hello</p>
        <font style="color:#0000FF;">This should be removed</font>
        <font style="color:#FF0000;">This should be kep</font>
        <font style="color:#FF0000;">This should be kept22</font>
        <span>Other tags</span>
        <font style="color:#FF0000;">This should be kept11</font>
        <font style="color:#00FF00;">This should also be removed</font>
    </div>
`;

// Only the <font style="color:#FF0000;"> elements keep their tags in the
// printed result; every other tag is stripped down to its text.
const result = removeTagsExceptSpecificFont(html);
console.log(result); 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值