1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
|
# Look for Open Graph data - http://ogp.me
# Title: content attribute of the <meta property="og:title"> element.
title: //meta[@property="og:title"]/@content
# Date: content attribute of the <meta property="article:published_time"> element.
date: //meta[@property="article:published_time"]/@content
# article:author is sometimes a URL, e.g. on guardian.co.uk
# NOTE(review): no author rule follows this comment in this file - presumably omitted for the reason above; confirm.
# Remove Google Publisher Tags: https://support.google.com/dfp_sb/answer/1649768?hl=en
# (The rule below is commented out, so it is currently not applied.)
#strip_id_or_class: div-gpt-ad
# Strip doubleclick image ads (matched on the image src)
strip_image_src: doubleclick.net
# If you get chunks of JavaScript code appearing in the extracted output, try uncommenting the lines below.
# This tries to convert script tags to hidden div elements (which Full-Text RSS removes).
# If you notice issues with this approach, please let us know.
#find_string: <script
#replace_string: <div style="display:none"
#find_string: </script>
#replace_string: </div>
# Convert AMP image tags to HTML image tags.
# Each find_string is paired with the replace_string on the line after it.
# Opening tag: "<amp-img" becomes "<img".
find_string: <amp-img
replace_string: <img
# Closing tag: "</amp-img>" is replaced with a placeholder HTML comment.
find_string: </amp-img>
replace_string: <!-- nothing -->
# Strip all class attributes after processing (not supported in Full-Text RSS yet)
post_strip_attr: //*/@class
|