# Matches an HTML anchor that is followed, after any mix of whitespace and
# tags, by a date in YYYY-MM-DD form. Example input:
#     <a href="page.html">Title</a> <td>2012-05-17</td>
#
# Capture groups:
#   1 - the quote character delimiting the href value (double or single)
#   2 - the href value
#   3 - the link text (must not contain nested tags)
#   4 - the date, YYYY-MM-DD
#
# The pattern MUST stay on multiple lines: under the x flag a comment runs to
# the end of the physical line, so collapsing this onto one line would turn
# everything after the first comment marker into comment text.
# Comments inside the pattern deliberately avoid braces and sigils, because
# the delimiter scan and variable interpolation both happen before comments
# are stripped.
my $re = qr{
    <a \s [^>]*                 # start of an anchor tag and any leading attributes
    href=                       # the href attribute
    (["']) ((?:(?!\1).)*) \1    # quoted href value: quote is capture 1, value is capture 2
    [^>]* >                     # anything up to the end of the opening tag
    ([^<>]*)                    # the link text, capture 3
    </a>                        # the end of the anchor
    (?: \s | <[^>]*> )+?        # whitespace or tags before the date; lazy, so we stop at the first date
    (\d{4}-\d{2}-\d{2})         # the date, capture 4
}isxm;