In this article I am going to explain how to extract the
filename from html column using sql function.
Below is the function which i used to do this operation
CREATE FUNCTION dbo.fn_getFilenames(@InputHTML NVARCHAR(MAX))
RETURNS @res TABLE (pdf NVARCHAR(MAX)) AS
BEGIN
-- assumes there
are no single quotes or double quotes in the PDF filename
DECLARE @i INT, @j INT, @k INT, @tmp NVARCHAR(MAX);
SET @i = CHARINDEX(N'.pdf', @InputHTML);
WHILE @i > 0
BEGIN
SELECT @tmp = left(@InputHTML, @i+3);
SELECT @j = CHARINDEX('/', REVERSE(@tmp)); -- directory delimiter
SELECT @k = CHARINDEX('"', REVERSE(@tmp)); -- start of href
IF @j = 0 or (@k > 0 and @k < @j) SET @j = @k;
SELECT @k = CHARINDEX('''', REVERSE(@tmp)); -- start of href (single-quote*)
IF @j = 0 or (@k > 0 AND @k < @j) SET @j = @k;
INSERT @res VALUES (SUBSTRING(@tmp, len(@tmp)-@j+2, len(@TMP)));
SELECT
@InputHTML = STUFF(@InputHTML, 1, @i+4, ''); -- remove up to
".pdf"
SET @i = CHARINDEX(N'.pdf', @InputHTML);
END
RETURN
END
GO
Below is the query to check the output
-- CREATE TABLE
create table mytable (Html varchar(max));
-- INSERT HTML Content
insert into mytable values('
<p>A
deferred tuition payment plan,
or view the <a
href="/uploadedFiles/uploadedFiles/uploadedFiles/uploadedFiles/Tuition-Reimbursement-Deferred.pdf"
target="_blank">list</a>.</p>')
insert into mytable values('
<p>A
deferred tuition payment plan,
or view the <a
href="Two files here-Reimbursement-Deferred.pdf"
target="_blank">list</a>.</p>And
I use single quotes
<a href=''/look/path/The second
file.pdf''
target="_blank">list</a>');
--SELECT Statement
select t.*, p.pdf
from mytable t
cross apply dbo.extract_filenames_from_a_tags(html) p;
OUTPUT Will be:
Comments
Post a Comment