# Flatten each item (a row whose column 2 is set) and its next three detail
# rows into one CSV record: page,item,id,name,size,weight,glass,price.
awk 'BEGIN { FS = OFS = ","; print "page,item,id,name,size,weight,glass,price" }
# Item row: keep columns 1-4 and column 5 (price), restart the detail counter.
$2 != "" { price = $5; data = $1 FS $2 FS $3 FS $4; desc = ""; c = 0; next }
# Any other row: strip the unit/label text so only the bare values remain.
{ gsub(/ ?(mm \(size\)|Weight:|kg|Glass:) ?/, "") }
# Collect column 4 of the first three rows after an item row, joined by OFS.
++c <= 3 { desc = (desc == "" ? "" : desc OFS) $4 }
# Emit the record as soon as the third detail is in, instead of waiting for a
# fourth row; otherwise the last item of the file (or any item directly
# followed by the next item row) would never be printed.
c == 3 && data != "" { print data, desc, price; data = "" }
' infile
including explanation:
awk 'BEGIN{ FS=OFS=","; print "page,item,id,name,size,weight,glass,price" }
#this block is executed only once, before reading any line, and does:
#set FS (Field Separator) and OFS (Output Field Separator) to a comma character
#print the "header" line
$2!=""{ price=$5; data=$1 FS $2 FS $3 FS $4; desc=""; c=0; next }
#this block is executed only when the column#2 value is not empty, and does:
#backup the column#5 data into the "price" variable
#also backup columns#1~4 into the "data" variable
#reset the "desc" variable and also the counter variable "c"
#then read the next line and skip processing the rest of the code
{ gsub(/ ?(mm \(size\)|Weight:|kg|Glass:) ?/, "") }
#this block runs for every remaining line and replaces the strings above with an empty string
++c<=3{ desc=(desc==""?"":desc OFS) $4 }
#this block runs for at most 3 lines after the counter "c" was reset and
#joins the column#4 descriptions of those lines, separated by OFS
c==3 && data!=""{ print data, desc, price; data="" }
#once the third description was collected and the "data" variable holds any data, then
#print the data, desc, price and empty the "data" variable
#(printing here, rather than on a 4th following line, keeps the last record
#of the file from being lost when nothing follows its third description)
' infile