############################################################################### # = gradesheet-rows.rb # # Process rows from an OpenOffice spreadsheet, tailored for class gradesheet. # Copyright (C) 2005 by Allyn Dimock # # Application is to be run on a secure server with access to a grade sheet file. # It can be sent a message with a student id, and will return a HTML table # containing the contents of the visible spreadsheet columns for the student's # row, and for pre-selected rows of the spreadsheet. # # # === development notes # # bug in REXML: in Ruby 1.8.2, order of "table-column"s not maintained by: # col_descs = content_doc.root.get_elements('.//table:table[position()=1]/table:table-column') # # 31-May-2005: Had problems with # REXML::XPath.match( rows[rownum - 1], './/table:table-cell') # returning elements in arbitrary order rather than in order # in XML document so used REXML::Element and REXML::Elements methods instead. # drilling down to "table-row" then getting "table-cell". # # This failure to maintain order in REXML feels like a bug to me, although # no promise is made to maintain order. when fixed, some XPATH expressions # in the following program can be combined. ############################################################################### raise "Please, use ruby1.8.2 or later." if RUBY_VERSION < "1.8.2" require 'zip/zip' require 'rexml/document' ############################################################################## # Class SpreadsheetContent is an exercise in reasing OpenOffice documents # from Ruby. It uses the Ruby Zip package to get the content of a spreadsheet. # It makes the content available in the "content" attribute. ############################################################################### class SpreadsheetContent # content is a REXML::Document created in response to new SpreadsheetContent # similar code to make the styles document available has been commented out # in the source as it was not needed for this application. attr_reader :content #, :styles @invisible_cols = nil @fixed_rows = nil @id_column = nil ############################################################################# # Application-specific initialization. # # use initialize_content to get the content of the spreadsheet as a # REXML:Document # # Also set up the fixed_rows used in sturents_html() and the id_column used # in students_row() ############################################################################# def initialize(filename, fixed_rows, id_column) initialize_content(filename) @fixed_rows = fixed_rows @id_column = id_column @invisible_cols = find_collapsed() end # end initialize # Take a (unexpanded) file name string as input. # Sets the "content" attribute to the XML document from the content portion # of the named file. # # Raises exceptions errors from Zip::ZipFile.open # and from Zip::ZipFile.get_input_stream # and a Errno::ENOENT if the spreadsheet file is not found. # # If the spreadsheet is being updated, you may want to call # initialize_content periodically (with whatever locking is appropriate) # to update the content. def initialize_content(unexpanded_filename) # usable file name fileName = File.expand_path( unexpanded_filename ) # unzip and content.xml from the OpenOffice spreadsheet content_arr = nil # styles_arr = nil if not File.file?(fileName) $stderr.print("File " + fileName + " not found\n") raise Errno::ENOENT, fileName, caller else Zip::ZipFile.open(fileName) { | zf | begin zf.get_input_stream("content.xml") {|str| content_arr = str.readlines} # zf.get_input_stream("styles.xml") {|str| # styles_arr = str.readlines} rescue Exception => exnstring $stderr.print("Exception getting content of spreadsheet " + fileName + ": " + exnstring + "\n") # aka $! zf.close raise # re-raise $! end # end begin } end # end if content_string = content_arr.join #("\n") # styles_string = styles_arr.join #("\n") @content = REXML::Document.new( content_string ) # @styles = REXML::Document.new( styles_string ) # # At this point, spreadsheet should be closed, and (styles_doc),content_doc # should hold the relevant XML as strings. (A Bit Messy, but # REXML::Document.new will take a string or an IO, but not (31-May-2005) a # Zip::IO # end # end initialize method ############################################################################ # find_collapsed returns a table of collapsed columns in the first Sheet of # the spreadsheet and sets them in a private variable for later use. ############################################################################ def find_collapsed() # get columns to be excluded because of visibility restriction in # spreadsheet invisible_cols = [] colnum = 0 col_descs = @content.root.get_elements('.//table:table[position()=1]')[0]. get_elements('./table:table-column') col_descs.each { |col_desc| repetitions = 1 # usual value unless number-columns-repeated hidden = false # usual value unless visibility="collapse" begin repetitions = Integer(col_desc.attribute('table:number-columns-repeated').value) rescue NoMethodError # no atttribute: keep repetitions = 1 end begin hidden = (col_desc.attribute('table:visibility').value == "collapse") rescue NoMethodError # no atttribute: keep hidden = false end # loop repetitions.times updating invisible_cols if necessary. repetitions.times { |ignored| invisible_cols << colnum if hidden colnum = colnum + 1 } # end repetitions.times } # end col_descs.each return invisible_cols end # end find_collapsed method private :find_collapsed ########################################################################### # students_row(text) takes some text and returns the (first) row that # contains the specified text in the id_column (or nil if no such row). ########################################################################### def students_row(id_text) sheet1_rows = @content.root.get_elements('.//table:table//table:table-row') rownum = 0 sheet1_rows.each { |row| rownum = rownum + 1 # XML counts from 1 so start with 0+1 catch(:no_match_this_row) do colnum = 0 cells = row.get_elements('./table:table-cell') cells.each { |cell| repetitions = 1 text = "" begin repetitions = Integer(cell.attribute('table:number-columns-repeated').value) rescue NoMethodError # not specified, keep repetitions = 1 end begin text = cell.elements['./text:p'].text rescue NoMethodError # not specified, keep text = "" end repetitions.times { if colnum == @id_column then if text == id_text then return rownum else throw :no_match_this_row end end colnum = colnum + 1 } # end repetitions.times } # end cells.each end # end do } # end sheet1_rows.each return nil end # method id_column ########################################################################### # students_html(row) takes a spreadsheet row number and returns a HTML # table containing the text from visible columns for the passed row and # from the fixed_rows. If the passed row is nil, then only processes the # contents of the fixed_rows. ############################################################################ def students_html(row) # array of row numbers to print in order. printable_rows = @fixed_rows.clone printable_rows = (printable_rows << row).sort_by {|x| x} if not row.nil? table_rows = @content.root.get_elements('.//table:table//table:table-row') # cell_array is array of table:table-cell for each printable row cell_array = [] printable_rows.each { |rownum| cell_array << table_rows[rownum - 1].get_elements('./table:table-cell') } # Now iterate over cell_array from 0 to length-1. # Iterate over cell_array[i] from 0 to length-1 # Expect one of three forms: # 3 blank columns # 1 blank column # "text" # text outstring = <<'EOF' spreadsheet row data EOF cell_array.each { |cells| outstring = outstring + "" colnum = 0 cells.each { |cell| repetitions = 1 text = "" begin repetitions = Integer(cell.attribute('table:number-columns-repeated').value) rescue NoMethodError # not specified, keep repetitions = 1 end begin text = cell.elements['./text:p'].text rescue NoMethodError # not specified, keep text = "" end repetitions.times { if not (@invisible_cols.include?(colnum)) then outstring << "" end colnum = colnum + 1 } # end repetitions.times } # end cells.each outstring << "\n " } # end cell_array.each outstring << <<'EOF'
" + text + "
EOF return outstring end # end students_html method end # end class ############################################################################### # # == Testing of SpreadsheetContent # # Takes an old grade sheet, some standard rows, and makes the column number for # the secret id be the column number for student name. # # The values here should not work except on my system, so if you want to try # the SpreadsheetContent class, fill in your own constants. ############################################################################### class Test_SpreadsheetContent # # A file that I happen to have sitting around # TEST_FILE = "~/earth/grade301-F03/gradesheet.sxc" # # Rows to be output in any use of this program: # 1 - column header # 2 - point count per assignment # 20 - number handing in assignment # 21 - mean in this particular spreadsheet # FIXED_ROWS = [1,2,20,21] # # The id column is the column to be matched against a student's input to # determine the row of the spreadsheet containing the student's data. # # Note that the text to match aginst must be passed to the application securely # and the text should be known only to the student that the text identifies # ID_COLUMN = 0 # # Use the constants to test SpreadsheetContent functionality # def Test_SpreadsheetContent.main() test_obj = SpreadsheetContent.new( TEST_FILE, FIXED_ROWS, ID_COLUMN) student_row = test_obj.students_row("0123456789") puts test_obj.students_html(student_row) end end #Test_SpreadsheetContent.main()