1<?php
2
3require_once(HTML2PS_DIR.'utils_units.php');
4
/**
 * usort() comparator for FootnoteLocation-like objects.
 *
 * Orders items by descending get_location() value: the item with the
 * greater location sorts first; equal locations compare as 0.
 *
 * @param object $a first item; must provide get_location()
 * @param object $b second item; must provide get_location()
 * @return int -1, 0 or 1
 */
function cmp_footnote_locations($a, $b) {
  $lhs = $a->get_location();
  $rhs = $b->get_location();

  if ($lhs != $rhs) {
    return ($lhs > $rhs) ? -1 : 1;
  }

  return 0;
}
9
/**
 * Value object pairing the position of a footnote call with the height
 * of the footnote content it refers to.
 */
class FootnoteLocation {
  // Vertical coordinate of the footnote call
  var $_location;
  // Height of the footnote content block
  var $_content_height;

  /**
   * PHP 5+ constructor. Declared before the legacy class-named method so
   * that PHP 5 does not report a redefined constructor.
   *
   * @param mixed $location       vertical coordinate of the footnote call
   * @param mixed $content_height height of the footnote content
   */
  function __construct($location, $content_height) {
    $this->_location       = $location;
    $this->_content_height = $content_height;
  }

  /**
   * Legacy PHP 4-style constructor. PHP 8 no longer treats a class-named
   * method as a constructor, so the real initialisation lives in
   * __construct(); this method is kept for callers that invoke it directly.
   *
   * @param mixed $location       vertical coordinate of the footnote call
   * @param mixed $content_height height of the footnote content
   */
  function FootnoteLocation($location, $content_height) {
    $this->__construct($location, $content_height);
  }

  /**
   * @return mixed the location given at construction time
   */
  function get_location() {
    return $this->_location;
  }

  /**
   * @return mixed the content height given at construction time
   */
  function get_content_height() {
    return $this->_content_height;
  }
}
27
/**
 * usort() comparator for PageBreakLocation-like objects.
 *
 * Orders items by descending 'location' property: the item with the
 * greater location sorts first; equal locations compare as 0.
 *
 * @param object $a first item; must expose a 'location' property
 * @param object $b second item; must expose a 'location' property
 * @return int -1, 0 or 1
 */
function cmp_page_break_locations($a, $b) {
  $lhs = $a->location;
  $rhs = $b->location;

  if ($lhs != $rhs) {
    return ($lhs > $rhs) ? -1 : 1;
  }

  return 0;
}
32
/**
 * A candidate page break: a vertical position plus the penalty of
 * breaking the page there.
 */
class PageBreakLocation {
  // Vertical coordinate of the break, rounded to two decimals by the constructor
  var $location;
  // Base penalty of breaking at this position
  var $penalty;

  /**
   * PHP 5+ constructor. Declared before the legacy class-named method so
   * that PHP 5 does not report a redefined constructor.
   *
   * @param float $location vertical coordinate of the break; stored rounded
   *                        to two decimals so that nearly identical
   *                        positions compare equal
   * @param mixed $penalty  base penalty of breaking here
   */
  function __construct($location, $penalty) {
    $this->location = round($location, 2);
    $this->penalty  = $penalty;
  }

  /**
   * Legacy PHP 4-style constructor. PHP 8 no longer treats a class-named
   * method as a constructor, so the real initialisation lives in
   * __construct(); this method is kept for callers that invoke it directly.
   *
   * @param float $location vertical coordinate of the break
   * @param mixed $penalty  base penalty of breaking here
   */
  function PageBreakLocation($location, $penalty) {
    $this->__construct($location, $penalty);
  }

  /**
   * Calculates the vertical space taken by the footnotes called between
   * the page start and the given location.
   *
   * @param array $footnotes  objects providing get_location() and
   *                          get_content_height(), ordered by descending
   *                          location (both loops below rely on this order)
   * @param float $page_start coordinate of the page top
   * @param float $location   coordinate of the candidate break
   * @return mixed 0 when no footnote falls into the range; otherwise the
   *               sum of the content heights plus the separator-line gaps
   *               and one FOOTNOTE_GAP between each pair of footnotes
   */
  function get_footnotes_height($footnotes, $page_start, $location) {
    $i = 0;
    $size = count($footnotes);

    $height = 0;

    // Skip footnotes whose location is greater than the page start
    while ($i < $size && $footnotes[$i]->get_location() > $page_start) {
      $i++;
    };

    // Accumulate footnotes whose location is greater than the break location
    $footnotes_count = 0;
    while ($i < $size && $footnotes[$i]->get_location() > $location) {
      $height += $footnotes[$i]->get_content_height();
      $footnotes_count ++;
      $i++;
    };

    if ($footnotes_count > 0) {
      return
        $height +
        FOOTNOTE_LINE_TOP_GAP +
        FOOTNOTE_LINE_BOTTOM_GAP +
        FOOTNOTE_GAP * ($footnotes_count-1);
    } else {
      return 0;
    };
  }

  /**
   * Total penalty of ending the page at this location: the base penalty
   * plus the height penalty, evaluated against the page height that
   * remains after the footnotes of this page are subtracted.
   *
   * @param float $page_start      coordinate of the page top
   * @param float $max_page_height maximal page content height
   * @param array $footnotes       footnotes, see get_footnotes_height()
   * @return mixed combined penalty
   */
  function get_penalty($page_start, $max_page_height, $footnotes) {
    $footnotes_height = $this->get_footnotes_height($footnotes,
                                                    $page_start,
                                                    $this->location);

    $height_penalty = $this->get_page_break_height_penalty($page_start,
                                                           $max_page_height - $footnotes_height);

    return $this->penalty + $height_penalty;
  }

  /**
   * We should avoid page breaks resulting in too much white space at
   * the page bottom. This function calculates a 'penalty' for
   * breaking page at its current height.
   *
   * @param float $page_start      coordinate of the page top
   * @param float $max_page_height height available for content on the page
   * @return mixed MAX_PAGE_BREAK_PENALTY when the page would overflow, when
   *               no height is available at all, or when the free fraction
   *               exceeds MAX_FREE_FRACTION; 0 when the free fraction is
   *               below MAX_UNPENALIZED_FREE_FRACTION; otherwise a value
   *               growing linearly up to MAX_PAGE_BREAK_HEIGHT_PENALTY
   */
  function get_page_break_height_penalty($page_start, $max_page_height) {
    // A page without any available height cannot hold content; this also
    // protects the division below (get_penalty() subtracts the footnotes
    // height, which may consume the whole page)
    if ($max_page_height <= 0) {
      return MAX_PAGE_BREAK_PENALTY;
    };

    $current_height = $page_start - $this->location;

    if ($current_height > $max_page_height) {
      return MAX_PAGE_BREAK_PENALTY;
    };

    $free_space = $max_page_height - $current_height;
    $free_space_fraction = $free_space / $max_page_height;

    if ($free_space_fraction < MAX_UNPENALIZED_FREE_FRACTION) {
      return 0;
    };

    if ($free_space_fraction > MAX_FREE_FRACTION) {
      return MAX_PAGE_BREAK_PENALTY;
    };

    return
      ($free_space_fraction - MAX_UNPENALIZED_FREE_FRACTION) /
      (MAX_FREE_FRACTION    - MAX_UNPENALIZED_FREE_FRACTION) *
      MAX_PAGE_BREAK_HEIGHT_PENALTY;
  }
}
108
109/**
110 * Note that, according to CSS 2.1:
111 *
112 * A potential page break  location is typically under the influence
113 * of  the   parent  element's  'page-break-inside'   property,  the
114 * 'page-break-after'  property of  the preceding  element,  and the
115 * 'page-break-before' property of the following element. When these
116 * properties have  values other  than 'auto', the  values 'always',
117 * 'left', and 'right' take precedence over 'avoid'.
118 *
119 * AND
120 *
121 * A conforming user agent may interpret the values 'left' and 'right'
122 * as 'always'.
123 *
124 * AND
125 *
126 * In the normal flow, page breaks can occur at the following places:
127 *
128 * 1. In the vertical margin between block boxes. When a page break occurs here, the used values of the relevant 'margin-top' and 'margin-bottom' properties are set to '0'.
129 * 2. Between line boxes inside a block box.
130 */
class PageBreakLocator {
  /**
   * Collects every potential page break location of the box tree, ordered
   * by descending location coordinate and merged so that each coordinate
   * occurs only once.
   *
   * All methods of this class are stateless and are invoked statically
   * throughout the file, hence they are declared static.
   *
   * @param object $dom_tree root box of the laid-out document
   * @return array list of PageBreakLocation objects; empty when the tree
   *               offers no break locations
   */
  public static function get_break_locations(&$dom_tree) {
    $locations_ungrouped = PageBreakLocator::get_pages_traverse($dom_tree, 0);

    /**
     * If there are no page break locations (e.g. document is empty),
     * report an empty list; get_pages() then generates one full-size page
     */
    if (count($locations_ungrouped) == 0) {
      return array();
    };

    return PageBreakLocator::sort_locations($locations_ungrouped);
  }

  /**
   * Recursively collects the footnote calls found in a box subtree.
   *
   * @param object $box box to inspect; BoxNoteCall boxes produce a
   *                    footnote entry, GenericContainerBox boxes are
   *                    descended into, any other box is ignored
   * @return array list of FootnoteLocation objects in traversal order
   */
  public static function get_footnotes_traverse(&$box) {
    $footnotes = array();

    if (is_a($box, 'BoxNoteCall')) {
      $footnotes[] = new FootnoteLocation($box->get_top_margin(), $box->_note_content->get_full_height());
    } elseif (is_a($box, 'GenericContainerBox')) {
      foreach ($box->content as $child) {
        $footnotes = array_merge($footnotes, PageBreakLocator::get_footnotes_traverse($child));
      };
    };

    return $footnotes;
  }

  /**
   * Splits the document into pages.
   *
   * Walks the break locations in descending coordinate order, tracking
   * the lowest-penalty location that still fits on the current page, and
   * closes the page when the next location no longer fits or when a forced
   * break (negative penalty) is met.
   *
   * @param object $dom_tree        root box of the laid-out document
   * @param float  $max_page_height maximal page content height
   * @param float  $first_page_top  coordinate of the top of the first page
   * @return array list of page heights; the last entry is always
   *               $max_page_height
   */
  public static function get_pages(&$dom_tree, $max_page_height, $first_page_top) {
    $current_page_top = $first_page_top;
    $heights = array();

    /**
     * Get list of footnotes and heights of footnote content blocks
     */
    $footnotes = PageBreakLocator::get_footnotes_traverse($dom_tree);
    usort($footnotes, 'cmp_footnote_locations');

    $locations = PageBreakLocator::get_break_locations($dom_tree);

    if (count($locations) == 0) {
      return array($max_page_height);
    };

    $best_location = null;
    foreach ($locations as $location) {
      // Only locations strictly below the current page top are candidates
      if ($location->location < $current_page_top) {
        if (is_null($best_location)) {
          $best_location = $location;
        };

        $current_pos = round_units($current_page_top - $location->location);
        $available_page_height = round_units($max_page_height - $location->get_footnotes_height($footnotes, $current_page_top, $location->location));

        if ($current_pos > $available_page_height) {
          /**
           * No more locations found on current page
           */

          $best_location_penalty = $best_location->get_penalty($current_page_top, $max_page_height, $footnotes);
          if ($best_location_penalty >= MAX_PAGE_BREAK_PENALTY) {
            // Even the best candidate is unacceptable: emit a full-height page
            error_log('Could not find good page break location');
            $heights[] = $max_page_height;
            $current_page_top -= $max_page_height;
            $best_location = null;
          } else {
            $heights[] = $current_page_top - $best_location->location;
            $current_page_top = $best_location->location;
            $best_location = null;
          };

        } else {
          $location_penalty = $location->get_penalty($current_page_top, $max_page_height, $footnotes);
          $best_penalty = $best_location->get_penalty($current_page_top, $max_page_height, $footnotes);

          // '<=' prefers the later (lower) location when penalties are equal
          if ($location_penalty <= $best_penalty) {
            /**
             * Better page break location found on current page
             */
            $best_location = $location;
          };
        };

        if ($location->penalty < 0) { // Forced page break
          $heights[]        = $current_page_top - $location->location;
          $current_page_top = $location->location;
          $best_location    = null;
        };
      };
    };

    // Last page always will have maximal height
    $heights[] = $max_page_height;

    return $heights;
  }

  /**
   * Tells whether a page-break-before/after value forces a page break.
   * 'left' and 'right' are handled like 'always'.
   *
   * @param mixed $value page-break property value
   * @return bool
   */
  public static function is_forced_page_break($value) {
    return
      $value == PAGE_BREAK_ALWAYS ||
      $value == PAGE_BREAK_LEFT ||
      $value == PAGE_BREAK_RIGHT;
  }

  /**
   * @param object $box box providing get_css_property()
   * @return bool true if the box's 'page-break-before' forces a break
   */
  public static function has_forced_page_break_before(&$box) {
    return PageBreakLocator::is_forced_page_break($box->get_css_property(CSS_PAGE_BREAK_BEFORE));
  }

  /**
   * @param object $box box providing get_css_property()
   * @return bool true if the box's 'page-break-after' forces a break
   */
  public static function has_forced_page_break_after(&$box) {
    return PageBreakLocator::is_forced_page_break($box->get_css_property(CSS_PAGE_BREAK_AFTER));
  }

  /**
   * Collects break locations produced by a block-level box: one at its
   * top margin, one at its bottom margin, plus everything found inside.
   *
   * @param object      $box      block box to inspect
   * @param object|null $next     next non-null sibling, or null
   * @param object|null $previous previous non-null sibling, or null
   * @param mixed       $penalty  penalty inherited from the ancestors
   * @return array ungrouped list of PageBreakLocation objects; empty for
   *               absolutely/fixed positioned boxes and fake table cells
   */
  public static function get_pages_traverse_block(&$box, &$next, &$previous, $penalty) {
    $locations = array();

    // Absolute/fixed positioned blocks do not cause page breaks
    // (CSS 2.1. 13.2.3 Content outside the page box)
    $position = $box->get_css_property(CSS_POSITION);
    if ($position == POSITION_FIXED || $position == POSITION_ABSOLUTE) {
      return $locations;
    };

    // Fake cell boxes do not generate page break locations
    if (is_a($box, 'FakeTableCellBox')) {
      return $locations;
    }

    /**
     * Check for breaks in block box vertical margin
     */

    /**
     * Check for pre-breaks
     */
    if (PageBreakLocator::has_forced_page_break_before($box)) {
      $location = new PageBreakLocation($box->get_top_margin(), FORCED_PAGE_BREAK_BONUS);
    } elseif (!is_null($previous) && $previous->get_css_property(CSS_PAGE_BREAK_AFTER) == PAGE_BREAK_AVOID) {
      $location = new PageBreakLocation($box->get_top_margin(), $penalty + PAGE_BREAK_AFTER_AVOID_PENALTY);
    } elseif ($box->get_css_property(CSS_PAGE_BREAK_BEFORE) == PAGE_BREAK_AVOID) {
      $location = new PageBreakLocation($box->get_top_margin(), $penalty + PAGE_BREAK_BEFORE_AVOID_PENALTY);
    } else {
      $location = new PageBreakLocation($box->get_top_margin(), $penalty);
    };
    $locations[] = $location;

    /**
     * Check for post-breaks
     */
    if (PageBreakLocator::has_forced_page_break_after($box)) {
      $location = new PageBreakLocation($box->get_bottom_margin(), FORCED_PAGE_BREAK_BONUS);
    } elseif (!is_null($next) && $next->get_css_property(CSS_PAGE_BREAK_BEFORE) == PAGE_BREAK_AVOID) {
      // The next sibling's 'page-break-before: avoid' is in effect here,
      // so the 'before' penalty applies (mirrors the pre-break branch)
      $location = new PageBreakLocation($box->get_bottom_margin(), $penalty + PAGE_BREAK_BEFORE_AVOID_PENALTY);
    } elseif ($box->get_css_property(CSS_PAGE_BREAK_AFTER) == PAGE_BREAK_AVOID) {
      $location = new PageBreakLocation($box->get_bottom_margin(), $penalty + PAGE_BREAK_AFTER_AVOID_PENALTY);
    } else {
      $location = new PageBreakLocation($box->get_bottom_margin(), $penalty);
    }
    $locations[] = $location;

    /**
     * Check for breaks inside this box
     * Note that this check should be done after page-break-before/after checks,
     * as 'penalty' value may be modified here
     */
    if ($box->get_css_property(CSS_PAGE_BREAK_INSIDE) == PAGE_BREAK_AVOID) {
      $penalty += PAGE_BREAK_INSIDE_AVOID_PENALTY;
    };

    /**
     * According to CSS 2.1, 13.3.5 'Best' page breaks,
     * User agent should /Avoid breaking inside a block that has a border/
     *
     * From my point of view, top and bottom borders should not affect page
     * breaks (as they're not broken by page break), while left and right ones - should.
     */
    $border_left =& $box->get_css_property(CSS_BORDER_LEFT);
    $border_right =& $box->get_css_property(CSS_BORDER_RIGHT);

    $has_left_border = $border_left->style != BS_NONE && $border_left->width->getPoints() > 0;
    // Must inspect the right border; a right-only border has to be penalized too
    $has_right_border = $border_right->style != BS_NONE && $border_right->width->getPoints() > 0;

    if ($has_left_border || $has_right_border) {
      $penalty += PAGE_BREAK_BORDER_PENALTY;
    };

    /**
     * Process box content
     */
    $locations = array_merge($locations, PageBreakLocator::get_pages_traverse($box, $penalty));

    return $locations;
  }

  /**
   * Counts the line boxes of the InlineBox siblings preceding position
   * $base. BRBox and GenericInlineBox siblings are skipped; scanning stops
   * at the first sibling of any other kind.
   *
   * @param int   $base    index of the current box in $content
   * @param array $content sibling boxes
   * @param int   $size    number of siblings (not used by the backward scan)
   * @return int number of preceding line boxes
   */
  public static function get_more_before($base, $content, $size) {
    $i = $base;
    $more_before = 0;

    while ($i > 0) {
      $i--;
      if (is_a($content[$i], 'InlineBox')) {
        $more_before += $content[$i]->get_line_box_count();
      } elseif (is_a($content[$i], 'BRBox') ||
                is_a($content[$i], 'GenericInlineBox')) {
        // Do nothing
      } else {
        return $more_before;
      };
    };

    return $more_before;
  }

  /**
   * Counts the line boxes of the InlineBox siblings following position
   * $base. BRBox and GenericInlineBox siblings are skipped; scanning stops
   * at the first sibling of any other kind.
   *
   * @param int   $base    index of the current box in $content
   * @param array $content sibling boxes
   * @param int   $size    number of siblings
   * @return int number of following line boxes
   */
  public static function get_more_after($base, $content, $size) {
    $i = $base;
    $more = 0;

    while ($i < $size-1) {
      $i++;
      if (is_a($content[$i], 'InlineBox')) {
        // Same accessor as in get_more_before() and get_pages_traverse_inline()
        $more += $content[$i]->get_line_box_count();
      } elseif (is_a($content[$i], 'BRBox')  ||
                is_a($content[$i], 'GenericInlineBox')) {
        // Do nothing
      } else {
        return $more;
      };
    };

    return $more;
  }

  /**
   * Collects break locations of a table row: the top and bottom margins
   * of its first non-fake cell, plus the locations inside the row that
   * are usable with respect to every cell.
   *
   * @param object $box     table row box
   * @param mixed  $penalty penalty inherited from the ancestors
   * @return array ungrouped list of PageBreakLocation objects
   */
  public static function get_pages_traverse_table_row(&$box, $penalty) {
    $locations = array();

    $cells = $box->getChildNodes();
    $size  = count($cells);

    // A row without cells offers no break locations; bail out before
    // the first cell is dereferenced below
    if ($size == 0) {
      return $locations;
    };

    // Find first non-fake (not covered by a table row or cell span) cell
    $i = 0;
    while ($i < $size &&
           $cells[$i]->is_fake()) {
      $i++;
    };
    // Now $i contains the index of the first content cell or $size if there was none
    if ($i < $size) {
      $locations[] = new PageBreakLocation($cells[$i]->get_top_margin(),    $penalty);
      $locations[] = new PageBreakLocation($cells[$i]->get_bottom_margin(), $penalty);
    };

    // Lowest coordinate reached by the content of the cells processed so far
    $content_watermark = $cells[0]->get_top_margin() - $cells[0]->get_real_full_height();

    /**
     * Process row content
     */
    $inside_penalty = $penalty;
    if ($box->get_css_property(CSS_PAGE_BREAK_INSIDE) == PAGE_BREAK_AVOID) {
      $inside_penalty += PAGE_BREAK_INSIDE_AVOID_PENALTY;
    };

    $null = null;
    $ungrouped_row_locations = PageBreakLocator::get_pages_traverse_block($cells[0],
                                                                          $null,
                                                                          $null,
                                                                          $inside_penalty);
    $row_locations = PageBreakLocator::sort_locations($ungrouped_row_locations);

    for ($i = 1; $i < $size; $i++) {
      $ungrouped_child_locations = PageBreakLocator::get_pages_traverse_block($cells[$i],
                                                                              $null,
                                                                              $null,
                                                                              $inside_penalty);
      $child_locations = PageBreakLocator::sort_locations($ungrouped_child_locations);

      $current_cell_content_watermark = $cells[$i]->get_top_margin() - $cells[$i]->get_real_full_height();

      $new_row_locations = array();

      // Keep only locations available in all cells

      foreach ($row_locations as $current_row_location) {
        // Check if current row-wide location is below the current cell content;
        // in this case, accept it immediately
        if ($current_row_location->location < $current_cell_content_watermark) {
          $new_row_locations[] = $current_row_location;
        } else {
          // Match all row locations against the current cell's
          foreach ($child_locations as $current_child_location) {
            if ($current_child_location->location == $current_row_location->location) {
              $new_row_locations[] = new PageBreakLocation($current_child_location->location,
                                                           max($current_child_location->penalty,
                                                               $current_row_location->penalty));
            };
          };
        };
      };

      // Add locations available below content in previous cells

      foreach ($child_locations as $current_child_location) {
        if ($current_child_location->location < $content_watermark) {
          $new_row_locations[] = new PageBreakLocation($current_child_location->location,
                                                       $current_child_location->penalty);
        };
      };

      $content_watermark = min($content_watermark, $current_cell_content_watermark);

      $row_locations = $new_row_locations;
    };

    $locations = array_merge($locations, $row_locations);
    return $locations;
  }

  /**
   * Collects break locations between the line boxes of an inline box,
   * adding orphans/widows penalties taken from the parent box.
   *
   * @param object $box         inline box providing get_line_box_count(),
   *                            get_line_box() and a 'parent' box
   * @param mixed  $penalty     penalty inherited from the ancestors
   * @param int    $more_before number of line boxes preceding this box
   *                            (see get_more_before())
   * @param int    $more_after  number of line boxes following this box
   *                            (see get_more_after())
   * @return array ungrouped list of PageBreakLocation objects
   */
  public static function get_pages_traverse_inline(&$box, $penalty, $more_before, $more_after) {
    $locations = array();

    /**
     * Check for breaks between line boxes
     */

    $size = $box->get_line_box_count();

    if ($size == 0) {
      return $locations;
    };

    $orphans = $box->parent->get_css_property(CSS_ORPHANS);
    $widows  = $box->parent->get_css_property(CSS_WIDOWS);

    // If there was  a BR box before current  inline box (indicated by
    // $more_before parameter > 0), we  may break page on the top edge
    // of the first line box
    if ($more_before > 0) {
      // $more_before lines stay above the break, $size + $more_after go below
      if ($more_before < $orphans) {
        $orphans_penalty = PAGE_BREAK_ORPHANS_PENALTY;
      } else {
        $orphans_penalty = 0;
      };

      if ($widows > $size + $more_after) {
        $widows_penalty  = PAGE_BREAK_WIDOWS_PENALTY;
      } else {
        $widows_penalty  = 0;
      };

      $line_box = $box->get_line_box(0);
      $locations[] = new PageBreakLocation($line_box->top,
                                           $penalty + PAGE_BREAK_LINE_PENALTY + $orphans_penalty + $widows_penalty);
    };

    // If there  was a BR box  after current inline  box (indicated by
    // $more_after parameter >  0), we may break page  on the bottom edge
    // of the last line box
    if ($more_after > 0) {
      // All $size lines of this box plus $more_before stay above the break,
      // $more_after lines go below (same formula as the loop below,
      // evaluated for the last line box)
      if ($size + $more_before < $orphans) {
        $orphans_penalty = PAGE_BREAK_ORPHANS_PENALTY;
      } else {
        $orphans_penalty = 0;
      };

      if ($widows > $more_after) {
        $widows_penalty  = PAGE_BREAK_WIDOWS_PENALTY;
      } else {
        $widows_penalty  = 0;
      };

      $line_box = $box->get_line_box($size-1);
      $locations[] = new PageBreakLocation($line_box->bottom,
                                           $penalty + PAGE_BREAK_LINE_PENALTY + $orphans_penalty + $widows_penalty);
    };

    // Note that we're  ignoring the last line box  inside this inline
    // box; it is required, as bottom of the last line box will be the
    // same as  the bottom of  the container block box.  Break penalty
    // should be calculated using block-box level data
    for ($i = 0; $i < $size - 1; $i++) {
      $line_box = $box->get_line_box($i);

      // Breaking below line $i leaves $i + 1 lines of this box above the break
      if ($i + 1 + $more_before < $orphans) {
        $orphans_penalty = PAGE_BREAK_ORPHANS_PENALTY;
      } else {
        $orphans_penalty = 0;
      };

      if ($i + 1 + $widows > $size + $more_after) {
        $widows_penalty  = PAGE_BREAK_WIDOWS_PENALTY;
      } else {
        $widows_penalty  = 0;
      };

      $locations[] = new PageBreakLocation($line_box->bottom,
                                           $penalty + PAGE_BREAK_LINE_PENALTY + $orphans_penalty + $widows_penalty);
    };

    return $locations;
  }

  /**
   * Finds the closest preceding sibling for which is_null() is false.
   *
   * @param int   $index   index of the current box in $content
   * @param array $content sibling boxes
   * @param int   $size    number of siblings (not used by the backward scan)
   * @return object|null the sibling found, or null
   */
  public static function &get_previous($index, $content, $size) {
    for ($i = $index - 1; $i>=0; $i--) {
      $child = $content[$i];
      if (!$child->is_null()) {
        return $child;
      };
    };

    // A variable is needed, as the function returns by reference
    $dummy = null;
    return $dummy;
  }

  /**
   * Finds the closest following sibling for which is_null() is false.
   *
   * @param int   $index   index of the current box in $content
   * @param array $content sibling boxes
   * @param int   $size    number of siblings
   * @return object|null the sibling found, or null
   */
  public static function &get_next($index, &$content, $size) {
    for ($i=$index + 1; $i<$size; $i++) {
      $child =& $content[$i];
      if (!$child->is_null()) {
        return $child;
      };
    };

    // A variable is needed, as the function returns by reference
    $dummy = null;
    return $dummy;
  }

  /**
   * Collects break locations produced by the children of a container box,
   * dispatching on the kind of each child.
   *
   * @param object $box     box to inspect; anything that is not a
   *                        GenericContainerBox yields no locations
   * @param mixed  $penalty penalty inherited from the ancestors
   * @return array ungrouped list of PageBreakLocation objects
   */
  public static function get_pages_traverse(&$box, $penalty) {
    if (!is_a($box, 'GenericContainerBox')) {
      return array();
    };

    $locations = array();

    for ($i=0, $content_size = count($box->content); $i<$content_size; $i++) {
      $previous_child =& PageBreakLocator::get_previous($i, $box->content, $content_size);
      $next_child     =& PageBreakLocator::get_next($i, $box->content, $content_size);
      $child          =& $box->content[$i];

      /**
       * Note that page-break-xxx properties apply to block-level elements only
       */
      if (is_a($child, 'BRBox')) {
        // Do nothing
      } elseif ($child->isBlockLevel()) {
        $locations = array_merge($locations, PageBreakLocator::get_pages_traverse_block($child,
                                                                                        $next_child,
                                                                                        $previous_child,
                                                                                        $penalty));

      } elseif (is_a($child, 'TableCellBox')) {
        $null = null;
        $child_locations = PageBreakLocator::get_pages_traverse_block($child, $null, $null, $penalty);
        $locations = array_merge($locations, $child_locations);
      } elseif (is_a($child, 'InlineBox')) {
        $more_before = 0;
        $more_after  = 0;

        // Neighbouring line boxes are only counted across a BR box
        if (is_a($previous_child, 'BRBox')) {
          $more_before = PageBreakLocator::get_more_before($i, $box->content, $content_size);
        };

        if (is_a($next_child, 'BRBox')) {
          $more_after = PageBreakLocator::get_more_after($i, $box->content, $content_size);
        };

        $locations = array_merge($locations, PageBreakLocator::get_pages_traverse_inline($child, $penalty, $more_before, $more_after));
      } elseif (is_a($child, 'TableRowBox')) {
        $locations = array_merge($locations, PageBreakLocator::get_pages_traverse_table_row($child, $penalty));
      };
    };

    return $locations;
  }

  /**
   * Sorts locations by descending coordinate and merges entries sharing
   * the same coordinate. Two non-negative penalties merge to the greater
   * one; if either is negative (forced break), the lesser one wins.
   *
   * Note that merging updates the 'penalty' of the first object of each
   * group in place.
   *
   * @param array $locations_ungrouped list of PageBreakLocation objects
   * @return array sorted list with unique coordinates
   */
  public static function sort_locations($locations_ungrouped) {
    if (count($locations_ungrouped) == 0) {
      return array();
    };

    usort($locations_ungrouped, 'cmp_page_break_locations');

    $last_location = $locations_ungrouped[0];
    $locations = array();
    foreach ($locations_ungrouped as $location) {
      if ($last_location->location != $location->location) {
        $locations[] = $last_location;
        $last_location = $location;
      } else {
        if ($last_location->penalty >= 0 && $location->penalty >= 0) {
          $last_location->penalty = max($last_location->penalty, $location->penalty);
        } else {
          $last_location->penalty = min($last_location->penalty, $location->penalty);
        };
      };
    };
    $locations[] = $last_location;

    return $locations;
  }
}
637?>