Changeset 442

Show
Ignore:
Timestamp:
10/05/07 18:16:12 (1 year ago)
Author:
ogawa
Message:

Apply the changes from r440 to batch_est_crawler.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • EstCrawler/trunk/EstCrawler/tools/batch_est_cralwer

    r439 r442  
    4040    my $cdate = ts2iso($entry->blog, $entry->authored_on); 
    4141    my $mdate = ts2iso($entry->blog, $entry->modified_on); 
     42    my $categories = join('', map { '[' . $_->label . ']' } @{$entry->categories}) 
     43        if $entry->categories; 
     44    my $tags = join('', map { '[' . $_ . ']' } $entry->tags) 
     45        if $entry->tags; 
    4246 
    4347    # metainfo (attribute, not searchable) 
     
    4751    $doc->add_attr('@cdate', $cdate); 
    4852    $doc->add_attr('@mdate', $mdate); 
     53    $doc->add_attr('entry_id', $entry->id); 
     54    $doc->add_attr('blog_id', $entry->blog_id); 
     55    $doc->add_attr('categories', $categories) if $categories; 
     56    $doc->add_attr('tags', $tags) if $tags; 
    4957 
    5058    # document body (searchable) 
     
    5563    $doc->add_hidden_text($title); 
    5664    $doc->add_hidden_text($author); 
    57     $doc->add_hidden_text(join(', ', map { $_->label } @{$entry->categories}) || ''); 
    58     $doc->add_hidden_text(join(', ', $entry->tags) || ''); 
     65    $doc->add_hidden_text($categories || ''); 
     66    $doc->add_hidden_text($tags || ''); 
    5967    $doc->add_hidden_text(remove_html($entry->keywords) || ''); 
    6068    $doc;