Skip to content

Commit e4d84ac

Browse files
committed
Collect and write type info to property pages (fix #4)
1 parent f4a59c6 commit e4d84ac

1 file changed

Lines changed: 81 additions & 18 deletions

File tree

main.go

Lines changed: 81 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,13 @@ const (
407407
subClassPropertyURI = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
408408
)
409409

410+
// Datatype IRIs of RDF literals that Run recognises when it records a
// "Has type" fact on a property page: the string and langString IRIs are
// recorded as "Text", the integer and float IRIs as "Number". Literals with
// any other datatype get no "Has type" fact from that switch.
const (
411+
dataTypeURIString = "http://www.w3.org/2001/XMLSchema#string"
412+
dataTypeURILangString = "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
413+
dataTypeURIInteger = "http://www.w3.org/2001/XMLSchema#integer"
414+
dataTypeURIFloat = "http://www.w3.org/2001/XMLSchema#float"
415+
)
416+
410417
const (
411418
_ = iota
412419
URITypeUndefined
@@ -432,7 +439,11 @@ func NewTripleAggregateToWikiPageConverter() *TripleAggregateToWikiPageConverter
432439

433440
func (p *TripleAggregateToWikiPageConverter) Run() {
434441
defer close(p.OutPage)
442+
443+
predPageIndex := make(map[string]*WikiPage)
444+
435445
resourceIndex := <-p.InIndex
446+
436447
for aggr := range p.InAggregate {
437448
pageType := p.determineType(aggr)
438449

@@ -442,11 +453,40 @@ func (p *TripleAggregateToWikiPageConverter) Run() {
442453

443454
for _, tr := range aggr.Triples {
444455

445-
_, propertyStr := p.convertUriToWikiTitle(tr.Pred.String(), URITypePredicate, resourceIndex) // Here we know it is a predicate, simply because its location in a triple
456+
predTitle, propertyStr := p.convertUriToWikiTitle(tr.Pred.String(), URITypePredicate, resourceIndex) // Here we know it is a predicate, simply because its location in a triple
457+
458+
// Make sure property page exists
459+
if predPageIndex[predTitle] == nil {
460+
predPageIndex[predTitle] = NewWikiPage(predTitle, []*Fact{}, []string{}, URITypePredicate)
461+
}
462+
463+
var valueStr string
446464

447-
valueAggr := (*resourceIndex)[tr.Obj.String()]
448-
valueUriType := p.determineType(valueAggr)
449-
_, valueStr := p.convertUriToWikiTitle(tr.Obj.String(), valueUriType, resourceIndex)
465+
if tr.Obj.Type() == rdf.TermIRI {
466+
467+
valueAggr := (*resourceIndex)[tr.Obj.String()]
468+
valueUriType := p.determineType(valueAggr)
469+
_, valueStr = p.convertUriToWikiTitle(tr.Obj.String(), valueUriType, resourceIndex)
470+
471+
predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Page"))
472+
473+
} else if tr.Obj.Type() == rdf.TermLiteral {
474+
475+
valueStr = tr.Obj.String()
476+
dataTypeStr := tr.Obj.(rdf.Literal).DataType.String()
477+
478+
// Add type info on the current property's page
479+
switch dataTypeStr {
480+
case dataTypeURIString:
481+
predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Text"))
482+
case dataTypeURILangString:
483+
predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Text"))
484+
case dataTypeURIInteger:
485+
predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Number"))
486+
case dataTypeURIFloat:
487+
predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Number"))
488+
}
489+
}
450490

451491
if tr.Pred.String() == typePropertyURI || tr.Pred.String() == subClassPropertyURI {
452492

@@ -464,26 +504,36 @@ func (p *TripleAggregateToWikiPageConverter) Run() {
464504

465505
} else {
466506

467-
factExists := false
468-
for _, existingFact := range page.Facts {
469-
if propertyStr == existingFact.Property && valueStr == existingFact.Value {
470-
factExists = true
471-
break
472-
}
473-
}
474-
475-
if !factExists {
476-
fact := NewFact(propertyStr, valueStr)
477-
page.AddFact(fact)
478-
}
507+
page.AddFactUnique(NewFact(propertyStr, valueStr))
479508
}
480509
}
481510

482511
// Add Equivalent URI fact
483512
equivURIFact := NewFact("Equivalent URI", aggr.Subject.String())
484-
page.AddFact(equivURIFact)
513+
page.AddFactUnique(equivURIFact)
514+
515+
// Don't send predicates just yet (we want to gather facts about them,
516+
// and send at the end) ...
517+
if pageType == URITypePredicate {
518+
if predPageIndex[page.Title] != nil {
519+
// Add facts and categories to existing page
520+
for _, fact := range page.Facts {
521+
predPageIndex[page.Title].AddFactUnique(fact)
522+
}
523+
for _, cat := range page.Categories {
524+
predPageIndex[page.Title].AddCategory(cat)
525+
}
526+
} else {
527+
// If page does not exist, use the newly created one
528+
predPageIndex[page.Title] = page
529+
}
530+
} else {
531+
p.OutPage <- page
532+
}
533+
}
485534

486-
p.OutPage <- page
535+
for _, predPage := range predPageIndex {
536+
p.OutPage <- predPage
487537
}
488538
}
489539

@@ -895,6 +945,19 @@ func (p *WikiPage) AddFact(fact *Fact) {
895945
p.Facts = append(p.Facts, fact)
896946
}
897947

948+
// AddFactUnique adds fact to the page through AddFact unless the page
// already holds a fact with the same Property and Value. Facts are compared
// by those two fields, not by pointer identity, so two distinct *Fact values
// with equal contents count as duplicates. The check is a linear scan over
// p.Facts. fact is dereferenced without a nil check once the page holds at
// least one fact.
func (p *WikiPage) AddFactUnique(fact *Fact) {
949+
factExists := false
950+
// Look for an existing fact with identical property and value.
for _, existingFact := range p.Facts {
951+
if fact.Property == existingFact.Property && fact.Value == existingFact.Value {
952+
factExists = true
953+
break
954+
}
955+
}
956+
// Only append when no equal fact was found.
if !factExists {
957+
p.AddFact(fact)
958+
}
959+
}
960+
898961
// AddCategory appends category to the page's Categories. It performs no
// duplicate check, so adding the same category twice stores it twice.
func (p *WikiPage) AddCategory(category string) {
899962
p.Categories = append(p.Categories, category)
900963
}

0 commit comments

Comments
 (0)