Skip to content

Commit 838f7a2

Browse files
committed
2 parents 787a258 + ba27af7 commit 838f7a2

6 files changed

Lines changed: 81 additions & 142 deletions

File tree

migration/tests/corpusfortesting.py

Lines changed: 59 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,20 @@
33
44
@author: wf
55
'''
6-
from datasources.openresearch import OREventCorpus, OREventManager
6+
from corpus.lookup import CorpusLookup
77
from lodstorage.storageconfig import StorageConfig
8-
98
from ormigrate.toolbox import HelperFunctions as hf
109
from wikifile.wikiFileManager import WikiFileManager
1110
from os import path
1211
import os
12+
13+
1314
class CorpusForTesting(object):
1415
'''
1516
Simplify initializing an EventCorpus for tests (singleton)
1617
'''
17-
1818
wikiId='orclone'
1919

20-
2120
@classmethod
2221
def hasCache(cls):
2322
'''
@@ -48,29 +47,70 @@ def getWikiFileManager(cls,wikiId=None,debug=False):
4847
return wikiFileManager
4948

5049
@classmethod
51-
def getEventCorpusFromWikiAPI(cls, wikiId=None, force=False, debug=False):
50+
def getEventDataSourceFromWikiAPI(cls, lookupId:str= "orclone", forceUpdate:bool=False, debug:bool=False):
5251
'''
5352
get events with series by knitting / linking the entities together
53+
54+
Args:
55+
lookupId(str): ID of the EventDataSource that should be returned.
56+
forceUpdate(bool): True if the data should be fetched from the source instead of the cache
57+
debug(bool): If True display debug output
58+
59+
Returns:
60+
EventDataSource
5461
'''
55-
wikiUser=cls.getWikiUser(wikiId)
56-
config = cls.getStorageConfig()
57-
eventCorpus=OREventCorpus(config,debug=debug)
58-
eventCorpus.fromCache(wikiUser,force=force)
59-
eventCorpus.wikiFileManager=cls.getWikiFileManager(wikiId, debug)
60-
return eventCorpus
62+
eventDataSource=cls.getEventDataSource(lookupId=lookupId, forceUpdate=forceUpdate, debug=debug)
63+
return eventDataSource
6164

6265
@classmethod
63-
def getEventCorpusFromWikiText(cls,wikiId=None,debug=False):
66+
def getEventDataSourceFromWikiText(cls, lookupId:str= "orclone-backup", forceUpdate:bool=False, debug=False):
6467
"""
6568
get events with series by knitting/linking entities from a WikiFileManager
69+
70+
Args:
71+
lookupId(str): ID of the EventDataSource that should be returned.
72+
forceUpdate(bool): True if the data should be fetched from the source instead of the cache
73+
debug(bool): If True display debug output
74+
75+
Returns:
76+
EventDataSource
6677
"""
67-
if wikiId is None:
68-
wikiId=cls.wikiId
69-
config=cls.getStorageConfig()
70-
wikiFileManager=cls.getWikiFileManager(wikiId,debug)
71-
eventCorpus=OREventCorpus(config,debug=debug)
72-
eventCorpus.fromWikiFileManager(wikiFileManager)
73-
return eventCorpus
78+
eventDataSource = cls.getEventDataSource(lookupId=lookupId, forceUpdate=forceUpdate, debug=debug)
79+
return eventDataSource
80+
81+
@classmethod
82+
def getEventDataSource(cls, lookupId:str, forceUpdate:bool=False, debug:bool=False):
83+
"""
84+
85+
Args:
86+
lookupId(str): ID of the EventDataSource that should be returned.
87+
forceUpdate(bool): True if the data should be fetched from the source instead of the cache
88+
debug(bool): If True display debug output
89+
90+
Returns:
91+
EventDataSource
92+
"""
93+
lookup = CorpusLookup(lookupIds=[lookupId], configure=cls.patchEventSource, debug=debug)
94+
lookup.load(forceUpdate=forceUpdate)
95+
eventDataSource = lookup.getDataSource(lookupId)
96+
return eventDataSource
97+
98+
@classmethod
99+
def patchEventSource(cls, lookup:CorpusLookup):
100+
'''
101+
patches the EventManager and EventSeriesManager by adding wikiUser and WikiFileManager
102+
'''
103+
wikiUser = cls.getWikiUser(cls.wikiId)
104+
wikiFileManager = cls.getWikiFileManager(cls.wikiId)
105+
for lookupId in ["orclone", "orclone-backup", "or", "or-backup"]:
106+
orDataSource = lookup.getDataSource(lookupId)
107+
if orDataSource is not None:
108+
if lookupId.endswith("-backup"):
109+
orDataSource.eventManager.wikiFileManager = wikiFileManager
110+
orDataSource.eventSeriesManager.wikiFileManager = wikiFileManager
111+
else:
112+
orDataSource.eventManager.wikiUser = wikiUser
113+
orDataSource.eventSeriesManager.wikiUser = wikiUser
74114

75115
@classmethod
76116
def getStorageConfig(cls):

migration/tests/testIssue168_ratingCallBack.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def testPageRatingList(self):
6666
# test the rating call back
6767
# '''
6868
# profile=Profiler("test rating call back")
69-
# eventCorpus=Corpus.getEventCorpusFromWikiAPI(debug=self.debug)
69+
# eventCorpus=Corpus.getEventDataSourceFromWikiAPI(debug=self.debug)
7070
# lod,errors=eventCorpus.eventList.getRatedLod(Event.rateMigration)
7171
# self.checkRatedLod(lod, errors)
7272
# lod,errors=eventCorpus.eventSeriesList.getRatedLod(EventSeries.rateMigration)

migration/tests/testIssue170_Curation.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from ormigrate.issue170_curation import CurationQualityChecker
99
from tests.corpusfortesting import CorpusForTesting as Corpus
1010
from collections import Counter
11-
from tests.pagefixtoolbox import PageFixerToolbox, PageFixerTest
11+
from tests.pagefixtoolbox import PageFixerTest
1212

1313
class TestIssue170(PageFixerTest):
1414
'''
@@ -47,8 +47,8 @@ def testCurationQualityCheck170(self):
4747

4848
def testUserCount(self):
4949
# only needed to set up the userrating yaml file
50-
eventCorpus=Corpus.getEventCorpusFromWikiAPI(debug=self.debug, force=True)
51-
userLookup=eventCorpus.eventManager.getLookup("lastEditor",withDuplicates=True)
50+
eventDataSource=Corpus.getEventDataSourceFromWikiAPI(debug=self.debug, forceUpdate=True)
51+
userLookup=eventDataSource.eventManager.getLookup("lastEditor",withDuplicates=True)
5252
if self.debug:
5353
print (f"{len(userLookup)} users")
5454
expected=1 if hf.inPublicCI() else 140

migration/tests/testIssue236_CSV.py

Lines changed: 0 additions & 101 deletions
This file was deleted.

migration/tests/test_EventCorpus.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ class TestEventCorpus(unittest.TestCase):
5252
def setUp(self):
5353
self.debug = False
5454
self.profile=True
55-
self.eventCorpusAPI=None
56-
self.eventCorpusWikiText=None
55+
self.eventDataSourceAPI=None
56+
self.eventDataSourceWikiText=None
5757
pass
5858

5959
def tearDown(self):
@@ -85,31 +85,31 @@ def testEventCorpusFromWikiUser(self):
8585
'''
8686
self.debug=True
8787
profile=Profiler("getting EventCorpus from WikiUser")
88-
eventCorpus=Corpus.getEventCorpusFromWikiAPI(debug=self.debug, force=True)
88+
eventDataSource=Corpus.getEventDataSourceFromWikiAPI(forceUpdate=True, debug=self.debug)
8989
profile.time()
90-
self.checkEventCorpus(eventCorpus)
91-
92-
90+
self.checkEventCorpus(eventDataSource)
91+
92+
9393
def testEventCorpusFromWikiUserCache(self):
9494
"""
9595
test the Event Corpus from the wikiUser(API) cache.
9696
"""
9797
debug = True
9898
if Corpus.hasCache():
9999
profile=Profiler(f"getting EventCorpus for {Corpus.wikiId} from WikiUser Cache",self.profile)
100-
self.eventCorpusAPI=Corpus.getEventCorpusFromWikiAPI(debug=debug, force=False)
100+
self.eventDataSourceAPI=Corpus.getEventDataSourceFromWikiAPI(debug=debug, forceUpdate=False)
101101
profile.time()
102-
self.checkEventCorpus(self.eventCorpusAPI)
102+
self.checkEventCorpus(self.eventDataSourceAPI)
103103

104104

105105
def testEventCorpusFromWikiFileManager(self):
106106
"""
107107
test the Event Corpus from the wiki file manager(wikiFiles).
108108
"""
109109
profile=Profiler(f"getting EventCorpus from wikiText files for {Corpus.wikiId}")
110-
self.eventCorpusWikiText = Corpus.getEventCorpusFromWikiText(debug=self.debug)
110+
self.eventDataSourceWikiText = Corpus.getEventDataSourceFromWikiText(forceUpdate=True, debug=self.debug)
111111
profile.time()
112-
self.checkEventCorpus(self.eventCorpusWikiText,['pageTitle'])
112+
self.checkEventCorpus(self.eventDataSourceWikiText, ['pageTitle'])
113113

114114

115115
def testMatchingSetsForEventCorpus(self):
@@ -119,15 +119,15 @@ def testMatchingSetsForEventCorpus(self):
119119
if not Corpus.hasCache():
120120
return
121121
profile=Profiler(f"getting EventCorpora from wikiAPI and wikiText files for {Corpus.wikiId}")
122-
if self.eventCorpusAPI is None:
123-
self.eventCorpusAPI=Corpus.getEventCorpusFromWikiAPI(debug=self.debug, force=False)
124-
if self.eventCorpusWikiText is None:
125-
self.eventCorpusWikiText = Corpus.getEventCorpusFromWikiText(debug=self.debug)
122+
if self.eventDataSourceAPI is None:
123+
self.eventDataSourceAPI=Corpus.getEventDataSourceFromWikiAPI(debug=self.debug, forceUpdate=False)
124+
if self.eventDataSourceWikiText is None:
125+
self.eventDataSourceWikiText = Corpus.getEventDataSourceFromWikiText(debug=self.debug)
126126
profile.time()
127127
profile=Profiler(f"finding common events and series for {Corpus.wikiId}")
128128
keys=["acronym","pageTitle"]
129-
eventSet=MatchingSet("Events","api",self.eventCorpusAPI.eventManager,"wikiText",self.eventCorpusWikiText.eventManager,keys)
130-
eventSeriesSet=MatchingSet("EventSeries","api",self.eventCorpusAPI.eventSeriesManager,"wikiText",self.eventCorpusWikiText.eventSeriesManager,keys)
129+
eventSet=MatchingSet("Events","api", self.eventDataSourceAPI.eventManager, "wikiText", self.eventDataSourceWikiText.eventManager, keys)
130+
eventSeriesSet=MatchingSet("EventSeries","api", self.eventDataSourceAPI.eventSeriesManager, "wikiText", self.eventDataSourceWikiText.eventSeriesManager, keys)
131131
profile.time()
132132
eventSet.showStats()
133133
eventSeriesSet.showStats()

migration/tests/test_EventLocationContext.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def setUp(self) -> None:
1717
targetWikiTextPath = f"{home}/.or/generated/Location"
1818
self.wikiFileManager = CorpusForTesting.getWikiFileManager()
1919
self.wikiFileManager.targetPath=targetWikiTextPath
20-
self.eventCorpus=CorpusForTesting.getEventCorpusFromWikiText()
20+
self.eventDataSource=CorpusForTesting.getEventDataSourceFromWikiText()
2121
self.eventLocationContext=EventLocationHandler(wikiFileManager=self.wikiFileManager)
2222

2323
def test_generateLocationPages(self):
@@ -37,7 +37,7 @@ def test_generateORLocationPages(self):
3737
Note: Not run in CI since it generates a lot of pages and uses functionality that is already tested in the other tests
3838
"""
3939
profile = Profiler("Generate OPENRESEARCH location pages (Limited to 100 events)", self.profile)
40-
self.eventLocationContext.generateORLocationPages(self.eventCorpus.eventManager.events[:100], overwrite=True)
40+
self.eventLocationContext.generateORLocationPages(self.eventDataSource.eventManager.events[:100], overwrite=True)
4141
profile.time()
4242
# ToDo: test if generated correctly
4343

0 commit comments

Comments
 (0)