diff --git a/profiler/.DS_Store b/profiler/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5cc2e00eabf324dad5fbc6c075cf6651492d95a8 Binary files /dev/null and b/profiler/.DS_Store differ diff --git a/profiler/Dockerfile b/profiler/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9c7821fb77393b06f85d0acc14800a1e4f6919a5 --- /dev/null +++ b/profiler/Dockerfile @@ -0,0 +1,39 @@ +# --- Stage 1: API Dependency Loader + +# :: Initial dependency loading image. +FROM python:3.7 +#FROM python:3.7-slim as loader + +RUN apt-get update +RUN pip install --upgrade pip +RUN apt-get install -y supervisor + +COPY src/requirements.txt . +RUN pip install -r requirements.txt +# Copy Python API. +#FROM python:slim-buster +#RUN apk add --no-cache libstdc++ +#COPY --from=loader /root/.local /root/.local +#COPY --from=loader /usr/bin/supervisord /usr/bin/supervisord +#COPY --from=loader /etc/supervisor /etc/supervisor +#ENV PATH=/root/.local:$PATH + +RUN mkdir /profiler +COPY ./src /profiler/ +#RUN git clone https://www.github.com/alexandrosraikos/dependency-extractor +#RUN pip install ./dependency-extractor +WORKDIR /profiler + +EXPOSE 7878 +ENV GIT_PYTHON_REFRESH=quiet + +# Execute both in entrypoint.sh. +#ENTRYPOINT ["/entrypoint.sh"] +RUN mkdir -p /run/pid +RUN mkdir -p /var/log/supervisor + +#CMD ["/profiler/start.sh"] +COPY ./src/supervisord.conf /etc/supervisor/conf.d/supervisord.conf +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] + + diff --git a/profiler/Dockerfile-original b/profiler/Dockerfile-original new file mode 100644 index 0000000000000000000000000000000000000000..52a1b3f4e2b0eb048079e87bc6234287fe631d8a --- /dev/null +++ b/profiler/Dockerfile-original @@ -0,0 +1,30 @@ +# --- Stage 1: API Dependency Loader + +# :: Initial dependency loading image. +FROM python:3.7-slim as api-loader + +# Get package dependencies. +COPY src/requirements.txt . 
+RUN pip install --upgrade pip +RUN pip install --user -r requirements.txt + +# --- Stage 2: Combined InfluxDB + Python API Image +FROM python:3.7-alpine3.12 + +# :: Python API setup +# Copy compiled dependencies from +# the standard user pip directory +# and update PATH. +COPY --from=api-loader /root/.local /root/.local +ENV PATH=/root/.local:$PATH + +# Copy Python API. +RUN mkdir -p profiler/dextractor +COPY ./src ./profiler/ +ADD /src/extractor/dextractor/ ./profiler/dextractor +WORKDIR /profiler +ENV GIT_PYTHON_REFRESH=quiet +# Execute both in entrypoint.sh. +#ENTRYPOINT ["/entrypoint.sh"] +CMD ["python","-u","profiler.py"] + diff --git a/profiler/docker-compose.yaml b/profiler/docker-compose.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b273132898c1d7be130f2ff29fa605d899df91a4 --- /dev/null +++ b/profiler/docker-compose.yaml @@ -0,0 +1,73 @@ +version: '2' + +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:6.4.1 + container_name: elasticsearch + environment: + - cluster.name=docker-cluster + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + ulimits: + memlock: + soft: -1 + hard: -1 + volumes: + - ./profiler/db:/usr/share/elasticsearch/data + ports: + - 9200:9200 + + activemq: + image: jdtotow/activemq + container_name: activemq + ports: + # mqtt + - "1883:1883" + # amqp + - "5672:5672" + # ui + - "8161:8161" + # stomp + - "61613:61613" + # ws + - "61614:61614" + # jms + - "61616:61616" + # jms prometheus agent + - "8080:8080" + #volumes: ["activemq-data:/opt/activemq/conf", "activemq-data:/data/activemq", "activemq-data:/var/log/activemq"] + environment: + ACTIVEMQ_REMOVE_DEFAULT_ACCOUNT: "true" + ACTIVEMQ_ADMIN_LOGIN: aaa + ACTIVEMQ_ADMIN_PASSWORD: "111" + ACTIVEMQ_WRITE_LOGIN: aaa + ACTIVEMQ_WRITE_PASSWORD: "111" + ACTIVEMQ_READ_LOGIN: aaa + ACTIVEMQ_READ_PASSWORD: "111" + ACTIVEMQ_JMX_LOGIN: aaa + ACTIVEMQ_JMX_PASSWORD: "111" + ACTIVEMQ_STATIC_TOPICS: 
static-topic-1;static-topic-2 + ACTIVEMQ_STATIC_QUEUES: static-queue-1;static-queue-2 + ACTIVEMQ_ENABLED_SCHEDULER: "true" + ACTIVEMQ_MIN_MEMORY: 512 + ACTIVEMQ_MAX_MEMORY: 2048 + + profiler: + image: profiler + build: + context: . + container_name: profiler + restart: always + ports: + - 7878:7878 + environment: + - "ELASTICSEARCH_HOSTNAME=elasticsearch" + - "URL_KNOWLEDGE_BASE=http://52.19.168.139:9200" + - "CONCURRENT_DOWNLOADS=20" + - "ACTIVEMQ_HOST=activemq" + - "DEMO_MODE=enabled" + volumes: + - "/tmp/profiler:/tmp/profiler" + - "/tmp/downloader:/tmp/downloader" + + diff --git a/profiler/mongodb/WiredTiger b/profiler/mongodb/WiredTiger new file mode 100644 index 0000000000000000000000000000000000000000..6245f6ecfb9ae8b2a9dc38f3d66ae58789f0c930 --- /dev/null +++ b/profiler/mongodb/WiredTiger @@ -0,0 +1,2 @@ +WiredTiger +WiredTiger 10.0.1: (April 12, 2021) diff --git a/profiler/mongodb/WiredTiger.lock b/profiler/mongodb/WiredTiger.lock new file mode 100644 index 0000000000000000000000000000000000000000..3d842068eaace86375117634c7e4bd9b9eba197a --- /dev/null +++ b/profiler/mongodb/WiredTiger.lock @@ -0,0 +1 @@ +WiredTiger lock file diff --git a/profiler/mongodb/WiredTiger.turtle b/profiler/mongodb/WiredTiger.turtle new file mode 100644 index 0000000000000000000000000000000000000000..8aa087337c639cc17f45f7f07eb87030fe6f02d8 --- /dev/null +++ b/profiler/mongodb/WiredTiger.turtle @@ -0,0 +1,6 @@ +WiredTiger version string +WiredTiger 10.0.1: (April 12, 2021) +WiredTiger version +major=10,minor=0,patch=1 +file:WiredTiger.wt 
+access_pattern_hint=none,allocation_size=4KB,app_metadata=,assert=(commit_timestamp=none,durable_timestamp=none,read_timestamp=none,write_timestamp=off),block_allocation=best,block_compressor=,cache_resident=false,checksum=on,collator=,columns=,dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key=,huffman_value=,id=0,ignore_in_memory_cache_size=false,internal_item_max=0,internal_key_max=0,internal_key_truncate=true,internal_page_max=4KB,key_format=S,key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0,log=(enabled=true),memory_page_image_max=0,memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,prefix_compression_min=4,readonly=false,split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,tiered_object=false,tiered_storage=(auth_token=,bucket=,bucket_prefix=,cache_directory=,local_retention=300,name=,object_target_size=10M),value_format=S,verbose=[],version=(major=1,minor=1),write_timestamp_usage=none,checkpoint=(WiredTigerCheckpoint.430=(addr="018381e4ef950c178481e4cb6fc1f78581e4cb072fe3808080e2dfc0e23fc0",order=430,time=1647383958,size=28672,newest_start_durable_ts=0,oldest_start_ts=0,newest_txn=19,newest_stop_durable_ts=0,newest_stop_ts=-1,newest_stop_txn=-11,prepare=0,write_gen=1228,run_write_gen=1212)),checkpoint_backup_info=,checkpoint_lsn=(62,14336) diff --git a/profiler/mongodb/WiredTiger.wt b/profiler/mongodb/WiredTiger.wt new file mode 100644 index 0000000000000000000000000000000000000000..2c822c6e98f1cec8cfd96db8c3495560a3e51778 Binary files /dev/null and b/profiler/mongodb/WiredTiger.wt differ diff --git a/profiler/mongodb/WiredTigerHS.wt b/profiler/mongodb/WiredTigerHS.wt new file mode 100644 index 0000000000000000000000000000000000000000..3f019cba9b5ed0b1a0275084adb91169387c9683 Binary files /dev/null and b/profiler/mongodb/WiredTigerHS.wt differ diff --git a/profiler/mongodb/_mdb_catalog.wt b/profiler/mongodb/_mdb_catalog.wt new file mode 100644 index 
0000000000000000000000000000000000000000..70e23b87673d02ff4443d76176434f4a7591e8b6 Binary files /dev/null and b/profiler/mongodb/_mdb_catalog.wt differ diff --git a/profiler/mongodb/collection-0--6309281103802864315.wt b/profiler/mongodb/collection-0--6309281103802864315.wt new file mode 100644 index 0000000000000000000000000000000000000000..2bab8c9675f9108b83e6e0040c2e4124a2cba8b1 Binary files /dev/null and b/profiler/mongodb/collection-0--6309281103802864315.wt differ diff --git a/profiler/mongodb/collection-2--6309281103802864315.wt b/profiler/mongodb/collection-2--6309281103802864315.wt new file mode 100644 index 0000000000000000000000000000000000000000..a4e66fb1fcfb6065618126ac98d5a5c5c8f3780d Binary files /dev/null and b/profiler/mongodb/collection-2--6309281103802864315.wt differ diff --git a/profiler/mongodb/collection-4--6309281103802864315.wt b/profiler/mongodb/collection-4--6309281103802864315.wt new file mode 100644 index 0000000000000000000000000000000000000000..0592f2b0ff893ccf0d04dcd03c6aff15c4d42f89 Binary files /dev/null and b/profiler/mongodb/collection-4--6309281103802864315.wt differ diff --git a/profiler/mongodb/collection-7--6309281103802864315.wt b/profiler/mongodb/collection-7--6309281103802864315.wt new file mode 100644 index 0000000000000000000000000000000000000000..9631aa3c56640ae51f695622ac84e11e9a7ce1e0 Binary files /dev/null and b/profiler/mongodb/collection-7--6309281103802864315.wt differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T13-35-25Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T13-35-25Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..7be9520ed21efd810a5e09f15b563e431022014f Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T13-35-25Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T13-40-28Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T13-40-28Z-00000 new file mode 100644 index 
0000000000000000000000000000000000000000..cecbb3316ecb80562b1ebd6da07f46418be7c252 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T13-40-28Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T13-54-32Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T13-54-32Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..1d8ec747abb16985687a2f6133312407fa089b85 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T13-54-32Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-01-59Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-01-59Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..c2a192b6f4e3f62dc5ce583d4b2b246cc095c3ac Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-01-59Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-10-22Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-10-22Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..b93bdb4cde77958078012a7327ab69ecc9a5c87c Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-10-22Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-11-46Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-11-46Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..5cc712f3d665aab7cf8bb21d7bac982d065b2472 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-11-46Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-19-22Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-19-22Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..89ae5364bd9c7e4c96fb88a11e11e06222de724e Binary files /dev/null and 
b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-19-22Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-21-49Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-21-49Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..d10aa715da21dd4c85e50d18af2264ab961713b0 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-21-49Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-44-30Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-44-30Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..e1e35436362c28ff28c833147715ff11fb4b0d93 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-44-30Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-55-56Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-55-56Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..9cde8aa132c4506d893f93c744ab0e6d084d0614 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T14-55-56Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-03-47Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-03-47Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..00bf014fcede592ebc3fe0e4d4606bb07b257d3f Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-03-47Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-10-50Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-10-50Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..68b1eb62f355159d6a051ff50577a57578a53f32 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-10-50Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-15-10Z-00000 
b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-15-10Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..ffe2b92de71fa15657f1b20d4d1c49751172b531 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-15-10Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-17-31Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-17-31Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..fcf06254ac7165e7fbad5c17be91e4b444cc07fa Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T15-17-31Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T18-52-11Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T18-52-11Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..67c241f33bade345df8ef3e5f274f4f5fbb624ef Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T18-52-11Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T18-55-14Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T18-55-14Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..e30124c9e08e2b61f02f8059cb5c95ee751dac27 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T18-55-14Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T19-04-30Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T19-04-30Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..3955cefc24297c00b41579e80064d8c04c85f2ea Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T19-04-30Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-03T19-11-38Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T19-11-38Z-00000 new file mode 100644 index 
0000000000000000000000000000000000000000..13617d4e30475e03e452a7dd132dbee4243a6cd0 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-03T19-11-38Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-14T22-37-10Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-14T22-37-10Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..abe4c3041142ced92c47707fb64559ef0c0c99f8 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-14T22-37-10Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-14T22-54-02Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-14T22-54-02Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..ddeae3aab44b743d0c0aebbf420881363a252945 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-14T22-54-02Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-14T22-58-08Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-14T22-58-08Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..5f4a9e8020ceb7ea3ffd3b2b6574e65b9be36f93 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-14T22-58-08Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-14T23-26-44Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-14T23-26-44Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..c392abf6483f3480bb01a51ad380f6bee460ff01 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-14T23-26-44Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T13-34-30Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T13-34-30Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..ff2c3fbf085bdff9f03d79849085dfc812ca2b19 Binary files /dev/null and 
b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T13-34-30Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T13-40-21Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T13-40-21Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..5e90ee6bd3af03656f38a024d525011c51f78dd5 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T13-40-21Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T13-59-42Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T13-59-42Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..f8f22656da05836b450b6244f584b9b62bcf45f0 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T13-59-42Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-09-12Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-09-12Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..0b5de73dc64ac8ee3fe75c5446902085d3ec173a Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-09-12Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-15-38Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-15-38Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..663e58e8d2a2585e3c275e87a23a4f3ebd7bbe6c Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-15-38Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-24-17Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-24-17Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..024c18d98f70230f5bf5ac46299543244e027507 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-24-17Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-25-58Z-00000 
b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-25-58Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..ba4e8a8d980abfa6a098bf726ea573c24475d0d2 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-25-58Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-33-05Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-33-05Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..d004d3fc113cfc4acd299c9ea8b94c38c41e8a9b Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-33-05Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-36-53Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-36-53Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..0e2acebe52322dde7da5f662c201d78acf7aaf17 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-36-53Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-43-41Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-43-41Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..1e297068eecfb7dfe4ccceed121898af85694b8e Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T14-43-41Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-02-14Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-02-14Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..9463674c77c0a655553fc417605e770779a25f81 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-02-14Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-03-34Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-03-34Z-00000 new file mode 100644 index 
0000000000000000000000000000000000000000..c780483a3ea32e2d58d7b43a5049769fd7d2aeb4 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-03-34Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-05-28Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-05-28Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..e7968933d21ea4177acced3675285fc3a49a461d Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-05-28Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-22-52Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-22-52Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..55c7eb0e950c7f3a1da23140dd93df9bc6fe0a36 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-22-52Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-33-02Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-33-02Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..c531eb1c9ec24e5e7c019ac8abbe87d6da306f7c Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-33-02Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-44-57Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-44-57Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..990a99ad19e8c3c0a5dc029c5948d296f08356ce Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-44-57Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-50-37Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-50-37Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..780ff1621d29e9952dcb0767c68298679db48b34 Binary files /dev/null and 
b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-50-37Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-52-27Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-52-27Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..581b2ea674007268f66b1d12ffb93812d375fc25 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-52-27Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-56-08Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-56-08Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..0d54621d677d8c4fa12cd2e44371e7f2cb482ff9 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T15-56-08Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-00-46Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-00-46Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..107cad9c52151655570cdf72cf0bed451f9c3ccd Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-00-46Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-03-46Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-03-46Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..251adbc385469fea90ad66e8e22d18d91aa89bf3 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-03-46Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-14-17Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-14-17Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..b6688554aa4042a53d23ddd6ae3fd30e288deae7 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-14-17Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-20-55Z-00000 
b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-20-55Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..83c7fa6033beab5064ff8ae1ef1945f3f9b5a631 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-20-55Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-27-00Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-27-00Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..33af46af39a0c7242c2a9168dacb19453bb53c45 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-27-00Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-34-07Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-34-07Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..f0522308b5ddbe861ca9ae55f8ea1686710e2f10 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-34-07Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-37-58Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-37-58Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..2fd399e5a1c82f0b9a85767931ac130f4ce7b333 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T16-37-58Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-26-23Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-26-23Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..d0483567b78f83aedf0da24efa901d2ef6d92df9 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-26-23Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-28-04Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-28-04Z-00000 new file mode 100644 index 
0000000000000000000000000000000000000000..1d8bf88f6010de303aa63ea2c501b68f7f4a2419 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-28-04Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-30-01Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-30-01Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..92f1aaf6a6d6f2df6469e1a5022ccb76851a9469 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-30-01Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-33-14Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-33-14Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..a07350c5cb1390951c2638047c97df6c84b660ff Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-33-14Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-36-42Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-36-42Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..c9ab52469f60a3df05b1650de21562ba285569ab Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-36-42Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-37-36Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-37-36Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..d8d0275698ddfa019cf74d0a0d0817750586586f Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-37-36Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-39-57Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-39-57Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..43e123af36cc77fdb4577d83e4c50fbb99e872e8 Binary files /dev/null and 
b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-39-57Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-47-16Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-47-16Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..0fc4f5b76ad3b26ae14bf6f504d5ef303bb9c092 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-47-16Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-54-15Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-54-15Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..69bbb8f3c29a523f155e882b6e332ac07302afdf Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-54-15Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-56-30Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-56-30Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..5ae7371f2b457e94c436583204d8f9e1fc7f2606 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-56-30Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-59-46Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-59-46Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..3465306b14426b42bf4e74d9263ab1abd0aa5458 Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T19-59-46Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T22-25-54Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T22-25-54Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..a350811f251379de57f8bdfcbb44506792a145ca Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T22-25-54Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T22-32-21Z-00000 
b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T22-32-21Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..42a131affb3c377c9389d6ccdcb66616d0aa4ade Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T22-32-21Z-00000 differ diff --git a/profiler/mongodb/diagnostic.data/metrics.2022-03-15T22-34-32Z-00000 b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T22-34-32Z-00000 new file mode 100644 index 0000000000000000000000000000000000000000..12a642ef1f9cbcb6182d46d8664ecf51ba561d7d Binary files /dev/null and b/profiler/mongodb/diagnostic.data/metrics.2022-03-15T22-34-32Z-00000 differ diff --git a/profiler/mongodb/index-1--6309281103802864315.wt b/profiler/mongodb/index-1--6309281103802864315.wt new file mode 100644 index 0000000000000000000000000000000000000000..4cd10b64a381b6dff490e8f12f3c797299abecab Binary files /dev/null and b/profiler/mongodb/index-1--6309281103802864315.wt differ diff --git a/profiler/mongodb/index-3--6309281103802864315.wt b/profiler/mongodb/index-3--6309281103802864315.wt new file mode 100644 index 0000000000000000000000000000000000000000..7368a7aaaf16b642b53d0ea461adc042a13e2d99 Binary files /dev/null and b/profiler/mongodb/index-3--6309281103802864315.wt differ diff --git a/profiler/mongodb/index-5--6309281103802864315.wt b/profiler/mongodb/index-5--6309281103802864315.wt new file mode 100644 index 0000000000000000000000000000000000000000..d48bffe0e407b07788a82ebb0be846e372e2906c Binary files /dev/null and b/profiler/mongodb/index-5--6309281103802864315.wt differ diff --git a/profiler/mongodb/index-6--6309281103802864315.wt b/profiler/mongodb/index-6--6309281103802864315.wt new file mode 100644 index 0000000000000000000000000000000000000000..d034ebef27dbed5a67d051a386eb0f6d7052f7f6 Binary files /dev/null and b/profiler/mongodb/index-6--6309281103802864315.wt differ diff --git a/profiler/mongodb/index-8--6309281103802864315.wt b/profiler/mongodb/index-8--6309281103802864315.wt new 
file mode 100644 index 0000000000000000000000000000000000000000..510959fc665f06448b0b97c58209fe1c231eacd3 Binary files /dev/null and b/profiler/mongodb/index-8--6309281103802864315.wt differ diff --git a/profiler/mongodb/journal/WiredTigerLog.0000000062 b/profiler/mongodb/journal/WiredTigerLog.0000000062 new file mode 100644 index 0000000000000000000000000000000000000000..869ce0c89900d4081c57afc21b7f6be131c0068b Binary files /dev/null and b/profiler/mongodb/journal/WiredTigerLog.0000000062 differ diff --git a/profiler/mongodb/journal/WiredTigerPreplog.0000000001 b/profiler/mongodb/journal/WiredTigerPreplog.0000000001 new file mode 100644 index 0000000000000000000000000000000000000000..aa52a216f4fcb71fc3d1124740a85c53655bb09d Binary files /dev/null and b/profiler/mongodb/journal/WiredTigerPreplog.0000000001 differ diff --git a/profiler/mongodb/journal/WiredTigerPreplog.0000000002 b/profiler/mongodb/journal/WiredTigerPreplog.0000000002 new file mode 100644 index 0000000000000000000000000000000000000000..aa52a216f4fcb71fc3d1124740a85c53655bb09d Binary files /dev/null and b/profiler/mongodb/journal/WiredTigerPreplog.0000000002 differ diff --git a/profiler/mongodb/mongod.lock b/profiler/mongodb/mongod.lock new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/mongodb/sizeStorer.wt b/profiler/mongodb/sizeStorer.wt new file mode 100644 index 0000000000000000000000000000000000000000..66e529e7a459e1d0f9e0b723c23eb566e93dccfa Binary files /dev/null and b/profiler/mongodb/sizeStorer.wt differ diff --git a/profiler/mongodb/storage.bson b/profiler/mongodb/storage.bson new file mode 100644 index 0000000000000000000000000000000000000000..276b69451558644015832b36017fa871ed43443a Binary files /dev/null and b/profiler/mongodb/storage.bson differ diff --git a/profiler/src/.DS_Store b/profiler/src/.DS_Store new file mode 100644 index 
0000000000000000000000000000000000000000..817e3805e82c8e246906138ffbb5e277bc5321dd Binary files /dev/null and b/profiler/src/.DS_Store differ diff --git a/profiler/src/__pycache__/analysermanager.cpython-36.pyc b/profiler/src/__pycache__/analysermanager.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fe25ff7edf2bbc75fdd417f0d4183f1a90f3d69 Binary files /dev/null and b/profiler/src/__pycache__/analysermanager.cpython-36.pyc differ diff --git a/profiler/src/__pycache__/analysermanager.cpython-39.pyc b/profiler/src/__pycache__/analysermanager.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06aeb0543f6a347a1e33791f0774f1f1909a50ed Binary files /dev/null and b/profiler/src/__pycache__/analysermanager.cpython-39.pyc differ diff --git a/profiler/src/__pycache__/bagofwords.cpython-36.pyc b/profiler/src/__pycache__/bagofwords.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a33404c74ca20ce61da75b08b3ba8b389edfb53 Binary files /dev/null and b/profiler/src/__pycache__/bagofwords.cpython-36.pyc differ diff --git a/profiler/src/__pycache__/bagofwords.cpython-39.pyc b/profiler/src/__pycache__/bagofwords.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0edfa3a3fd750b739b0dbde350aa863455398f8d Binary files /dev/null and b/profiler/src/__pycache__/bagofwords.cpython-39.pyc differ diff --git a/profiler/src/__pycache__/doap.cpython-36.pyc b/profiler/src/__pycache__/doap.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03308c828d4df583e80a0f464a786c9fc27be290 Binary files /dev/null and b/profiler/src/__pycache__/doap.cpython-36.pyc differ diff --git a/profiler/src/__pycache__/doap.cpython-39.pyc b/profiler/src/__pycache__/doap.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..edb1ba1337cda58bb75bc86425d03323f5a1cfde Binary files /dev/null and 
b/profiler/src/__pycache__/doap.cpython-39.pyc differ diff --git a/profiler/src/__pycache__/downloader.cpython-36.pyc b/profiler/src/__pycache__/downloader.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3aecb08124e409a59f879901c8dff5a328cad42f Binary files /dev/null and b/profiler/src/__pycache__/downloader.cpython-36.pyc differ diff --git a/profiler/src/__pycache__/downloader.cpython-39.pyc b/profiler/src/__pycache__/downloader.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5cf5cda8969fdac8e8a480dd60b51c00031d6578 Binary files /dev/null and b/profiler/src/__pycache__/downloader.cpython-39.pyc differ diff --git a/profiler/src/__pycache__/filter.cpython-36.pyc b/profiler/src/__pycache__/filter.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0eb16bec561503bc70b93533c5e6abeebd4c29da Binary files /dev/null and b/profiler/src/__pycache__/filter.cpython-36.pyc differ diff --git a/profiler/src/__pycache__/filter.cpython-37.pyc b/profiler/src/__pycache__/filter.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b8b084a59fa45f14a5b075438950fc25a28ee5a Binary files /dev/null and b/profiler/src/__pycache__/filter.cpython-37.pyc differ diff --git a/profiler/src/__pycache__/filter.cpython-39.pyc b/profiler/src/__pycache__/filter.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d3da16c1e32ff3a379136b0f46aeb65f295a5e5 Binary files /dev/null and b/profiler/src/__pycache__/filter.cpython-39.pyc differ diff --git a/profiler/src/__pycache__/matcher.cpython-36.pyc b/profiler/src/__pycache__/matcher.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d58697839875d4d55e0d7a2dda6600284c4bbfd6 Binary files /dev/null and b/profiler/src/__pycache__/matcher.cpython-36.pyc differ diff --git a/profiler/src/__pycache__/matcher.cpython-39.pyc 
b/profiler/src/__pycache__/matcher.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21ccf2a7c1da1cddb86e9fcc644df9c313a8bf3a Binary files /dev/null and b/profiler/src/__pycache__/matcher.cpython-39.pyc differ diff --git a/profiler/src/amqp/MorphemicConnection.py b/profiler/src/amqp/MorphemicConnection.py new file mode 100644 index 0000000000000000000000000000000000000000..9fd61b18c35ed6f26bd3197c53d5209aaa77ef41 --- /dev/null +++ b/profiler/src/amqp/MorphemicConnection.py @@ -0,0 +1,71 @@ +import stomp +import logging +import json + +from stomp.listener import PrintingListener + +class Connection: + + subscriptions = [] + + def __init__(self, username, password, + host='localhost', + port=61613, + debug=False): + self.username = username + self.password = password + self.hosts = [(host, port)] + self.conn = stomp.Connection(host_and_ports=self.hosts, auto_content_length=False) + + if debug: + logging.debug("Enabling debug") + self.conn.set_listener('print', PrintingListener()) + + def _build_id(self,topic,id): + return "id.%s-%s" % (topic,id) + + def set_listener(self, id, listener): + if self.conn: + self.conn.set_listener(id,listener) + + def subscribe(self,destination, id, ack='auto'): + if not self.conn: + raise RuntimeError('You need to connect first') + + self.conn.subscribe(destination, id, ack) + + def topic(self,destination, id, ack='auto'): + self.subscribe("/topic/%s" % destination ,self._build_id(destination,id),ack) + + def queue(self,destination, id, ack='auto'): + self.subscribe("/queue/%s" % destination ,self._build_id(destination,id),ack) + + def unsubscribe(self, topic,id): + + if not self.conn: + return + self.conn.unsubscribe(self._build_id(topic,id)) + + + def connect(self, wait=True): + + if not self.conn: + return + + self.conn.connect(self.username, self.password, wait=wait) + + def disconnect(self): + self.conn.disconnect() + + def send_to_topic(self,destination, body, headers={}, **kwargs): + + if not 
self.conn: + logging.error("Connect first") + return + + str = json.dumps(body) + + self.conn.send(destination="/topic/%s" % destination, + body= str, + content_type="application/json", + headers=headers, **kwargs) diff --git a/profiler/src/analysermanager.py b/profiler/src/analysermanager.py new file mode 100644 index 0000000000000000000000000000000000000000..6a7965bcd6768fcaece387816b62f702dca49564 --- /dev/null +++ b/profiler/src/analysermanager.py @@ -0,0 +1,58 @@ +import os, time, json +from bagofwords import BagOfWordsManager +from threading import Thread +#import the GraphExtractor class over here +#please use local_repositories_folder = os.environ.get("LOCAL_REPOSITORIES_FOLDER","/tmp/downloader"), which is +#the root folder where repositories are downloaded +#the path to a specific repository will be the local_repositories_folder + "/" + project_name +#GraphExtractor class should have a method called addProject which receives: +# - repo (declared in the filter.py class Repo) +# - AnalyserManager instance (for being able to call setOutput when you finished extracting graph) + +""" +the repo object contains the name of the repository +class Repo(BaseModel): + name: str + url: str + idgit: str + status: str + n_error: int + type: str + labels: list +""" +# the project_name is the property name of the Repo class +# labels contains all labels of that repositories + +#When calling the setOutput , you should pass : +# - method ("bo" for the bag of world and "graph" for the Graph extractor) +# - data (data to be saved or to be evaluated) +# - repo (the object the your received from addProject) + +local_repositories_folder = os.environ.get("LOCAL_REPOSITORIES_FOLDER","/tmp/downloader") + +class AnalyserManager(): + def __init__(self, profiler): + self.profiler = profiler + self.bo = BagOfWordsManager() + # declare de graph extractor over here + self.analysers = [self.bo] #add it to the list + + def setOutput(self,method, data, repo): + if repo.type == 'analyser': + 
self.profiler.saveFeatures(method,repo, data) + else: + #send to the matcher + data['method'] = method + self.profiler.sendToMatcher(data, repo) + self.deleteProject(repo) + + def deleteProject(self, repo): + main_folder = local_repositories_folder + "/" + repo.name + os.system("rm -rf {0}/".format(main_folder)) + print("Deleting project {0} removed".format(repo.name)) + + def addProject(self, repo): + print("Repositories {0} added to the analyser manager".format(repo.name)) + for analyser in self.analysers: + thread = Thread(target=analyser.addProject,args=(repo, self, )) + thread.start() \ No newline at end of file diff --git a/profiler/src/api.py b/profiler/src/api.py new file mode 100644 index 0000000000000000000000000000000000000000..4d3debd4048f320c654f1e7f1179f23a78d4e0e3 --- /dev/null +++ b/profiler/src/api.py @@ -0,0 +1,183 @@ +import os, time, json, logging, uvicorn +from fastapi import FastAPI +from typing import List +from pydantic import BaseModel +from threading import Thread +from amqp.MorphemicConnection import Connection + +activemq_hostname = os.environ.get("ACTIVEMQ_HOST", "localhost") +activemq_port = int(os.environ.get("ACTIVEMQ_PORT", "61613")) +activemq_topic = os.environ.get("ACTIVEMQ_TOPIC", "static-topic-1") +activemq_subs_key = os.environ.get("ACTIVEMQ_SUBS_KEY", "subs-1") +activemq_username = os.environ.get("ACTIVEMQ_USERNAME", "aaa") +activemq_password = os.environ.get("ACTIVEMQ_PASSWORD", "111") +profiler_topic = os.environ.get("PROFILER_TOPIC","profiler") + +folder_response = os.environ.get("RESPONSE_FOLDER","./profiler") + +logname = "./log/profiler.log" +logging.basicConfig(filename=logname,filemode='a',format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',datefmt='%H:%M:%S',level=logging.DEBUG) + + + +#[{"component_name": "CU_UP", "categories":["cat1","cat2"], "language":"java", "repository":"https://github/username/project_name"},...] 
+class ComponentRequest(BaseModel): + component_name: str + categories: list + language: str + repository: str + +class ListComponentsRequest(List): + data = List[ComponentRequest] + +#[{"component_name":"CU_UP","suggested_categories":["cat3", "cat4"]}, ...] + +class ComponentResponse(BaseModel): + component_name: str + suggested_categories: list + +class ListComponentsResponse(List): + data = List[ComponentResponse] + +class ReportObject(BaseModel): + code: int + data: dict + +class Publisher(Thread): + def __init__(self): + self.message = None + self.destination = None + self.client = None + super(Publisher, self).__init__() + + def setParameters(self, message, queue): + self.message = message + self.queue = queue + + def run(self): + self.connect() + while True: + time.sleep(2) + + def connect(self): + while True: + try: + print('The publisher tries to connect to ActiveMQ broker') + logging.info('The publisher tries to connect to ActiveMQ broker') + self.client = Connection(username=activemq_username, password=activemq_password, host=activemq_hostname,port=activemq_port, debug=False) + self.client.connect() + print("connection established") + logging.info("connection established") + return True + except: + print("Could not connect the publisher") + logging.error("Could not connect the publisher") + + def send(self): + if self.message == None or self.queue == None: + print("Message or queue is None") + return False + try: + #self.client.send(body=json.dumps(self.message), destination=self.queue, persistent='false', auto_content_length=False, content_type="application/json") + self.client.send_to_topic(self.queue, self.message) + return True + except Exception as e: + print(e) + self.client.disconnect() + print("Reconnection in 10s ...") + logging.info("Reconnection in 10s ...") + time.sleep(10) + self.connect() + self.send() + + +app = FastAPI() +publisher = Publisher() +publisher.connect() + +categories = ["gpu", "serverless","fpga"] +response = {} + +#testing 
+def generateResponse(component_name): + return {"component_name":component_name, "suggested_categories":categories} + +def checkPreviousResponse(): + if not os.path.exists(folder_response + "/response.text"): + return None + global response + _file = open(folder_response + "/response.txt","r") + content = _file.read() + if len(content) > 0: + response = json.loads(content) + print("Previous content found and loaded") + +def saveResponse(): + _file = open(folder_response + "/response.txt","w") + _file.write(json.dumps(response)) + _file.close + print("Response saved") + +def areAllComponentsPresent(components, data): + print("components", components) + print("data", data) + for component in data: + if not component["component_name"] in components: + return False + return True + +@app.get("/") +def read_root(): + return {"Version": "1.0", "Maintainer":"Jean-Didir Totow "} + +@app.get("/collect") +async def collect(code: int): + if code in response: + if response[code]["status"] == "waiting": + return {"status": False, "message": "Response not ready, please try later"} + elif response[code]["status"] == "error": + return {"status": False, "message":"An error {0} occured while processing data".format(response[code]["message"])} + elif response[code]["status"] == "ready": + content = response[code]["data"] + del response[code] + saveResponse() + return {"status": True, "data": content} + else: + return {"status":False, "message":"unknown status"} + else: + return {"status": False, "message": "code not found, please try to send analyse request again"} + +@app.post("/report") +async def report(report: ReportObject): + global response + if report.code in response: + response[report.code]["data"].append(report.data) + if areAllComponentsPresent(response[report.code]["components"],response[report.code]["data"]): + response[report.code]["status"] = "ready" + saveResponse() + return {"status": True} + else: + return {"status": False, "message": "{0} not 
found".format(report.code)} + + +@app.post("/analyse") +async def analyse(data: ListComponentsRequest): + global response + try: + #for component in data: + # response.append(generateResponse(component['component_name'])) + code = len(list(response.keys())) + 1 + _data = {"code": code,"request":"suggest", "data": data} + publisher.setParameters(_data,profiler_topic) + publisher.send() + components = [] + for component in data: + components.append(component["component_name"]) + response[code] = {"status": "waiting", "components": components, "data":[]} + return {"status": True, "code": code} + except Exception as e: + return {"status": False, "message": e} + +if __name__ == "__main__": + checkPreviousResponse() + uvicorn.run(app, host="0.0.0.0", port=7878) \ No newline at end of file diff --git a/profiler/src/bagofwords.py b/profiler/src/bagofwords.py new file mode 100644 index 0000000000000000000000000000000000000000..18198bbf0aadf0c66db4cba9db687a7a0a609008 --- /dev/null +++ b/profiler/src/bagofwords.py @@ -0,0 +1,305 @@ +import os, re +import subprocess +#from nltk.stem import PorterStemmer +#from nltk.tokenize import word_tokenize +from tensorflow import keras +import tensorflow as tf +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.decomposition import TruncatedSVD +from sklearn.cluster import KMeans +import pandas as pd +from scipy import sparse +#import nltk +#from dextractor import analyse +#from extractor.dextractor import analyse +#from nltk.stem import PorterStemmer + +#nltk.download('punkt') +model_folder = os.environ.get("MODEL_FOLDER","./models") +local_repositories_folder = os.environ.get("LOCAL_REPOSITORIES_FOLDER","/tmp/downloader") +#slash_offset = local_repositories_folder.count("/") +labels = ["gpu","fpga","edge","docker","serverless","knn","cnn","regression","rnn","kmean","linearprogramming"] +languages = {'py':'python','java':'java','c':'ccpp','h':'ccpp','js':'javascript','cpp':'ccpp'} +slash_offset = 
local_repositories_folder.count("/") +extensions = ['h','cpp','java','py','c','js'] +token_map = {'(':'openp',')': 'closep','+':'add','-':'subs','*':'mult','/':'div','%':'mod','=':'egal','.':'time',',':'com','bo':'openc','bc':'closec','!':'not','==':'comp','>':'high','<':'lower','>=':'hightegal','<=':'loweregal'} +max_features_length = int(os.environ.get("MAX_FEATURE_LENGTH","100")) + +class BoWExtractor(): + def __init__(self): + self.files = None + self.stop_words = {} + self.prefix = [] + self.non_code_source_files = [] + + def setFiles(self,files): + self.files = files + + def isFileTypeSupported(self, _file): + _ext = _file[_file.rindex(".")+1:] + return _ext in extensions + + def includeKeyWord(self, extension): + if extension == "py": + return ["import"] + elif extension == "cpp" or extension == "h" or extension == "c": + return ["include"] + elif extension == "js": + return ["require","import"] + elif extension == "java": + return ["import", "package"] + else: + return None + + def extractInclude(self, extension, _file): + keys = self.includeKeyWord(extension) + lines = [] + file = None + try: + file = open(_file,'r') + except: + return [] + for key in keys: + for line in file: + try: + if line.index(key) > -1: + lines.append(line) + except: + pass + + lines = "".join(lines) + string = re.sub(r"[^a-zA-Z0-9]+"," ", lines) + string = re.sub('(?<=[A-Za-z])(?=[A-Z][a-z])', ' ', string) + string = re.sub(r"\s{2,}", " ", string) + + response = string.strip().lower() + response = response.split(" ") + response = [w for w in response if len(w) > 1] + response = list(set(response)) + return response + + def extractConfigFiles(self): + result = [] + for _file in self.non_code_source_files: + _count = _file.count("/") + if _count - slash_offset == 2: + base = os.path.basename(_file) + name = os.path.splitext(base)[0] + if name.startswith("."): + continue + if re.match(r'^\w+$', name): + result.append(name) + return result + + def extractIncludes(self): + result = [] + 
for _file in self.files: + try: + if not self.isFileTypeSupported(_file): + self.non_code_source_files.append(_file) + continue + extension = _file[_file.rindex(".")+1:] + if extension == None: + continue + result.extend(self.extractInclude(extension, _file)) + result.append(languages[extension]) + words_to_remove = ["import", "from", "as","include","require", "package","var", "const","let"] + for w in words_to_remove: + if w in result: + result.remove(w) + except Exception as e: + pass + result.extend(self.extractConfigFiles()) + result = list(set(result)) + return result + +class BagOfWordsManager(): + def __init__(self): + self.extractor = BoWExtractor() + + def cleanTokens(self, _list): + def clean(world): + punc = '''!()-[]{};:'"\,<>./?@#$%^&*_~''' + for ele in world: + if ele in punc: + world = world.replace(ele, " ") + return world.split(" ") + + result = [] + for w in _list: + cleaned = clean(w) + for w_cleaned in cleaned: + if w_cleaned.isnumeric(): + continue + if len(w_cleaned) > 1: + result.append(w_cleaned.lower()) + return result + + def addProject(self, repo, handler): + project = Project(repo.name, repo.url, repo.labels, repo.type) + _files = project.scanFiles() + if len(_files) == 0: + return None + self.extractor.setFiles(_files) + words = self.extractor.extractIncludes() + #words.extend(self.extractor.extractConfigFiles()) + words = self.cleanTokens(words) + words = list(dict.fromkeys(words)) + + result = {'tokens': words, 'labels': repo.labels} + handler.setOutput('bag_of_worlds',result, repo) + + """ + def addProject(self, repo, handler): + project = Project(repo.name, repo.url, repo.labels, repo.type) + #_files = project.scanFiles() + result = None + try: + _url = project.getRootFolder() + result = analyse(project.getRootFolder()) + except Exception as e: + print("Could not extract library from the project") + if result: + all_tokens = result['dependencies'] + all_tokens.extend(result['configurations']) + text = self.cleanText(" 
".join(all_tokens)) + tokens = text.split(" ") + result = {'tokens': tokens, 'labels': repo.labels} + handler.setOutput('bag_of_worlds',result, repo) + if _files != []: + self.extractor.setFiles(_files) + words = self.extractor.extractIncludes() + if len(words) > 0: + project.setWords(words) + try: + handler.setOutput(project.getOuput(), repo) + except: + pass + else: + print("No file found for the repository {0}".format(name)) + """ + +class Project(): + def __init__(self, name, url, label, _type): + self.name = name + self.url = url + self.label = label + self.type = _type + self.words = [] + self.main_folder = local_repositories_folder + "/" + self.name + self.files = [] + self.folders = [] + + def getRootFolder(self): + return self.main_folder + + def getName(self): + return self.name + def getUrl(self): + return self.url + def getLabel(self): + return self.label + def getType(self): + return self.type + def setWords(self, words): + self.words = words + print("{0} tokens found for project: {1}".format(len(words),self.name)) + + def scanFiles(self): + self.main_folder = local_repositories_folder + "/" + self.name + folders_couple = [x for x in os.walk(self.main_folder)] + for _folder in folders_couple: + files = _folder[2] + for _file in files: + self.files.append(_folder[0] + "/" + _file) + _folders = _folder[1] + for fd in _folders: + self.folders.append(_folder[0]+ "/" + fd) + return self.files + + def getOuput(self): + return {'method': 'bo', 'data':{'project_name': self.name,'label': self.label, 'tokens': self.words, 'size': len(self.words)}} + + +class Classifier(): + def __init__(self): + self.model = None + self.x = None + self.language = None + + def loadModels(self): + try: + if os.path.isfile(model_folder+"/nn.model_{0}/saved_model.pb".format(self.language)): + self.model = keras.models.load_model(model_folder + "/nn.model_{0}".format(self.language)) + print("Matcher loaded {0} ML model".format(self.language)) + else: + print("File "+ 
model_folder+"/nn.model_{0}/saved_model.pb nit found".format(self.language)) + except Exception as e: + print("Could not load the model for language {0} => ".format(self.language),e) + + def formatResult(self, Y): + result = [] + for res in Y: + index = 0 + _object = {} + for v in res: + _object[labels[index]] = float("{:.3f}".format(v)) + index +=1 + result.append(_object) + return result + + def setLanguage(self, language): + self.language = language + self.loadModels() + + def setFeaturesSmart(self, tokens, paddings,index): + df = pd.DataFrame() + _json = {'words': " ".join(tokens)} + df = df.append(_json, ignore_index=True) + vectorizer = TfidfVectorizer() + self.x = vectorizer.fit_transform(df['words']) + if self.x.shape[1] > max_features_length: + print("Applying PCA ...") + pca = TruncatedSVD(max_features_length) + self.x = pca.fit_transform(self.x) + self.x = sparse.csr_matrix(self.x, shape=(1, max_features_length)) + return self.model != None + elif self.x.shape[1] < max_features_length: + if index >= len(paddings): + return False + else: + tokens.append(paddings[index]) + index +=1 + return self.setFeaturesSmart(tokens, paddings, index) + else: + return self.model != None + + def setFeatures(self, tokens): + if not self.model: + print("No ML model found") + return None + + df = pd.DataFrame() + _json = {'words': " ".join(tokens)} + print("Tokens size before vectorization = {0}".format(len(tokens))) + df = df.append(_json, ignore_index=True) + vectorizer = TfidfVectorizer() + self.x = vectorizer.fit_transform(df['words']) + print("End of the preparation of the input vector, shape after vectorization {0}".format(self.x.shape)) + if self.x.shape[1] > max_features_length: + pca = TruncatedSVD(max_features_length) + self.x = pca.fit_transform(self.x) + print('Application of PCA') + print("Final shape {0}".format(self.x.shape)) + print(self.x) + return self.x.shape[1] == max_features_length + + def predict(self): + try: + X = 
tf.convert_to_tensor(self.x.toarray()) + y = self.model.predict(X) + return self.formatResult(y) + except Exception as e: + print(type(self.x)) + print(e) + return None diff --git a/profiler/src/camelhandler.py b/profiler/src/camelhandler.py new file mode 100644 index 0000000000000000000000000000000000000000..73586f22fef3ebdb4ba3bc140d44d4cdb16b602d --- /dev/null +++ b/profiler/src/camelhandler.py @@ -0,0 +1,43 @@ +import sys +import os +import stomp +import json +import random +import time + +user = os.environ.get("ACTIVEMQ_USER","aaa") +password = os.environ.get("ACTIVEMQ_PASSWORD","111") +host = os.environ.get("ACTIVEMQ_HOST","localhost") +port = int(os.environ.get("ACTIVEMQ_PORT","61613")) or 61613 + +destination = "profiler" + +connected = False +conn = None +while not connected: + print("Trying to connect to ActiveMQ Broker") + try: + conn = stomp.Connection(host_and_ports = [(host, port)]) + conn.connect(login=user,passcode=password) + print("Connexion established") + connected = True + except: + print("Could not establish connection to ActiveMQ Broker") + time.sleep(10) + + +data = { + 'request': 'suggest', + 'name':'karina', + "url":"https://github.com/flanksource/karina", + "topics":['docker'], + 'languages': ['python'], + 'published_at': '12-12-2020' +} +conn.send(body=json.dumps(data), destination=destination, persistent='false') +print(data) +#conn.send(body=' '.join(sys.argv[1:]), destination='/queue/test') +time.sleep(20) +conn.disconnect() + +#curl -X POST -d '{"application":"application_test","start":"10m"}' -H 'Content-type:application/json' http://localhost:8767/api/v1/make \ No newline at end of file diff --git a/profiler/src/doap.py b/profiler/src/doap.py new file mode 100644 index 0000000000000000000000000000000000000000..a493a94a8938d08eb9bde918e85705127c72efda --- /dev/null +++ b/profiler/src/doap.py @@ -0,0 +1,32 @@ +class DoapModel(): + def __init__(self, id, name, url, topics, labels, languages, published_at, component,code): + self.id = 
id + self.name = name + self.url = url + self.topics = topics + self.labels = labels + self.languages = languages + self.component = component + self.date = published_at + self.code = code + + def getID(self): + return self.id + def getName(self): + return self.name + def getUrl(self): + return self.url + def getTopics(self): + return self.topics + def getLabels(self): + return self.labels + def getLanguages(self): + return self.languages + def getDate(self): + return self.date + def getComponentName(self): + return self.component + def getComponent(self): + return self.component + def getCode(self): + return self.code \ No newline at end of file diff --git a/profiler/src/downloader.py b/profiler/src/downloader.py new file mode 100644 index 0000000000000000000000000000000000000000..ef8c814035a0e8c057d884f6828b6c8d72a10e91 --- /dev/null +++ b/profiler/src/downloader.py @@ -0,0 +1,122 @@ +import os, json, time +from threading import Thread +from pydantic import BaseModel +import git + +local_repositories_folder = os.environ.get("LOCAL_REPOSITORIES_FOLDER","/tmp/downloader") +slash_offset = local_repositories_folder.count("/") +concurrent_downloads = int(os.environ.get("CONCURRENT_DOWNLOADS", "3")) +max_error_tolerance = int(os.environ.get("MAX_ERROR_TOLERANCE","2")) +max_waiting_list = int(os.environ.get("MAX_WAITING_LIST","20")) + +class Repo(BaseModel): + name: str + url: str + idgit: str + status: str + n_error: int + type: str + labels: list + topics: list + date: str + component: str + code: int + language: str + +class Downloader(Thread): + def __init__(self, profiler): + self.list_url = {} + self.n_downloaded = 0 + self.n_error = 0 + self.n_downloading = 0 + self.max_repo_error = max_error_tolerance + self.max_concurrent_download = concurrent_downloads + self.profiler = profiler + #self.manager = ProjectManager() + self.stop = False + super(Downloader, self).__init__() + + def addUrl(self, name, url, idgit, _type, labels, topics, _date, 
component,code,language): + if not name in self.list_url: + repo = Repo(name=name,url=url, idgit=idgit,type=_type,labels=labels, topics=topics,status="NotStarted",n_error=0, date=_date, component=component,code=code,language=language) + self.list_url[name] = repo + return True + return False + def handler(self, repo, action, message=""): + if action == "start": + self.n_downloading +=1 + repo.status = "downloading" + if action == "error": + self.n_error +=1 + repo.status = "error" + #repo.n_error +=1 + self.n_downloading -=1 + del self.list_url[repo.name] + self.profiler.deleteProject(repo) + if action == "end": + self.n_downloaded +=1 + self.n_downloading -=1 + if repo.name in self.list_url: + del self.list_url[repo.name] + self.profiler.addProject(repo) + + def download(self, repo, handler): + try: + handler(repo, "start") + git.Git(local_repositories_folder).clone(repo.url) + handler(repo, "end") + except Exception as e: + print(e) + handler(repo, "error", e) + + def startWorker(self, repo): + thread = Thread(target=self.download, args=(repo, self.handler,)) + thread.start() + + def stopDownloader(self): + print("Downloader ::: Stop signal received") + self.stop = True + def getDownloading(self): + return self.n_downloading + + def getWaitingListSize(self): + return len(list(self.list_url.keys())) - self.n_downloading + + def run(self): + print("Downloader ::: Started") + if not os.path.exists('/tmp/downloader'): + os.makedirs('/tmp/downloader') + print("Repositories folder created") + else: + print("downloader folder found") + while True: + index = 0 + _list_keys = list(self.list_url.keys()) + waiting_list = self.getWaitingListSize() + print("Downloaded: {0}, Downloading: {1}, Error: {2}, Waiting list: {3}".format(self.n_downloaded,self.n_downloading,self.n_error,waiting_list,end='\r')) + while True: + if self.stop: + break + if len(_list_keys) == 0: + break + if index >= len(_list_keys): + break + k = _list_keys[index] + repo = None + if k in self.list_url: + 
repo = self.list_url[k] + if repo == None: + index +=1 + continue + if repo.status == "downloading": + index +=1 + continue + if self.n_downloading == self.max_concurrent_download: + time.sleep(10) + continue + self.startWorker(repo) + index +=1 + if self.stop: + break + time.sleep(3) + print("Downloader ::: stopped") \ No newline at end of file diff --git a/profiler/src/extractor/.gitignore b/profiler/src/extractor/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..e20aae30daf932e8b498ec3d8b127ef43f257e6e --- /dev/null +++ b/profiler/src/extractor/.gitignore @@ -0,0 +1,147 @@ +.DS_Store + +# VS Code environment settings +.vscode/ + +# Testing data and repositories +tests/data/* +!tests/data/README.md + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ \ No newline at end of file diff --git a/profiler/src/extractor/LICENSE b/profiler/src/extractor/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..b831fe0500edcc4b50e0200a66d9e35eb63274fb --- /dev/null +++ b/profiler/src/extractor/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Alexandros Raikos + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/profiler/src/extractor/README.md b/profiler/src/extractor/README.md new file mode 100644 index 0000000000000000000000000000000000000000..77a9b9335b5f71b0780b1c68b20f4522f71285d7 --- /dev/null +++ b/profiler/src/extractor/README.md @@ -0,0 +1,66 @@ +# Dependency Extractor + +A Python library which extracts library dependencies from source files written in most mainstream programming languages. + +**:warning: Warning: This library is not meant for production use, as extended testing has not been carried out yet.** + +## Getting started + +Follow the steps below to get started with `dextractor`. + +### Installation + +This package can be installed easily via `pip`. Run the commands below: + +```bash +# Clone or download the .zip from the Releases tab. +git clone https://www.github.com/alexandrosraikos/dependency-extractor + +# Navigate to the folder. +cd dependency-extractor + +# Install locally +pip install . +``` + +### Usage + +It is meant to be imported and called via a single function, `analyse`, which returns a `dict` with the dependencies and configuration files found in a single file or a directory. + +```python +from dextractor import analyse + +# Use all default parameters. +result = analyse("path/to/file/or/directory") + +# Define a different maximum file size (in bytes). +result = analyse("path/to/file/or/directory", max_file_size=2000000) # <- 2MB + +# Ignore local and relative dependencies. +result = analyse("path/to/file/or/directory", strict=True) + +# Enable verbose output. NOTE: Do not enable on parallel analyses. +result = analyse("path/to/file/or/directory", verbose=True) + +# +# Then you can access the following keys: +# -- +dependencies = result["dependencies"] +configurations = result["configurations"] + + +``` + +## Unit testing + +Please consult the README in the `tests` folder. + +## Supported languages + +All languages which are supported are still in alpha.
def _print_notice(colour, label, message):
    """Print one ``[dextractor] LABEL: message`` line with a coloured label."""
    print("[dextractor]", end=" ")
    print(colour + label, end=" ")
    print(message)


def _is_source_candidate(filename):
    """Return True when the base name ``filename`` should be parsed for imports.

    ``package.json`` is always parsed; otherwise the file must not be an
    ignored name, a configuration file, or carry an ignored extension.
    """
    if filename == "package.json":
        return True
    stem, extension = os.path.splitext(filename)
    return (
        stem not in ignored_files
        and filename not in configuration_files
        and extension not in ignored_extensions
    )


def analyse(
    any_path: str,
    max_file_size=5000000,
    strict=False,
    verbose=False,
) -> dict:
    """
    Analyse a path for configuration files and source file library dependencies.

    Parameters
    ----------
    - `any_path : str`
        A string containing a valid system path which is accessible from this script.
    - `max_file_size : int`
        An integer indicating the byte limit of source files to be read.
        This is useful for directories where irrelevant large data sets are also included.
    - `strict : bool`
        A flag which excludes internal and relative packages.
    - `verbose : bool`
        Enables verbose output for each scanned file.

    Returns
    -------
    A dictionary with two keys, `"configurations"` and `"dependencies"`,
    each holding a list of names.

    Raises
    ------
    - `Exception`
        When `any_path` is neither a regular file nor a directory.
    """
    # 0. Setup.
    colorama.init(autoreset=True)
    dependencies = set()
    configurations = set()

    if os.path.isdir(any_path):
        # 1.1. Directory: walk the whole tree.
        coverage_counter = 0
        ignored_counter = 0
        total_file_count = 0
        for root, _, files in os.walk(any_path):
            total_file_count += len(files)
            for file in files:
                full_path = os.path.join(root, file)
                try:
                    # 1.1.1. Skip oversized files before reading anything.
                    if os.stat(full_path).st_size >= max_file_size:
                        raise MemoryError
                    # 1.1.2. Record configuration files by name.
                    if file in configuration_files:
                        configurations.add(file)
                    if not _is_source_candidate(file):
                        ignored_counter += 1
                        raise TypeError
                    # 1.1.3. Extract dependencies.
                    source_file = SourceFile(full_path)
                    dependencies.update(source_file.dependencies(verbose, strict))
                    coverage_counter += 1
                except TypeError:
                    if verbose:
                        _print_notice(
                            Fore.YELLOW,
                            "NOTICE:",
                            f"The file '{file}' does not contain source code.",
                        )
                except NotImplementedError:
                    if verbose:
                        _print_notice(
                            Fore.YELLOW,
                            "NOTICE:",
                            f"The file '{file}' is not yet supported by this module.",
                        )
                except MemoryError:
                    if verbose:
                        _print_notice(
                            Fore.YELLOW,
                            "NOTICE:",
                            f"The file '{file}' is too large and will be ignored.",
                        )
                except IOError:
                    # Access errors are reported even when not verbose.
                    _print_notice(
                        Fore.RED,
                        "ERROR:",
                        f"The file '{file}' could not be accessed.",
                    )
        # 1.1.4. Print statistics once, after the whole tree was visited.
        #        coverage_counter > 0 guarantees a non-zero denominator below.
        if verbose and coverage_counter > 0:
            coverage = coverage_counter / (total_file_count - ignored_counter)
            print("[dextractor]", end=" ")
            print(Fore.GREEN + "SUCCESS:")
            print(
                f"""
                - - - - - - -
                | Files detected under {round(max_file_size/1000000,1)}MB: {total_file_count}
                | Files scanned: {coverage_counter}
                | Non-source files ignored: {ignored_counter}
                | Unsupported files: {total_file_count-coverage_counter-ignored_counter}
                | Source file coverage: {coverage:.1%}
                | Dependencies found: {len(dependencies)}
                - - - - - - -
                """
            )

    elif os.path.isfile(any_path):
        # 1.2. Single file.
        # -----
        # NOTE: Exception descriptions are different when
        #       running the script for a single file.
        display_name = os.path.basename(any_path)
        try:
            # Size and name checks come first, so an oversized or ignored
            # file is never read. The checks operate on the file *name*;
            # previously a SourceFile object was passed to os.path.splitext,
            # which always raised TypeError.
            if os.stat(any_path).st_size >= max_file_size:
                raise MemoryError
            if display_name in configuration_files:
                configurations.add(display_name)
            if not _is_source_candidate(display_name):
                raise TypeError
            source_file = SourceFile(any_path)
            dependencies.update(source_file.dependencies(verbose, strict))
        except TypeError:
            if verbose:
                _print_notice(
                    Fore.RED,
                    "ERROR:",
                    f"The file '{display_name}' is not a source file.",
                )
        except NotImplementedError:
            if verbose:
                _print_notice(
                    Fore.YELLOW,
                    "NOTICE:",
                    f"The file '{display_name}' is not yet supported by this module.",
                )
        except MemoryError:
            if verbose:
                _print_notice(
                    Fore.RED,
                    "ERROR:",
                    f"The file '{display_name}' is too large.",
                )
        except IOError:
            if verbose:
                _print_notice(
                    Fore.RED,
                    "ERROR:",
                    f"The file '{display_name}' could not be accessed.",
                )
    else:
        raise Exception(
            "This is not a file or a directory. It might be a special file (e.g. socket, FIFO, device file), which is unsupported by this package. "
        )

    return {
        "configurations": list(configurations),
        "dependencies": list(dependencies),
    }
# Known configuration files, file names and file extensions to be ignored.

# Base names reported under "configurations" instead of being parsed.
# Both spellings of each YAML file are listed: the ".yml"/".yaml" extensions
# are ignored below, so a spelling missing here is silently dropped.
configuration_files = [
    "Dockerfile",
    "Makefile",
    "docker-compose.yml",
    "docker-compose.yaml",
    "serverless.yaml",
    "serverless.yml",
]

# File names (extension stripped) that never contain source code.
ignored_files = [
    ".gitignore",
    ".DS_Store",
    "README",
    "LICENSE",
    "MAINTAINERS",
    "BUGS",
    "CONTRIBUTING",
    "CONTRIBUTORS",
    "AUTHORS",
    "PATENTS",
]

# Extensions of files that are skipped without being opened.
ignored_extensions = [
    ".png",
    ".ico",
    ".jpg",
    ".svg",
    ".tiff",
    ".yml",
    ".yaml",
    ".rst",
    ".json",
    ".xml",
    ".html",
    ".har",
    ".properties",
    ".plist",
    ".all",
    ".txt",
    ".doc",
    ".xls",
    ".ppt",
    ".docx",
    ".xlsx",
    ".pptx",
    ".csv",
    ".jmx",
    ".cmd",
    ".sh",
    ".mod",
    ".sum",
    ".tpl",
    ".npy",
    ".npz",
    ".ini",
    ".inc",
]
# Define supported languages with their corresponding compiled import regex.
# -----
# NOTE: Strict suffix queries exclude local and relative imports.
# NOTE: Every expression exposes exactly one capture group, named
#       "dependency", so that `findall` returns the captured names directly.

import re
from typing import List


class ProgrammingLanguage:
    """Describe how import statements are recognised for one language.

    Parameters
    ----------
    name : str
        Human readable language name.
    extensions : List[str]
        File extensions (with leading dot) handled by this language.
    expressions : dict
        Compiled expressions under the "dependencies" key: either
        "regular" (and optionally "strict"), or "container" plus
        "internal" for languages with grouped import blocks.
    supports_grouped_dependencies : bool
        True when the "container"/"internal" pair must be used.
    """

    def __init__(self, name, extensions, expressions, supports_grouped_dependencies):
        self.name: str = name
        self.extensions: List[str] = extensions
        self.expressions = expressions
        self.supports_grouped_dependencies = supports_grouped_dependencies


supported_languages = [
    ProgrammingLanguage(
        "C++",
        extensions=[".cpp", ".hpp"],
        expressions={
            "dependencies": {
                "regular": re.compile(
                    r"#include [<\"](?P<dependency>[a-zA-Z0-9!@#$%^&*()_+\-=\[\]{};':\"\\|,.<>\/?]+)[\">]"
                ),
                "strict": re.compile(
                    r"#include [<\"](?P<dependency>[^.][a-zA-Z0-9!@#$%^&*()_+\-=\[\]{};':\"\\|,.<>\/?]+[^.hpp][^.h])[\">]"
                ),
            }
        },
        supports_grouped_dependencies=False,
    ),
    ProgrammingLanguage(
        "C",
        extensions=[".c", ".h"],
        expressions={
            "dependencies": {
                "regular": re.compile(
                    r"#include [<\"](?P<dependency>[a-zA-Z0-9!@#$%^&*()_+\-=\[\]{};':\"\\|,.<>\/?]+)[\">]"
                ),
                "strict": re.compile(
                    r"#include [<\"](?P<dependency>[^.][a-zA-Z0-9!@#$%^&*()_+\-=\[\]{};':\"\\|,.<>\/?]+[^.hpp][^.h])[\">]"
                ),
            }
        },
        supports_grouped_dependencies=False,
    ),
    # TODO: #1 Needs improvement (it only reads last dependency in list + strict mode.)
    ProgrammingLanguage(
        "Go",
        extensions=[".go"],
        expressions={
            "dependencies": {
                "container": re.compile(
                    r"import[\s]+\((?P<dependency>[a-zA-Z0-9!@#$%^&*_+\-\[\]{};':\"\s\\.\/?]+)\)"
                ),
                "internal": re.compile(
                    r"[\s]+\"(?P<dependency>.*)\""
                ),
            }
        },
        supports_grouped_dependencies=True,
    ),
    ProgrammingLanguage(
        "Java",
        extensions=[".java"],
        expressions={
            "dependencies": {
                "regular": re.compile(
                    r"import (?P<dependency>[a-zA-Z0-9!@#$%^&*_+\-\[\]{};':\"\\.\/?]+);"
                )
            }
        },
        supports_grouped_dependencies=False,
    ),
    ProgrammingLanguage(
        "Python",
        extensions=[".py", ".pyi"],
        expressions={
            "dependencies": {
                # re.MULTILINE: the whole file is searched at once, so "^"
                # has to match at every line start, not only at offset 0.
                "regular": re.compile(
                    r"^[ ]*(?:import|from) (?P<dependency>[^_][a-zA-Z0-9!@#$%^&*()_+\-\[\]{}.;':\"\\\/?]+)",
                    re.MULTILINE,
                ),
                "strict": re.compile(
                    r"^[ ]*(?:import|from) (?P<dependency>[^_.][a-zA-Z0-9!@#$%^&*()_+\-\[\]{};':\"\\\/?]+)",
                    re.MULTILINE,
                ),
            }
        },
        supports_grouped_dependencies=False,
    ),
    ProgrammingLanguage(
        "JavaScript",
        extensions=[".json"],
        expressions={
            "dependencies": {
                "container": re.compile(
                    r"\"dependencies\":[ |]{(?P<dependency>[a-zA-Z0-9!@#$%^&*_+\-\[,\];':\"\s\\.\/?]+)"
                ),
                "internal": re.compile(
                    r"[\s]+\"(?P<dependency>.*)\":"
                ),
            }
        },
        supports_grouped_dependencies=True,
    ),
]
class SourceFile:
    """A source file on disk, together with the language used to parse it."""

    def __init__(self, path):
        """
        Bind a path to the supported language that matches its extension.

        Parameters
        ----------
        - `path : str`
            A string containing the full system path for the source file.

        Raises
        ------
        - `TypeError`
            When `path` does not point to a regular file.
        - `NotImplementedError`
            When no supported language declares the file's extension.
        """
        if not os.path.isfile(path):
            raise TypeError
        self.path: str = path
        self.name, self.extension = os.path.splitext(path)
        candidates = [
            language
            for language in supported_languages
            if self.extension in language.extensions
        ]
        if not candidates:
            raise NotImplementedError
        # When several languages declare the extension, the last one wins.
        self.language = candidates[-1]

    def dependencies(self, verbose: bool = False, strict: bool = False) -> Set:
        """
        Read the source file and extract imported package names using regular expressions.

        Parameters
        ----------
        - `verbose : bool`
            Print progress information while reading.
        - `strict : bool`
            Use the language's "strict" expression when it defines one, which
            excludes local and relative imports.

        Returns
        -------
        The set of captured dependency names; empty when nothing matched or
        the file could not be read.
        """
        found = set()
        display_name = os.path.basename(self.path)

        # 1. Read the whole file. The context manager closes the handle on
        #    every path; undecodable bytes are replaced instead of raising
        #    UnicodeDecodeError, which no caller handles.
        try:
            with open(self.path, "r", encoding="utf-8", errors="replace") as file:
                if verbose:
                    print("[dextractor]", end=" ")
                    print(Fore.CYAN + "INFORMATION:", end=" ")
                    print(f"Reading {display_name}")
                content = file.read()
        except IOError:
            print("[dextractor]", end=" ")
            print(Fore.RED + "ERROR:", end=" ")
            print(
                f"There was an IO error when trying to access the file '{display_name}'."
            )
            return found

        # 2. Match regex and collect the captured names.
        expressions = self.language.expressions["dependencies"]
        if self.language.supports_grouped_dependencies:
            # Every grouped block is inspected, not only the first one.
            for group in expressions["container"].findall(content):
                found.update(expressions["internal"].findall(group))
        else:
            query = expressions["regular"]
            if strict:
                # Not every language defines a strict expression.
                query = expressions.get("strict", query)
            found.update(query.findall(content))

        if not found and verbose:
            print("[dextractor]", end=" ")
            print(Fore.CYAN + "INFORMATION:", end=" ")
            print("This file doesn't include any dependencies.")
        return found
alexandros@araikos.gr +description = A Python library which extracts library dependencies from source files written in most mainstream programming languages. +long_description = file: README.md +long_description_content_type = text/markdown +url = https://github.com/alexandrosraikos/dependency-extractor +project_urls = + Bug Tracker = https://github.com/alexandrosraikos/dependency-extractor/issues +classifiers = + Programming Language :: Python :: 3 + License :: OSI Approved :: MIT License + Operating System :: OS Independent + +[options] +packages = find: +python_requires = >=3.6 \ No newline at end of file diff --git a/profiler/src/extractor/tests/README.md b/profiler/src/extractor/tests/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f1db809cc7ffde219a1c9bc78aacef23493de1c --- /dev/null +++ b/profiler/src/extractor/tests/README.md @@ -0,0 +1,11 @@ +# Testing + +This directory is populated at will when cloning the repository. It is recommended to clone a complex open source repository in this directory and run the tests. + +# How to + +To run unit tests using `unittest` on package modules: + +1. Clone a repository into the `data` folder. +1. From this project's root, run: `python -m unittest tests/test-analyse.py` +1. The testing script will return all imports found in the source code of your chosen repository. diff --git a/profiler/src/extractor/tests/test-analyse.py b/profiler/src/extractor/tests/test-analyse.py new file mode 100644 index 0000000000000000000000000000000000000000..f8659db6f2ee7de4246f448deb39cf8da9c7f1b9 --- /dev/null +++ b/profiler/src/extractor/tests/test-analyse.py @@ -0,0 +1,40 @@ +# --------- +# This testing file is part of the Dependency Extractor python package. +# Copyright (c) 2021, Alexandros Raikos tou Konstantinou. +# +# Licensed under the MIT License. 
class Filter():
    """Accept or reject project models based on allow-lists.

    The allow-lists for topics, labels and languages start unset (``None``)
    and are configured through the setters. An unset allow-list matches
    nothing.
    """

    def __init__(self):
        self.topics = None
        self.labels = None
        self.languages = None

    def setTopics(self, topics):
        """Set the allow-list of topics."""
        self.topics = topics

    def setLabels(self, labels):
        """Set the allow-list of labels."""
        self.labels = labels

    def setLanguages(self, languages):
        """Set the allow-list of languages."""
        self.languages = languages

    def validate(self, entries, _type):
        """Return True when at least one entry is in the allow-list for ``_type``.

        ``_type`` is ``'language'``, ``'topic'`` or ``'label'``; any other
        value yields False. Missing entries or an unset allow-list yield
        False instead of raising ``TypeError``.
        """
        allowed_by_type = {
            'language': self.languages,
            'topic': self.topics,
            'label': self.labels,
        }
        allowed = allowed_by_type.get(_type)
        if not allowed or not entries:
            return False
        return any(entry in allowed for entry in entries)

    def isValidModel(self, model):
        """Return True when the model has an accepted language and an accepted topic.

        ``model`` is the DOAP model object; it must provide ``getLanguages()``,
        ``getTopics()`` and ``getName()``. Labels are not checked.
        """
        _languages = model.getLanguages()
        if not self.validate(_languages, 'language'):
            # Report the configured allow-list, not the module-level defaults.
            print("Language not relevant {0} found for project {1}, valid languages are {2}".format(_languages, model.getName(), self.languages))
            return False
        topics = model.getTopics()
        if not self.validate(topics, 'topic'):
            # This used to print the label list where the topics belong.
            print("Topic not relevant found {0} for project {1} valid topics are {2}".format(topics, model.getName(), self.topics))
            return False
        return True

    def getRepo(self, model):
        """Build a ``Repo`` record in the 'NotStarted' state from the model."""
        return Repo(
            name=model.getName(),
            url=model.getUrl(),
            idgit=model.getID(),
            status='NotStarted',
            language=model.getLanguages(),
            n_error=0,
            type='',
            labels=model.getLabels(),
            topics=model.getTopics(),
            date=model.getDate(),
            component=model.getComponentName(),
            code=model.getCode(),
        )
def loadWords(self):
    """Load the padding vocabulary from ``words.txt`` into ``self.words``.

    Newlines, full stops, commas and question marks are treated as
    separators; each piece is wrapped in underscores and duplicates are
    dropped while keeping first-seen order.
    """
    # The context manager closes the file; it used to stay open.
    with open('words.txt', 'r') as handle:
        words = handle.read()
    for separator in ("\n", ".", ",", "?"):
        words = words.replace(separator, " ")
    self.words = ["_" + w + "_" for w in words.split(" ")]
    self.words = list(dict.fromkeys(self.words))
    print("Padding list size = {0}".format(len(self.words)))


def predict(self, data, repo):
    """Classify a project from its tokens.

    Parameters
    ----------
    data : dict
        Must hold ``'method'``; for the ``'bag_of_worlds'`` method it must
        also hold ``'tokens'`` (a list, padded in place).
    repo
        Repository record; ``name`` and ``language`` are read.

    Returns
    -------
    ``None`` for any other method; otherwise a 4-tuple
    ``(labels, tokens, method, repo)`` where ``labels`` is empty when there
    are no tokens or the classifier rejects the features.
    """
    from itertools import cycle, islice

    if data['method'] != 'bag_of_worlds':
        print("Method is " + data['method'])
        return None

    tokens = data['tokens']
    print("{0} tokens for the project {1}".format(len(tokens), repo.name))
    if len(tokens) < 1:
        return [], tokens, data['method'], repo

    # Pad short token lists up to max_features_length with vocabulary words.
    # The vocabulary is cycled, so a short vocabulary no longer raises
    # IndexError; with enough words the padding is the same as before.
    missing = max_features_length - len(tokens)
    if missing > 0 and self.words:
        tokens.extend(islice(cycle(self.words), missing))

    self.classifier.setLanguage(repo.language)
    if self.classifier.setFeaturesSmart(tokens, self.words, 0):
        y = self.classifier.predict()
        print(y)
        return self.voteMajority([y], tokens, data['method'], repo)
    return [], tokens, data['method'], repo
a/profiler/src/models/nn.model_C++/variables/variables.index b/profiler/src/models/nn.model_C++/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..608d5a37f29bd8904dae098d24599db9d9f1cf2a Binary files /dev/null and b/profiler/src/models/nn.model_C++/variables/variables.index differ diff --git a/profiler/src/models/nn.model_C/saved_model.pb b/profiler/src/models/nn.model_C/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..0e76a4c0e326e5a1c56685fd1768da0f11ced40f Binary files /dev/null and b/profiler/src/models/nn.model_C/saved_model.pb differ diff --git a/profiler/src/models/nn.model_C/variables/variables.data-00000-of-00001 b/profiler/src/models/nn.model_C/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..f1d3e01439c627127cf230e705de048dd8665ed5 Binary files /dev/null and b/profiler/src/models/nn.model_C/variables/variables.data-00000-of-00001 differ diff --git a/profiler/src/models/nn.model_C/variables/variables.index b/profiler/src/models/nn.model_C/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..501606083d233cbd79bae8247f748c64051afeb5 Binary files /dev/null and b/profiler/src/models/nn.model_C/variables/variables.index differ diff --git a/profiler/src/models/nn.model_Cpp/saved_model.pb b/profiler/src/models/nn.model_Cpp/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..36a4f3f1c1fbd8bfe72f6604430eefe4ee9eaeb7 Binary files /dev/null and b/profiler/src/models/nn.model_Cpp/saved_model.pb differ diff --git a/profiler/src/models/nn.model_Cpp/variables/variables.data-00000-of-00001 b/profiler/src/models/nn.model_Cpp/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..ab94091fb9c13e4a89dffa3bf3486c4043d71d35 Binary files /dev/null and 
b/profiler/src/models/nn.model_Cpp/variables/variables.data-00000-of-00001 differ diff --git a/profiler/src/models/nn.model_Cpp/variables/variables.index b/profiler/src/models/nn.model_Cpp/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..313c481f13b8375e6475cb6bce7cbe8e1ba363eb Binary files /dev/null and b/profiler/src/models/nn.model_Cpp/variables/variables.index differ diff --git a/profiler/src/models/nn.model_Go/saved_model.pb b/profiler/src/models/nn.model_Go/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..ff9cec460023e1dedc5095275a0c2977bec3e142 Binary files /dev/null and b/profiler/src/models/nn.model_Go/saved_model.pb differ diff --git a/profiler/src/models/nn.model_Go/variables/variables.data-00000-of-00001 b/profiler/src/models/nn.model_Go/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..6f5675510f8d2b8617871118c62dee0b9fa5b771 Binary files /dev/null and b/profiler/src/models/nn.model_Go/variables/variables.data-00000-of-00001 differ diff --git a/profiler/src/models/nn.model_Go/variables/variables.index b/profiler/src/models/nn.model_Go/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..ffbd09e860720ce5272c125ad08891c948206ce2 Binary files /dev/null and b/profiler/src/models/nn.model_Go/variables/variables.index differ diff --git a/profiler/src/models/nn.model_Java/saved_model.pb b/profiler/src/models/nn.model_Java/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..8505361b537ee38c2993686b9eb2dcc33123618a Binary files /dev/null and b/profiler/src/models/nn.model_Java/saved_model.pb differ diff --git a/profiler/src/models/nn.model_Java/variables/variables.data-00000-of-00001 b/profiler/src/models/nn.model_Java/variables/variables.data-00000-of-00001 new file mode 100644 index 
0000000000000000000000000000000000000000..e194f4e6b17a743f46bfc9911724935ac3cffe41 Binary files /dev/null and b/profiler/src/models/nn.model_Java/variables/variables.data-00000-of-00001 differ diff --git a/profiler/src/models/nn.model_Java/variables/variables.index b/profiler/src/models/nn.model_Java/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..32ab9e72fb3dd9bc567ff618883834f41d666f3f Binary files /dev/null and b/profiler/src/models/nn.model_Java/variables/variables.index differ diff --git a/profiler/src/models/nn.model_Javascript/saved_model.pb b/profiler/src/models/nn.model_Javascript/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..0e1574f47f6a3b056c6aaf92ec55a70b34a38059 Binary files /dev/null and b/profiler/src/models/nn.model_Javascript/saved_model.pb differ diff --git a/profiler/src/models/nn.model_Javascript/variables/variables.data-00000-of-00001 b/profiler/src/models/nn.model_Javascript/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..28888e04ce66c5c738ff7e710685dea1de87efaa Binary files /dev/null and b/profiler/src/models/nn.model_Javascript/variables/variables.data-00000-of-00001 differ diff --git a/profiler/src/models/nn.model_Javascript/variables/variables.index b/profiler/src/models/nn.model_Javascript/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..e508e63985215c5e3c1134d7d5061e24422595f8 Binary files /dev/null and b/profiler/src/models/nn.model_Javascript/variables/variables.index differ diff --git a/profiler/src/models/nn.model_Python/saved_model.pb b/profiler/src/models/nn.model_Python/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..b36db8c1d43f43fc1bf009a41d84efe25726a09f Binary files /dev/null and b/profiler/src/models/nn.model_Python/saved_model.pb differ diff --git 
"""Profiler service entry point.

Polls the knowledge base for repository descriptions and hands the accepted
ones to the downloader, while a background consumer listens on an ActiveMQ
topic for ``suggest`` requests that are routed to the matcher.
"""
import os
import time
import json
import requests
import stomp  # noqa: F401
import filter
import logging
from filter import Filter
from downloader import Downloader
from matcher import Matcher
from analysermanager import AnalyserManager
from doap import DoapModel
from elasticsearch import Elasticsearch
from threading import Thread
from amqp.MorphemicConnection import Connection

# --- Configuration (every value can be overridden through the environment) ---
local_repositories_folder = os.environ.get("LOCAL_REPOSITORIES_FOLDER", "/tmp/downloader")
url_knowledge_base = os.environ.get("URL_KNOWLEDGE_BASE", "http://52.19.168.139:9200")
max_repositories_per_page = int(os.environ.get("MAX_REPOSITORIES_PER_PAGE", "100"))
elasticsearch_hostname = os.environ.get("ELASTICSEARCH_HOSTNAME", "localhost")

activemq_hostname = os.environ.get("ACTIVEMQ_HOST", "localhost")
demo_mode = os.environ.get("DEMO_MODE", "disabled")

activemq_port = int(os.environ.get("ACTIVEMQ_PORT", "61613"))
activemq_topic = os.environ.get("ACTIVEMQ_TOPIC", "static-topic-1")
activemq_subs_key = os.environ.get("ACTIVEMQ_SUBS_KEY", "subs-1")
activemq_username = os.environ.get("ACTIVEMQ_USERNAME", "aaa")
activemq_password = os.environ.get("ACTIVEMQ_PASSWORD", "111")
profiler_topic = os.environ.get("PROFILER_TOPIC", "/topic/profiler")

api_report_url = os.environ.get("API_REPORT_URL", "http://localhost:7878/report")

labels = filter.labels
languages = filter.languages

# SECURITY(review): an access token is committed in source as the fallback
# value. It can now be overridden with GITHUB_TOKEN; the committed value
# should be revoked and the default removed once deployments set the variable.
github_token = os.environ.get("GITHUB_TOKEN", "69de4167321f897c181afd63342c34b2f7fb482c")
headers = {"Authorization": "token {0}".format(github_token), 'Content-Type': 'application/json'}

# Seconds to wait before retrying a failed broker connection.
_RECONNECT_DELAY_SECONDS = 5

logname = "./log/profiler.log"
# basicConfig opens the file immediately, so the directory must exist first.
os.makedirs(os.path.dirname(logname), exist_ok=True)
logging.basicConfig(filename=logname, filemode='a', format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s', datefmt='%H:%M:%S', level=logging.DEBUG)


class Listener(object):
    """Connection listener that forwards every message body to ``handler``."""

    def __init__(self, conn, handler):
        self.conn = conn
        self.count = 0
        self.handler = handler
        self.start = time.time()

    def on_error(self, frame):
        """Print the body of an error frame."""
        print("received an error %s" % frame.body)

    def on_message(self, frame):
        """Pass the message body to the handler given at construction."""
        self.handler(frame.body)


class Consumer(Thread):
    """Thread that connects to the broker and subscribes ``handler`` to ``queue``.

    ``run`` returns as soon as the subscription succeeds or ``stop`` has been
    requested.
    """

    def __init__(self, handler, queue):
        super(Consumer, self).__init__()
        self.handler = handler
        self.queue = queue
        self.conn = None
        # Must not be named ``_stop``: CPython's threading.Thread has an
        # internal ``_stop()`` method, and shadowing it with a bool makes
        # ``join()`` raise TypeError.
        self._stop_requested = False

    def stop(self):
        """Request termination and disconnect if a connection was created."""
        self._stop_requested = True
        if self.conn:
            self.conn.disconnect()

    def run(self):
        """Retry connect + subscribe until it succeeds or stop is requested."""
        connected = False
        while not connected:
            if self._stop_requested:
                break
            try:
                print('Subscribe to the topic {0}'.format(self.queue))
                logging.info('Subscribe to the topic {0}'.format(self.queue))
                self.conn = Connection(username=activemq_username, password=activemq_password, host=activemq_hostname, port=activemq_port, debug=False)
                self.conn.connect()
                self.conn.set_listener('', Listener(self.conn, self.handler))
                self.conn.subscribe(destination=self.queue, id=1000, ack='auto')
                connected = True
            except Exception as e:
                print("Could not subscribe")
                logging.error("Could not subscribe to the topic {0}".format(self.queue))
                print(e)
                # Back off instead of hammering the broker in a tight loop.
                time.sleep(_RECONNECT_DELAY_SECONDS)


class ActiveMQConsumer(Thread):
    """Long-running consumer that stays subscribed to ``topic`` until stopped.

    ``status`` is ``None`` before the first attempt, then one of
    ``"started"``, ``"error"`` or ``"stopped"``.
    """

    def __init__(self, hostname, port, username, password, topic, handler):
        super(ActiveMQConsumer, self).__init__()
        self.hostname = hostname
        self.port = port
        self.topic = topic
        self.handler = handler
        self.username = username
        self.conn = None
        self.password = password
        self.status = None
        self.normal_stop = False

    def getStatus(self):
        """Return the current status string (or ``None`` before the first attempt)."""
        return self.status

    def stop(self):
        """Ask ``run`` to exit; it notices within one polling interval."""
        self.normal_stop = True

    def run(self):
        """Connect, subscribe, then idle until ``stop``; reconnect on errors."""
        print("ActiveMQ consumer started started")
        print("Hostname : {0}\nPort: {1}\nTopic: {2}".format(self.hostname, self.port, self.topic))

        while not self.normal_stop:
            print("Trying to connect ...")
            try:
                # Use the values this instance was constructed with, and the
                # two-argument Listener signature defined in this module.
                self.conn = Connection(username=self.username, password=self.password, host=self.hostname, port=self.port, debug=False)
                self.conn.connect()
                self.conn.set_listener('', Listener(self.conn, self.handler))
                self.conn.subscribe(destination=self.topic, id=1000, ack='auto')

                self.status = "started"
                print("Waiting for messages...")
                while not self.normal_stop:
                    time.sleep(5)
            except Exception as e:
                print("Could not connect to ActiveMQ broker")
                self.status = "error"
                print(e)
                time.sleep(_RECONNECT_DELAY_SECONDS)

        # Best-effort cleanup; a failure here must not mask the normal exit.
        if self.conn:
            try:
                self.conn.disconnect()
            except Exception as e:
                print(e)
        print("End process")
        self.status = "stopped"


class ElasticsearchController():
    """Repository bookkeeping used to avoid re-processing known projects.

    NOTE(review): every method below uses a pymongo-style collection API
    (``insert_one`` / ``update_one`` / ``count_documents`` / ``find``) on the
    object returned by ``Elasticsearch()``. Confirm that object supports
    subscripting and these calls; the stock elasticsearch-py client does not
    appear to. ``elasticsearch_hostname`` is also not passed to the client.
    The logic is left untouched because the intended index layout is not
    visible from this file.
    """

    def __init__(self):
        self.db = Elasticsearch()

    def addRepository(self, id, name, url, date, labels, topics):
        """Store a repository record with an empty ``tokens`` field."""
        self.db['Repositories'].insert_one({'id': id, 'name': name, 'url': url, 'date': date, 'tokens': '', 'topics': topics, 'labels': labels})

    def addTokens(self, id, tokens):
        """Set the ``tokens`` field of the record whose ``id`` matches."""
        self.db['Repositories'].update_one({'id': id}, {"$set": {'tokens': tokens}})

    def isRepositoryExist(self, url):
        """Return True when a record whose ``id`` equals ``url`` exists.

        NOTE(review): the parameter is called ``url`` but is compared with the
        ``id`` field; ``Profiler.parseRepoUrls`` passes the hit's ``_id``.
        """
        return self.db['Repositories'].count_documents({'id': url}) > 0

    def isRepositoryHandled(self, url):
        """Return True when the repository exists and has non-empty tokens."""
        if not self.isRepositoryExist(url):
            return False
        return len(self.db['Repositories'].find({'url': url})['tokens']) > 0


class Profiler():
    """Wires together downloader, analysers, matcher, filter and the consumer."""

    def __init__(self):
        self.downloder = Downloader(self)
        self.analysermanager = AnalyserManager(self)
        self.matcher = Matcher(self)
        self.filter = Filter()
        self.db = ElasticsearchController()
        self.activemq_consumer = Consumer(self.parseData, profiler_topic)
        self._from = 1
        self._to = 0
        self.current_analyzed_project = None

    def makeDoapObject(self, _json):
        """Build a ``DoapModel`` from a dict carrying the nine DOAP fields."""
        return DoapModel(_json['idDoap'], _json['name'], _json['homepage'], _json['topics'], _json['labels'], _json['languages'], _json['published_at'], _json['component'], _json['code'])

    def addProject(self, repo):
        """Record ``repo`` in the database and register it with the analysers."""
        self.db.addRepository(repo.idgit, repo.name, repo.url, repo.date, repo.labels, repo.topics)
        self.analysermanager.addProject(repo)

    def deleteProject(self, repo):
        """Remove ``repo`` from the analyser manager."""
        self.analysermanager.deleteProject(repo)

    def saveFeatures(self, method, repo, data):
        """Persist extracted features; only ``bag_of_worlds`` stores anything."""
        if method == 'bag_of_worlds':
            tokens = data['tokens']
            self.db.addTokens(repo.idgit, tokens)
        if method == "graph":
            pass

    def canBeAnalyzed(self, model):
        """Return the filter's verdict on whether ``model`` is valid."""
        return self.filter.isValidModel(model)

    def sendToMatcher(self, data, repo):
        """Run the matcher on ``data`` and POST the suggestion to the report API."""
        print("Features extraction completed for project {0} of the component {1}".format(repo.name, repo.component))
        prediction, tokens, method, _repo = self.matcher.predict(data, repo)
        if method == "bag_of_worlds":
            _data = {"code": _repo.code, "data": {"component_name": _repo.component, "suggested_categories": prediction}}
            requests.post(url=api_report_url, data=json.dumps(_data))

    def getUrlAndName(self, url):
        """Split a ``.../<name>/releases...`` URL into ``(name, repository_url)``.

        Raises ``ValueError`` when ``url`` does not contain ``releases``.
        """
        url = url[:url.index('releases') - 1]
        name = url[url.rindex('/') + 1:]
        return name, url

    def parseData(self, data):
        """Handle one broker message; only ``suggest`` requests are processed.

        Malformed messages are reported and dropped rather than raising into
        the listener callback.
        """
        try:
            _json_component = json.loads(data)
            print(_json_component)
        except Exception as e:
            print(e)
            print("Cannot parse data received")
            return
        if not isinstance(_json_component, dict):
            return
        if _json_component.get("request") != 'suggest':
            return
        try:
            code = _json_component["code"]
            components = _json_component["data"]
        except KeyError as e:
            print("Malformed suggest request, missing field {0}".format(e))
            return
        for component in components:
            try:
                component_name = component['component_name']
                git_url = component['repository']
                topics = component['categories']
                language = component["language"].capitalize()
            except (KeyError, TypeError, AttributeError) as e:
                print("Skipping malformed component: {0}".format(e))
                continue
            self.classifier(component_name, git_url, topics, language, str(int(time.time())), code)

    def classifier(self, component_name, git_url, topics, languages, published, code):
        """Queue one component's repository for download with type ``matcher``."""
        # Last path segment of the URL; tolerate a trailing slash or no slash.
        name = git_url.rstrip('/').rsplit('/', 1)[-1]
        _doap = {'idDoap': 0, 'name': name, 'homepage': git_url, 'topics': topics, 'labels': topics, 'languages': languages, 'published_at': published, 'component': component_name, 'code': code}
        model = self.makeDoapObject(_doap)
        repo = self.filter.getRepo(model)
        repo.type = 'matcher'
        self.downloder.addUrl(repo.name, repo.url, repo.idgit, 'matcher', repo.labels, repo.topics, published, component_name, repo.code, repo.language)
        self.current_analyzed_project = repo

    def parseRepoUrls(self, response):
        """Queue every new GitHub project found in a knowledge-base search reply.

        Returns a list on success and ``None`` when the reply cannot be
        parsed. NOTE(review): nothing is ever appended to the list, so it is
        always empty; confirm whether callers expect the queued repos.
        """
        try:
            hits = json.loads(response)['hits']['hits']
            print("{0} repositories url were found".format(len(hits)))
            result = []
            for project in hits:
                try:
                    source = project['_source']
                    if '_id' not in project or 'downloadPage' not in source:
                        continue
                    if self.db.isRepositoryExist(project['_id']):
                        continue
                    if source.get('typeRepo') != "gitHub":
                        # No repository URL can be derived for other hosts, so
                        # there is nothing to download; skip explicitly instead
                        # of tripping over a None URL.
                        continue
                    _url = "https://github.com/" + source['projectName']
                    name = _url[_url.rindex('/') + 1:]
                    _doap = {'idDoap': project['_id'], 'name': name, 'homepage': _url, 'topics': source['topic'], 'labels': source['label'], 'languages': [], 'published_at': source['created'], 'component': name, 'code': 0}
                    model = self.makeDoapObject(_doap)
                    if self.canBeAnalyzed(model):
                        repo = self.filter.getRepo(model)
                        repo.type = 'analyser'
                        self.downloder.addUrl(repo.name, repo.url, repo.idgit, 'analyser', repo.labels, repo.topics, repo.date, repo.name, repo.code, repo.language)
                    else:
                        print("Repositories {0} cannot be added".format(_doap['name']))
                except Exception as e:
                    # One bad entry must not abort the rest of the page.
                    print(e)
            return result
        except Exception as e:
            print(e)
            return None

    def start(self):
        """Start the workers, then poll the knowledge base once per hour, forever."""
        print("The Profiler started ...")

        self.filter.setLabels(labels)
        self.filter.setTopics(labels)
        self.filter.setLanguages(languages)

        self.downloder.start()
        self.activemq_consumer.start()
        while True:
            try:
                response = requests.get(url=url_knowledge_base + '/knowbase/_search/', headers=headers)
                self.parseRepoUrls(response.text)
            except Exception as e:
                print("Cannot reach KB", e)
            time.sleep(60 * 60)


if __name__ == "__main__":
    profiler = Profiler()
    profiler.start()
import os
import re

# Compiled once at import; cleanText may be called for many strings.
_NON_LETTERS = re.compile(r"[^A-Za-z]")
_CAMEL_BOUNDARY = re.compile(r"(?<=[A-Za-z])(?=[A-Z][a-z])")
_MULTI_SPACE = re.compile(r"\s{2,}")


def cleanText(text):
    """Reduce ``text`` to lower-case ASCII words separated by single spaces.

    Every character outside ``A-Za-z`` becomes a space, a space is inserted
    before a capital that starts a new camelCase word, runs of whitespace are
    collapsed, and the result is stripped and lower-cased.

    Inputs shorter than two characters are returned unchanged.
    """
    if len(text) < 2:
        return text
    string = _NON_LETTERS.sub(" ", text)
    string = _CAMEL_BOUNDARY.sub(" ", string)
    # The former apostrophe/punctuation substitutions were removed: after the
    # first step no such characters remain, so they could never match, and
    # their replacement strings used invalid escape sequences.
    string = _MULTI_SPACE.sub(" ", string)
    return string.strip().lower()


text = "un example avec un text assez καποια εκκιληηα complexe with 234 and date 12/2/2021, we want to aussu-du avec_err jdtotow@yahoo.fr remove @ and ? so that ti have"

print(cleanText(text))
A scelerisque purus semper eget. Blandit turpis cursus in hac habitasse platea. Volutpat commodo sed egestas egestas fringilla phasellus. Eros in cursus turpis massa. Amet risus nullam eget felis eget nunc lobortis mattis. + +Eu mi bibendum neque egestas congue quisque egestas diam in. Id diam vel quam elementum pulvinar etiam non quam. Mattis rhoncus urna neque viverra justo nec. Porttitor lacus luctus accumsan tortor posuere. Platea dictumst vestibulum rhoncus est pellentesque elit. Posuere morbi leo urna molestie at elementum eu facilisis sed. In pellentesque massa placerat duis ultricies lacus sed. Vestibulum mattis ullamcorper velit sed ullamcorper. Eget lorem dolor sed viverra. Sem fringilla ut morbi tincidunt augue. Id semper risus in hendrerit gravida rutrum quisque. Non tellus orci ac auctor augue mauris augue neque gravida. Sit amet est placerat in egestas erat imperdiet sed. Cursus vitae congue mauris rhoncus aenean. In cursus turpis massa tincidunt dui ut ornare lectus. + +Scelerisque fermentum dui faucibus in ornare quam. Eros in cursus turpis massa tincidunt. Amet est placerat in egestas erat. Iaculis urna id volutpat lacus laoreet non curabitur gravida arcu. Dignissim enim sit amet venenatis urna cursus eget nunc scelerisque. Egestas integer eget aliquet nibh praesent tristique magna sit. Fames ac turpis egestas sed tempus urna. Turpis egestas integer eget aliquet nibh. Lectus sit amet est placerat in egestas erat imperdiet. Diam sit amet nisl suscipit adipiscing bibendum est. Vel pharetra vel turpis nunc eget lorem dolor sed viverra. Tortor dignissim convallis aenean et tortor at risus viverra adipiscing. + +Pretium vulputate sapien nec sagittis aliquam. Volutpat odio facilisis mauris sit amet massa. Diam sollicitudin tempor id eu nisl nunc mi ipsum. Duis at tellus at urna condimentum mattis. Fermentum posuere urna nec tincidunt praesent semper feugiat nibh. Placerat vestibulum lectus mauris ultrices eros in cursus. Et netus et malesuada fames. 
Fames ac turpis egestas sed tempus urna et pharetra pharetra. Quis ipsum suspendisse ultrices gravida dictum fusce. Nec nam aliquam sem et tortor consequat. Rhoncus dolor purus non enim praesent. Cursus in hac habitasse platea dictumst quisque sagittis purus sit. Vitae tempus quam pellentesque nec. Morbi tincidunt ornare massa eget egestas purus viverra. Convallis aenean et tortor at risus. + +Quam viverra orci sagittis eu volutpat. Egestas tellus rutrum tellus pellentesque eu tincidunt tortor. Elit eget gravida cum sociis natoque penatibus et. Morbi leo urna molestie at elementum eu facilisis sed. Ultricies mi eget mauris pharetra et. Id semper risus in hendrerit gravida rutrum. Odio tempor orci dapibus ultrices in iaculis. Ut tellus elementum sagittis vitae et leo duis ut. Viverra orci sagittis eu volutpat odio facilisis mauris. Aliquet porttitor lacus luctus accumsan tortor posuere ac ut. Ornare quam viverra orci sagittis eu volutpat. Ut aliquam purus sit amet luctus venenatis lectus. +Ullamcorper velit sed ullamcorper morbi tincidunt. Netus et malesuada fames ac turpis egestas integer. Sit amet porttitor eget dolor morbi. Et magnis dis parturient montes nascetur. Enim eu turpis egestas pretium aenean. Et egestas quis ipsum suspendisse. Neque aliquam vestibulum morbi blandit cursus. Ut porttitor leo a diam sollicitudin tempor. Diam maecenas ultricies mi eget mauris pharetra. Massa vitae tortor condimentum lacinia quis vel. Laoreet non curabitur gravida arcu ac tortor dignissim convallis. Aliquam sem fringilla ut morbi tincidunt augue interdum velit. Sapien eget mi proin sed libero enim sed. Aliquam faucibus purus in massa tempor nec feugiat nisl pretium. Tincidunt tortor aliquam nulla facilisi cras fermentum odio eu. Consequat interdum varius sit amet mattis vulputate enim. Lacus vel facilisis volutpat est velit egestas dui id ornare. Turpis tincidunt id aliquet risus feugiat in ante metus dictum. +Vitae congue eu consequat ac felis donec et odio pellentesque. 
Nunc mi ipsum faucibus vitae aliquet nec. Cursus vitae congue mauris rhoncus aenean vel elit scelerisque mauris. Id eu nisl nunc mi ipsum faucibus vitae aliquet nec. Ipsum consequat nisl vel pretium lectus. Etiam dignissim diam quis enim lobortis scelerisque fermentum. At elementum eu facilisis sed odio. Facilisis sed odio morbi quis. Ullamcorper sit amet risus nullam. Odio ut enim blandit volutpat maecenas volutpat blandit aliquam. Eu sem integer vitae justo eget. Tempus egestas sed sed risus pretium. Enim ut tellus elementum sagittis vitae et leo duis. Nec sagittis aliquam malesuada bibendum arcu. Felis imperdiet proin fermentum leo vel. Massa sed elementum tempus egestas sed. Mattis pellentesque id nibh tortor id aliquet lectus proin. Dictum varius duis at consectetur lorem donec. Lacus viverra vitae congue eu. +Nulla malesuada pellentesque elit eget. Odio eu feugiat pretium nibh ipsum consequat nisl. Leo urna molestie at elementum. Rhoncus mattis rhoncus urna neque viverra justo nec ultrices dui. Et malesuada fames ac turpis egestas sed. Metus vulputate eu scelerisque felis. Sodales ut etiam sit amet. Enim diam vulputate ut pharetra sit amet aliquam id diam. Volutpat ac tincidunt vitae semper quis lectus. Quis enim lobortis scelerisque fermentum dui faucibus in ornare. Rutrum quisque non tellus orci ac auctor augue. +Metus dictum at tempor commodo ullamcorper a lacus vestibulum. Blandit aliquam etiam erat velit scelerisque. Curabitur vitae nunc sed velit. Bibendum ut tristique et egestas quis. Pellentesque elit ullamcorper dignissim cras. Lacus luctus accumsan tortor posuere ac ut. Volutpat consequat mauris nunc congue nisi. Tristique sollicitudin nibh sit amet commodo. Justo donec enim diam vulputate ut pharetra sit amet. Quis imperdiet massa tincidunt nunc pulvinar. +Vitae ultricies leo integer malesuada. Facilisi morbi tempus iaculis urna. Viverra accumsan in nisl nisi scelerisque eu. Arcu cursus euismod quis viverra. 
Quis hendrerit dolor magna eget est lorem ipsum. Urna nec tincidunt praesent semper feugiat nibh sed pulvinar proin. Sodales ut eu sem integer. Laoreet sit amet cursus sit amet dictum sit amet. Mauris augue neque gravida in fermentum et. Ut ornare lectus sit amet est placerat in. Lectus vestibulum mattis ullamcorper velit sed ullamcorper morbi tincidunt. Porttitor lacus luctus accumsan tortor posuere ac ut consequat. Praesent tristique magna sit amet purus gravida quis. Euismod nisi porta lorem mollis aliquam ut. Amet mauris commodo quis imperdiet. Diam sollicitudin tempor id eu. In est ante in nibh mauris cursus mattis molestie. Ante metus dictum at tempor commodo ullamcorper a lacus vestibulum. Dolor morbi non arcu risus quis varius quam quisque id. +Et malesuada fames ac turpis egestas integer eget aliquet nibh. Nisl nunc mi ipsum faucibus vitae. Cursus turpis massa tincidunt dui. Sagittis orci a scelerisque purus semper eget duis at tellus. Nunc faucibus a pellentesque sit. Netus et malesuada fames ac. Cursus risus at ultrices mi tempus imperdiet nulla. Accumsan sit amet nulla facilisi. Aenean sed adipiscing diam donec adipiscing tristique risus. At risus viverra adipiscing at in tellus integer feugiat scelerisque. Proin libero nunc consequat interdum varius sit. Dictum at tempor commodo ullamcorper a lacus vestibulum sed. Amet venenatis urna cursus eget. Adipiscing diam donec adipiscing tristique. Vitae sapien pellentesque habitant morbi tristique senectus et. Amet justo donec enim diam vulputate ut pharetra sit. Sollicitudin nibh sit amet commodo nulla facilisi nullam vehicula ipsum. At erat pellentesque adipiscing commodo. Lobortis feugiat vivamus at augue. +Hac habitasse platea dictumst quisque sagittis. Eget gravida cum sociis natoque penatibus et magnis dis parturient. Nec sagittis aliquam malesuada bibendum arcu. Aenean sed adipiscing diam donec. Sed risus ultricies tristique nulla aliquet enim tortor at. Vitae tempus quam pellentesque nec. 
Arcu cursus euismod quis viverra nibh cras pulvinar mattis. Etiam dignissim diam quis enim lobortis scelerisque fermentum. Integer malesuada nunc vel risus commodo viverra. Duis convallis convallis tellus id interdum velit. Turpis in eu mi bibendum neque egestas congue quisque egestas. Facilisis mauris sit amet massa vitae tortor. Ac tincidunt vitae semper quis lectus nulla at volutpat diam. Quis viverra nibh cras pulvinar mattis nunc. Amet venenatis urna cursus eget nunc scelerisque viverra mauris in. +Tristique et egestas quis ipsum suspendisse ultrices gravida dictum fusce. Arcu non sodales neque sodales. Aliquam eleifend mi in nulla posuere sollicitudin aliquam ultrices. Ultricies leo integer malesuada nunc vel risus commodo. Tempus imperdiet nulla malesuada pellentesque elit eget gravida. Eget lorem dolor sed viverra ipsum nunc aliquet bibendum enim. Eget lorem dolor sed viverra ipsum nunc. Massa sed elementum tempus egestas sed sed risus. Auctor neque vitae tempus quam pellentesque nec nam. Tincidunt lobortis feugiat vivamus at augue eget. Facilisi etiam dignissim diam quis enim. Tortor id aliquet lectus proin nibh. Non odio euismod lacinia at quis risus sed. Dolor sit amet consectetur adipiscing elit duis tristique. Risus commodo viverra maecenas accumsan lacus vel facilisis. Sit amet cursus sit amet dictum sit. Quis blandit turpis cursus in. Eget arcu dictum varius duis at. +Scelerisque felis imperdiet proin fermentum. Porta lorem mollis aliquam ut porttitor leo a diam. Enim lobortis scelerisque fermentum dui faucibus in ornare quam viverra. Malesuada fames ac turpis egestas sed tempus urna et. Non sodales neque sodales ut etiam sit amet nisl purus. Orci dapibus ultrices in iaculis nunc sed augue. Aenean euismod elementum nisi quis eleifend quam adipiscing vitae. Platea dictumst quisque sagittis purus sit amet volutpat consequat. Urna neque viverra justo nec ultrices dui sapien eget. Id cursus metus aliquam eleifend mi in. 
Nulla malesuada pellentesque elit eget gravida cum. Eu non diam phasellus vestibulum lorem sed. Est placerat in egestas erat imperdiet sed euismod. Enim sed faucibus turpis in eu mi bibendum. Commodo odio aenean sed adipiscing diam donec. Purus in massa tempor nec. Morbi non arcu risus quis. +Consequat interdum varius sit amet mattis vulputate enim nulla. Faucibus pulvinar elementum integer enim neque volutpat ac. Enim nec dui nunc mattis enim ut. Dignissim cras tincidunt lobortis feugiat vivamus. Et egestas quis ipsum suspendisse ultrices gravida dictum fusce ut. Platea dictumst vestibulum rhoncus est. Odio aenean sed adipiscing diam. Adipiscing commodo elit at imperdiet dui accumsan. Vestibulum mattis ullamcorper velit sed. Quisque id diam vel quam. Mollis nunc sed id semper risus in hendrerit. Faucibus turpis in eu mi bibendum. +Quis viverra nibh cras pulvinar mattis nunc sed blandit libero. Sed libero enim sed faucibus turpis in. Sed risus pretium quam vulputate dignissim suspendisse. Pellentesque elit eget gravida cum. Rhoncus aenean vel elit scelerisque mauris pellentesque pulvinar pellentesque. Natoque penatibus et magnis dis. Cursus metus aliquam eleifend mi in. Aliquet lectus proin nibh nisl. Proin sagittis nisl rhoncus mattis. Gravida rutrum quisque non tellus orci. Dignissim suspendisse in est ante in nibh. Morbi tincidunt augue interdum velit euismod in pellentesque massa placerat. +Netus et malesuada fames ac turpis egestas. Massa vitae tortor condimentum lacinia quis vel eros donec ac. Nec ullamcorper sit amet risus nullam. Turpis nunc eget lorem dolor sed viverra ipsum nunc aliquet. Neque egestas congue quisque egestas. Tortor at risus viverra adipiscing at in tellus integer. Pellentesque elit eget gravida cum sociis natoque penatibus. Habitant morbi tristique senectus et netus. Sed faucibus turpis in eu mi bibendum neque egestas congue. Egestas tellus rutrum tellus pellentesque eu. +Integer vitae justo eget magna fermentum iaculis. 
Sodales ut etiam sit amet nisl. Sagittis id consectetur purus ut faucibus pulvinar. Habitasse platea dictumst vestibulum rhoncus est. Mi ipsum faucibus vitae aliquet nec ullamcorper sit. Vel fringilla est ullamcorper eget nulla facilisi etiam dignissim diam. Id semper risus in hendrerit gravida rutrum quisque. Diam vulputate ut pharetra sit amet. Maecenas sed enim ut sem viverra aliquet eget sit amet. Morbi tristique senectus et netus et malesuada. Scelerisque fermentum dui faucibus in ornare quam. Euismod elementum nisi quis eleifend quam adipiscing vitae. Et sollicitudin ac orci phasellus egestas tellus rutrum. Ligula ullamcorper malesuada proin libero nunc consequat interdum varius. Porta lorem mollis aliquam ut porttitor leo a diam sollicitudin. Phasellus egestas tellus rutrum tellus pellentesque eu tincidunt. Amet consectetur adipiscing elit duis. Proin nibh nisl condimentum id venenatis a condimentum vitae. Porttitor massa id neque aliquam. Eu augue ut lectus arcu bibendum at varius vel. +Malesuada nunc vel risus commodo. Vel fringilla est ullamcorper eget nulla facilisi. Amet consectetur adipiscing elit pellentesque habitant morbi tristique senectus. Elementum integer enim neque volutpat ac tincidunt. Purus in mollis nunc sed id semper risus. Pretium nibh ipsum consequat nisl vel. Tempus quam pellentesque nec nam aliquam. Pellentesque sit amet porttitor eget dolor morbi non arcu. Nibh ipsum consequat nisl vel pretium. Suspendisse in est ante in. Sed viverra ipsum nunc aliquet. Volutpat consequat mauris nunc congue nisi vitae suscipit tellus. Etiam tempor orci eu lobortis elementum nibh tellus molestie. +Penatibus et magnis dis parturient montes nascetur ridiculus. Et malesuada fames ac turpis. Adipiscing enim eu turpis egestas pretium. Ultrices in iaculis nunc sed augue lacus viverra. Enim sed faucibus turpis in eu mi bibendum neque egestas. Porta lorem mollis aliquam ut. Convallis posuere morbi leo urna molestie at. 
Faucibus ornare suspendisse sed nisi lacus sed. Augue neque gravida in fermentum et sollicitudin ac. Pulvinar etiam non quam lacus suspendisse. Tempus quam pellentesque nec nam aliquam sem et tortor consequat. Nec sagittis aliquam malesuada bibendum arcu vitae elementum curabitur. Eget lorem dolor sed viverra ipsum. Mi eget mauris pharetra et ultrices neque ornare aenean. Facilisi morbi tempus iaculis urna id volutpat lacus laoreet. +In pellentesque massa placerat duis ultricies lacus. Fames ac turpis egestas maecenas. Faucibus pulvinar elementum integer enim neque. Et sollicitudin ac orci phasellus egestas tellus. Eget nullam non nisi est sit amet facilisis. Est sit amet facilisis magna etiam tempor orci eu lobortis. Tristique risus nec feugiat in fermentum posuere urna nec. Iaculis eu non diam phasellus vestibulum. Ultrices eros in cursus turpis. Pellentesque eu tincidunt tortor aliquam. Vehicula ipsum a arcu cursus vitae congue mauris. Viverra nam libero justo laoreet. Risus at ultrices mi tempus imperdiet nulla malesuada pellentesque elit. Tincidunt vitae semper quis lectus nulla. Sed tempus urna et pharetra pharetra massa massa. Scelerisque in dictum non consectetur a. +Ornare arcu dui vivamus arcu felis bibendum. Massa id neque aliquam vestibulum morbi blandit cursus risus at. Tellus id interdum velit laoreet id donec ultrices tincidunt. Faucibus ornare suspendisse sed nisi lacus sed. Ipsum dolor sit amet consectetur adipiscing elit duis tristique sollicitudin. Cursus metus aliquam eleifend mi in nulla posuere sollicitudin. Neque viverra justo nec ultrices dui sapien eget mi. Id donec ultrices tincidunt arcu non sodales neque sodales ut. Tortor id aliquet lectus proin. Elementum integer enim neque volutpat ac tincidunt vitae semper. Elementum integer enim neque volutpat ac tincidunt vitae semper quis. Ut faucibus pulvinar elementum integer enim neque volutpat ac. Viverra suspendisse potenti nullam ac tortor. 
+Ornare suspendisse sed nisi lacus sed viverra tellus in hac. Maecenas ultricies mi eget mauris pharetra. Blandit volutpat maecenas volutpat blandit aliquam etiam erat. Pretium viverra suspendisse potenti nullam ac tortor. Aliquet porttitor lacus luctus accumsan tortor posuere ac ut. Arcu felis bibendum ut tristique et egestas quis ipsum suspendisse. Nunc non blandit massa enim nec. Risus pretium quam vulputate dignissim suspendisse in est ante in. Tellus cras adipiscing enim eu turpis. Urna et pharetra pharetra massa massa ultricies. Ut morbi tincidunt augue interdum velit euismod. Enim sed faucibus turpis in eu mi bibendum neque egestas. Eget duis at tellus at. Dui nunc mattis enim ut tellus. Semper auctor neque vitae tempus quam pellentesque. +Ridiculus mus mauris vitae ultricies leo integer malesuada. Sapien nec sagittis aliquam malesuada bibendum arcu vitae. Cras adipiscing enim eu turpis egestas pretium. Odio morbi quis commodo odio aenean sed. Bibendum at varius vel pharetra. Habitant morbi tristique senectus et netus et malesuada fames. Eget magna fermentum iaculis eu non diam phasellus vestibulum lorem. Quisque sagittis purus sit amet volutpat. Morbi tincidunt augue interdum velit euismod. Diam vel quam elementum pulvinar etiam non quam lacus suspendisse. Venenatis urna cursus eget nunc scelerisque viverra mauris. Tincidunt vitae semper quis lectus nulla at volutpat diam. Vulputate enim nulla aliquet porttitor lacus luctus accumsan. Vitae congue eu consequat ac felis. +Adipiscing at in tellus integer feugiat. Sed euismod nisi porta lorem mollis. Tellus rutrum tellus pellentesque eu tincidunt tortor aliquam nulla. Congue quisque egestas diam in arcu. Ornare lectus sit amet est placerat in egestas erat. Nulla posuere sollicitudin aliquam ultrices sagittis orci a scelerisque. Pharetra magna ac placerat vestibulum lectus. Non odio euismod lacinia at quis risus sed vulputate odio. Id ornare arcu odio ut sem. 
Diam sit amet nisl suscipit adipiscing bibendum est ultricies. Urna et pharetra pharetra massa massa ultricies mi quis. Enim tortor at auctor urna. Purus in mollis nunc sed id semper. +Eu ultrices vitae auctor eu augue. Ut morbi tincidunt augue interdum velit euismod in pellentesque massa. Elementum sagittis vitae et leo duis ut diam. Pulvinar etiam non quam lacus suspendisse faucibus interdum posuere lorem. Lorem dolor sed viverra ipsum nunc aliquet bibendum. Commodo odio aenean sed adipiscing diam donec adipiscing tristique risus. Dignissim convallis aenean et tortor at risus viverra adipiscing at. Donec et odio pellentesque diam. Ipsum a arcu cursus vitae congue mauris rhoncus aenean vel. Felis eget velit aliquet sagittis. Sit amet commodo nulla facilisi nullam vehicula ipsum. Tincidunt vitae semper quis lectus nulla at volutpat diam. Tellus pellentesque eu tincidunt tortor aliquam nulla facilisi cras fermentum. Placerat vestibulum lectus mauris ultrices eros in cursus turpis massa. Vel pharetra vel turpis nunc. +Id aliquet risus feugiat in ante metus dictum. Dignissim convallis aenean et tortor at. Ultrices tincidunt arcu non sodales neque sodales. Quis commodo odio aenean sed adipiscing. Ac placerat vestibulum lectus mauris ultrices eros in. Aenean vel elit scelerisque mauris pellentesque pulvinar pellentesque habitant morbi. Vel eros donec ac odio tempor orci dapibus ultrices in. Erat velit scelerisque in dictum. Scelerisque purus semper eget duis at tellus at. Porttitor leo a diam sollicitudin tempor id eu nisl nunc. Vel risus commodo viverra maecenas accumsan lacus vel facilisis. In dictum non consectetur a. Dui nunc mattis enim ut tellus elementum sagittis vitae et. In aliquam sem fringilla ut morbi. Eget arcu dictum varius duis at consectetur. Convallis a cras semper auctor neque vitae tempus quam pellentesque. +Ultrices neque ornare aenean euismod elementum. Risus at ultrices mi tempus imperdiet nulla malesuada pellentesque elit. 
Pretium fusce id velit ut tortor pretium viverra suspendisse potenti. Sollicitudin ac orci phasellus egestas. Consectetur adipiscing elit ut aliquam purus sit. Mi eget mauris pharetra et ultrices neque. Dolor magna eget est lorem ipsum dolor sit. Nec ultrices dui sapien eget mi proin sed. Massa ultricies mi quis hendrerit dolor magna. Habitasse platea dictumst quisque sagittis purus. Eu nisl nunc mi ipsum faucibus vitae aliquet nec. Eros donec ac odio tempor orci dapibus. +Viverra nam libero justo laoreet sit amet cursus sit. Convallis posuere morbi leo urna molestie at. Feugiat sed lectus vestibulum mattis ullamcorper velit sed ullamcorper. Vitae tempus quam pellentesque nec. Adipiscing elit ut aliquam purus sit amet luctus. Nisl nunc mi ipsum faucibus vitae aliquet. Quam elementum pulvinar etiam non quam lacus suspendisse faucibus. Facilisis gravida neque convallis a cras semper auctor neque. Aliquet porttitor lacus luctus accumsan tortor posuere ac ut consequat. Libero volutpat sed cras ornare arcu dui vivamus arcu felis. Nam at lectus urna duis convallis convallis. Quis hendrerit dolor magna eget est lorem ipsum dolor sit. Facilisis gravida neque convallis a cras. Morbi tristique senectus et netus et malesuada. Ullamcorper velit sed ullamcorper morbi tincidunt. Diam vel quam elementum pulvinar. Quisque sagittis purus sit amet volutpat consequat. 
# \ No newline at end of file
# diff --git a/profiler/tester.py b/profiler/tester.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..8504ea59eebd7ccd4acd8d7388ee58a711b93350
# --- /dev/null
# +++ b/profiler/tester.py
# @@ -0,0 +1,39 @@
"""Small client script that exercises the profiler HTTP API.

It POSTs a fixed list of components to ``/analyse`` and, once the service
has answered with a code, polls ``/collect?code=<code>`` forever, printing
every response body.
"""

import json
import time

import requests

url = "http://localhost:7878"

# Code returned by /analyse; stays None until a submission is accepted.
code = None

# Seconds to wait on a single HTTP request; without a timeout ``requests``
# blocks indefinitely when the service stops answering.
REQUEST_TIMEOUT = 30

components = [
    {
        "component_name": "component-1",
        "categories": ["gpu"],
        "repository": "https://github.com/Supervisor/supervisor",
        "language": "Python",
    },
    {
        "component_name": "component-2",
        "categories": ["edge"],
        "repository": "https://github.com/lf-edge/edge-home-orchestration-go",
        "language": "Go",
    },
    {
        "component_name": "component-3",
        "categories": ["docker"],
        "repository": "https://github.com/eliasyilma/CNN",
        "language": "Java",
    },
]


def sendAnalyse():
    """POST ``components`` to ``/analyse`` and remember the returned code.

    The raw response body is printed. If it is a JSON object with a truthy
    ``status`` field, the module-level ``code`` is set to its ``code`` field.
    Network errors and malformed responses are printed and leave ``code``
    unchanged, so the caller can retry.
    """
    global code
    try:
        response = requests.post(
            url=url + "/analyse",
            data=json.dumps(components),
            timeout=REQUEST_TIMEOUT,
        ).text
    except requests.RequestException as e:
        print(e)
        return
    print(response)
    try:
        _json = json.loads(response)
        if _json["status"]:
            code = _json["code"]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        print(e)


def checkResponse():
    """Return the body of ``GET /collect`` for the current ``code``.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    return requests.get(
        url=url + "/collect?code={0}".format(code),
        timeout=REQUEST_TIMEOUT,
    ).text


def senario():
    """Submit the analysis request, then poll for its result forever.

    While no code has been obtained, the submission is retried every
    10 seconds; a single failed attempt would otherwise leave the loop
    sleeping forever with nothing able to set ``code``. Once a code is
    known, ``/collect`` is polled every 30 seconds and each response (or
    request error) is printed.
    """
    sendAnalyse()
    while True:
        if not code:
            time.sleep(10)
            sendAnalyse()
            continue
        try:
            print(checkResponse())
        except requests.RequestException as e:
            # Keep polling; a transient failure should not end the run.
            print(e)
        time.sleep(30)


if __name__ == "__main__":
    senario()